virtio: add API for VIRTIO devices and add VIRTIO PCI driver

This commit adds the API for accessing VIRTIO devices and adds a driver
for VIRTIO PCI devices based on the newly added API.

This commit is based on the Virtual I/O Device (VIRTIO) Version 1.3
specification:
https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.pdf

Signed-off-by: Jakub Michalski <jmichalski@antmicro.com>
Signed-off-by: Filip Kokosinski <fkokosinski@antmicro.com>
Jakub Michalski 2024-11-21 10:52:41 +01:00 committed by Benjamin Cabé
commit c7107fc4d6
12 changed files with 1379 additions and 0 deletions
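
For orientation, a device-specific frontend driver sitting on top of the API added below negotiates feature bits, sets up its virtqueues and then finalizes initialization. The following is a minimal editorial sketch against that API, not code from this commit; the feature bit VIRTIO_NET_F_MAC, the queue count and queue_size_cb are illustrative.

#include <zephyr/device.h>
#include <zephyr/sys/util.h>
#include <zephyr/virtio/virtio.h>

/* illustrative device-type feature bit (virtio-net MAC feature) */
#define VIRTIO_NET_F_MAC 5

/* called once per virtqueue; returns the size the driver wants, capped by the device limit */
static uint16_t queue_size_cb(uint16_t queue_idx, uint16_t max_queue_size, void *opaque)
{
	ARG_UNUSED(queue_idx);
	ARG_UNUSED(opaque);
	return MIN(64, max_queue_size);
}

static int example_frontend_init(const struct device *vdev)
{
	int ret;

	/* negotiate only the feature bits this frontend understands */
	if (virtio_read_device_feature_bit(vdev, VIRTIO_NET_F_MAC)) {
		ret = virtio_write_driver_feature_bit(vdev, VIRTIO_NET_F_MAC, true);
		if (ret != 0) {
			return ret;
		}
	}

	/* writes FEATURES_OK and verifies the device accepted the selection */
	ret = virtio_commit_feature_bits(vdev);
	if (ret != 0) {
		return ret;
	}

	/* create and register two virtqueues (e.g. rx and tx) */
	ret = virtio_init_virtqueues(vdev, 2, queue_size_cb, NULL);
	if (ret != 0) {
		return ret;
	}

	/* writes DRIVER_OK; the device is now live */
	virtio_finalize_init(vdev);

	return 0;
}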


@@ -2197,6 +2197,20 @@ Release Notes:
tests:
- drivers.video
"Drivers: VIRTIO":
status: maintained
maintainers:
- fkokosinski
- tgorochowik
collaborators:
- kgugala
files:
- drivers/virtio/
- dts/bindings/virtio/
- include/zephyr/virtio/
labels:
- "area: VIRTIO"
"Drivers: W1":
status: maintained
maintainers:


@@ -89,6 +89,7 @@ add_subdirectory_ifdef(CONFIG_SYSCON syscon)
add_subdirectory_ifdef(CONFIG_SYS_CLOCK_EXISTS timer)
add_subdirectory_ifdef(CONFIG_TEE tee)
add_subdirectory_ifdef(CONFIG_VIDEO video)
add_subdirectory_ifdef(CONFIG_VIRTIO virtio)
add_subdirectory_ifdef(CONFIG_VIRTUALIZATION virtualization)
add_subdirectory_ifdef(CONFIG_W1 w1)
add_subdirectory_ifdef(CONFIG_WATCHDOG watchdog)


@@ -88,6 +88,7 @@ source "drivers/timer/Kconfig"
source "drivers/usb/Kconfig"
source "drivers/usb_c/Kconfig"
source "drivers/video/Kconfig"
source "drivers/virtio/Kconfig"
source "drivers/virtualization/Kconfig"
source "drivers/w1/Kconfig"
source "drivers/watchdog/Kconfig"


@@ -0,0 +1,7 @@
# Copyright (c) 2024 Antmicro <www.antmicro.com>
# SPDX-License-Identifier: Apache-2.0
zephyr_library()
zephyr_library_sources_ifdef(CONFIG_VIRTIO virtqueue.c virtio_common.c)
zephyr_library_sources_ifdef(CONFIG_VIRTIO_PCI virtio_pci.c)

drivers/virtio/Kconfig Normal file

@@ -0,0 +1,22 @@
# Copyright (c) 2024 Antmicro <www.antmicro.com>
# SPDX-License-Identifier: Apache-2.0
config VIRTIO
bool "support for VIRTIO"
help
Enable options for VIRTIO
if VIRTIO
config VIRTIO_PCI
bool "support for VIRTIO over PCI"
default y
depends on DT_HAS_VIRTIO_PCI_ENABLED
help
Enable options for VIRTIO over PCI
endif # VIRTIO
module = VIRTIO
module-str = VIRTIO
source "subsys/logging/Kconfig.template.log_config"


@@ -0,0 +1,71 @@
/*
* Copyright (c) 2025 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/logging/log.h>
#include <zephyr/sys/byteorder.h>
#include <zephyr/virtio/virtio.h>
#include <zephyr/virtio/virtqueue.h>
#include "virtio_common.h"
LOG_MODULE_REGISTER(virtio_common, CONFIG_VIRTIO_LOG_LEVEL);
void virtio_isr(const struct device *dev, uint8_t isr_status, uint16_t virtqueue_count)
{
if (isr_status & VIRTIO_QUEUE_INTERRUPT) {
for (int i = 0; i < virtqueue_count; i++) {
struct virtq *vq = virtio_get_virtqueue(dev, i);
uint16_t used_idx = sys_le16_to_cpu(vq->used->idx);
while (vq->last_used_idx != used_idx) {
uint16_t idx = vq->last_used_idx % vq->num;
uint16_t idx_le = sys_cpu_to_le16(idx);
uint16_t chain_head_le = vq->used->ring[idx_le].id;
uint16_t chain_head = sys_le16_to_cpu(chain_head_le);
uint32_t used_len = sys_le32_to_cpu(
vq->used->ring[idx_le].len
);
/*
* We are making a copy here, because chain will be
* returned before invoking the callback and may be
* overwritten by the time callback is called. This
* is to allow callback to immediately place the
* descriptors back in the avail_ring
*/
struct virtq_receive_callback_entry cbe =
vq->recv_cbs[chain_head];
uint16_t next = chain_head;
bool last = false;
/*
* We are done processing the descriptor chain, and
* we can add used descriptors back to the free stack.
* The only thing left to do is calling the callback
* associated with the chain, but it was saved above on
* the stack, so other code is free to use the descriptors
*/
while (!last) {
uint16_t curr = next;
uint16_t curr_le = sys_cpu_to_le16(curr);
next = vq->desc[curr_le].next;
last = !(vq->desc[curr_le].flags & VIRTQ_DESC_F_NEXT);
virtq_add_free_desc(vq, curr);
}
vq->last_used_idx++;
if (cbe.cb) {
cbe.cb(cbe.opaque, used_len);
}
}
}
}
if (isr_status & VIRTIO_DEVICE_CONFIGURATION_INTERRUPT) {
LOG_ERR("device configuration change interrupt is currently unsupported");
}
}
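
Because the chain's descriptors are pushed back onto the free stack before the callback runs (see the comment above), a receive callback may immediately hand the same buffer back to the device. Below is a minimal editorial sketch of such a callback; the context struct and names are hypothetical, and the buffer is assumed to live in memory that k_mem_phys_addr() can translate.

#include <zephyr/virtio/virtio.h>
#include <zephyr/virtio/virtqueue.h>

/* hypothetical per-driver context */
struct example_rx_ctx {
	const struct device *vdev; /* virtio transport device */
	struct virtq *vq;          /* receive virtqueue */
	uint16_t vq_idx;           /* index of that virtqueue */
	uint8_t buf[512];          /* receive buffer handed to the device */
};

static void example_rx_cb(void *opaque, uint32_t used_len)
{
	struct example_rx_ctx *ctx = opaque;
	struct virtq_buf rx_buf = { .addr = ctx->buf, .len = sizeof(ctx->buf) };

	/* ... consume the first used_len bytes of ctx->buf ... */

	/* re-queue the buffer as device-writable (0 device-readable buffers);
	 * K_NO_WAIT keeps this safe to call from the ISR path
	 */
	(void)virtq_add_buffer_chain(ctx->vq, &rx_buf, 1, 0, example_rx_cb, ctx, K_NO_WAIT);
	virtio_notify_virtqueue(ctx->vdev, ctx->vq_idx);
}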


@@ -0,0 +1,43 @@
/*
* Copyright (c) 2025 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_VIRTIO_VIRTIO_COMMON_H_
#define ZEPHYR_VIRTIO_VIRTIO_COMMON_H_
#define DEVICE_STATUS_ACKNOWLEDGE 0
#define DEVICE_STATUS_DRIVER 1
#define DEVICE_STATUS_DRIVER_OK 2
#define DEVICE_STATUS_FEATURES_OK 3
#define DEVICE_STATUS_NEEDS_RESET 6
#define DEVICE_STATUS_FAILED 7
#define VIRTIO_F_VERSION_1 32
/* Ranges of feature bits for specific device types (see spec 2.2) */
#define DEV_TYPE_FEAT_RANGE_0_BEGIN 0
#define DEV_TYPE_FEAT_RANGE_0_END 23
#define DEV_TYPE_FEAT_RANGE_1_BEGIN 50
#define DEV_TYPE_FEAT_RANGE_1_END 127
/*
* While defined separately in 4.1.4.5 for PCI and in 4.2.2 for MMIO
* the same bits are responsible for the same interrupts, so defines
* with them can be unified
*/
#define VIRTIO_QUEUE_INTERRUPT 1
#define VIRTIO_DEVICE_CONFIGURATION_INTERRUPT 2
/**
* Common virtio isr
*
* @param dev virtio device it operates on
* @param isr_status value of isr status register
* @param virtqueue_count amount of available virtqueues
*/
void virtio_isr(const struct device *dev, uint8_t isr_status, uint16_t virtqueue_count);
#endif /*ZEPHYR_VIRTIO_VIRTIO_COMMON_H_*/

drivers/virtio/virtio_pci.c Normal file

@@ -0,0 +1,596 @@
/*
* Copyright (c) 2024 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/device.h>
#include <zephyr/drivers/pcie/pcie.h>
#include <zephyr/kernel/mm.h>
#include <zephyr/logging/log.h>
#include <zephyr/spinlock.h>
#include <zephyr/sys/barrier.h>
#include <zephyr/sys/byteorder.h>
#include <zephyr/virtio/virtio.h>
#include <zephyr/virtio/virtqueue.h>
#include "virtio_common.h"
#include "assert.h"
#define DT_DRV_COMPAT virtio_pci
LOG_MODULE_REGISTER(virtio_pci, CONFIG_VIRTIO_LOG_LEVEL);
/*
* Based on Virtual I/O Device (VIRTIO) Version 1.3 specification:
* https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.pdf
*/
struct virtio_pci_cap {
uint8_t cap_vndr;
uint8_t cap_next;
uint8_t cap_len;
uint8_t cfg_type;
uint8_t bar;
uint8_t id;
uint8_t pad[2];
uint32_t offset;
uint32_t length;
};
struct virtio_pci_notify_cap {
struct virtio_pci_cap cap;
uint32_t notify_off_multiplier;
};
struct virtio_pci_common_cfg {
uint32_t device_feature_select; /* read-write */
uint32_t device_feature; /* read-only for driver */
uint32_t driver_feature_select; /* read-write */
uint32_t driver_feature; /* read-write */
uint16_t config_msix_vector; /* read-write */
uint16_t num_queues; /* read-only for driver */
uint8_t device_status; /* read-write */
uint8_t config_generation; /* read-only for driver */
uint16_t queue_select; /* read-write */
uint16_t queue_size; /* read-write */
uint16_t queue_msix_vector; /* read-write */
uint16_t queue_enable; /* read-write */
uint16_t queue_notify_off; /* read-only for driver */
uint64_t queue_desc; /* read-write */
uint64_t queue_driver; /* read-write */
uint64_t queue_device; /* read-write */
uint16_t queue_notify_data; /* read-only for driver */
uint16_t queue_reset; /* read-write */
};
#define VIRTIO_PCI_CAP_COMMON_CFG 1
#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
#define VIRTIO_PCI_CAP_ISR_CFG 3
#define VIRTIO_PCI_CAP_DEVICE_CFG 4
#define VIRTIO_PCI_CAP_PCI_CFG 5
#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
#define VIRTIO_PCI_CAP_VENDOR_CFG 9
#define CAPABILITY_LIST_VALID_BIT 4
#define STATUS_COMMAND_REG 0x1
#define CAPABILITIES_POINTER_REG 0xd
#define CAPABILITIES_POINTER_MASK 0xfc
#define VIRTIO_PCI_MSIX_NO_VECTOR 0xffff
struct virtio_pci_data {
volatile struct virtio_pci_common_cfg *common_cfg;
void *device_specific_cfg;
volatile uint8_t *isr_status;
volatile uint8_t *notify_cfg;
uint32_t notify_off_multiplier;
struct virtq *virtqueues;
uint16_t virtqueue_count;
struct k_spinlock isr_lock;
struct k_spinlock notify_lock;
};
struct virtio_pci_config {
struct pcie_dev *pcie;
};
/*
* Even though a virtio device is exposed as a PCI device, it's not a physical
* one, so we don't have to care about cache flushing/invalidating like we would
* with a real device that may write to the memory from the outside. Whatever
* will be written/read to shared memory by the virtio device will be
* written/read by a hypervisor running on the same cpu as zephyr guest, so the
* caches will stay coherent
*/
void virtio_pci_isr(const struct device *dev)
{
struct virtio_pci_data *data = dev->data;
k_spinlock_key_t key = k_spin_lock(&data->isr_lock);
virtio_isr(dev, *data->isr_status, data->virtqueue_count);
k_spin_unlock(&data->isr_lock, key);
}
static bool virtio_pci_read_cap(
pcie_bdf_t bdf, uint8_t cfg_type, void *cap_struct, size_t cap_struct_size)
{
struct virtio_pci_cap tmp;
uint16_t status = (pcie_conf_read(bdf, STATUS_COMMAND_REG) & GENMASK(31, 16)) >> 16;
if (!(status & BIT(CAPABILITY_LIST_VALID_BIT))) {
LOG_ERR("no capability list for device with bdf 0x%x", bdf);
return false;
}
uint32_t cap_ptr =
pcie_conf_read(bdf, CAPABILITIES_POINTER_REG) & CAPABILITIES_POINTER_MASK;
uint32_t cap_off = cap_ptr / sizeof(uint32_t);
/*
* Every capability type struct has size and alignment of multiple of 4 bytes
* so pcie_conf_read() can be used directly without aligning
*/
do {
for (int i = 0; i < sizeof(struct virtio_pci_cap) / sizeof(uint32_t); i++) {
((uint32_t *)&tmp)[i] = pcie_conf_read(bdf, cap_off + i);
}
if (tmp.cfg_type == cfg_type) {
assert(tmp.cap_len == cap_struct_size);
size_t extra_data_words =
(tmp.cap_len - sizeof(struct virtio_pci_cap)) / sizeof(uint32_t);
size_t extra_data_offset =
cap_off + sizeof(struct virtio_pci_cap) / sizeof(uint32_t);
uint32_t *extra_data =
(uint32_t *)((struct virtio_pci_cap *)cap_struct + 1);
*(struct virtio_pci_cap *)cap_struct = tmp;
for (int i = 0; i < extra_data_words; i++) {
extra_data[i] = pcie_conf_read(bdf, extra_data_offset + i);
}
return true;
}
cap_off = (tmp.cap_next & 0xfc) / sizeof(uint32_t);
} while (cap_off != 0);
return false;
}
static void virtio_pci_reset(const struct device *dev)
{
struct virtio_pci_data *data = dev->data;
/*
* According to spec 4.1.4.3.1 and spec 2.4.2 to reset the device we
* must write 0 to the device_status register and wait until we read 0
* from it, which means that reset is complete
*/
data->common_cfg->device_status = 0;
while (data->common_cfg->device_status != 0) {
}
}
static void virtio_pci_notify_queue(const struct device *dev, uint16_t queue_idx)
{
struct virtio_pci_data *data = dev->data;
k_spinlock_key_t key = k_spin_lock(&data->notify_lock);
data->common_cfg->queue_select = sys_cpu_to_le16(queue_idx);
barrier_dmem_fence_full();
/*
* Because currently we are not negotiating VIRTIO_F_NOTIFICATION_DATA
* and VIRTIO_F_NOTIF_CONFIG_DATA, in order to notify the queue we have
* to write its index to notify_cfg at the offset
* cap.offset + queue_notify_off * notify_off_multiplier,
* which here is reduced to queue_notify_off * notify_off_multiplier,
* because data->notify_cfg was mapped in virtio_pci_map_cap() to start
* at cap.offset. See spec 4.1.4.4 for the offset formula, and spec
* 4.1.5.2 and spec 4.1.5.2.1 for the value written
*/
size_t notify_off =
sys_le16_to_cpu(data->common_cfg->queue_notify_off) * data->notify_off_multiplier;
volatile uint16_t *notify_addr = (uint16_t *)(data->notify_cfg + notify_off);
*notify_addr = sys_cpu_to_le16(queue_idx);
k_spin_unlock(&data->notify_lock, key);
}
/*
* According to the spec 4.1.3.1, PCI virtio driver must use n byte accesses for n byte fields,
* except for 64 bit fields where 32 bit accesses have to be used, so we are using this
* function to write 64 bit values to 64 bit fields
*/
static void virtio_pci_write64(uint64_t val, uint64_t *dst)
{
uint64_t val_le = sys_cpu_to_le64(val);
((uint32_t *)dst)[0] = val_le & GENMASK64(31, 0);
((uint32_t *)dst)[1] = (val_le & GENMASK64(63, 32)) >> 32;
}
static int virtio_pci_set_virtqueue(
const struct device *dev, uint16_t virtqueue_n, struct virtq *virtqueue)
{
struct virtio_pci_data *data = dev->data;
data->common_cfg->queue_select = sys_cpu_to_le16(virtqueue_n);
barrier_dmem_fence_full();
uint16_t max_queue_size = sys_le16_to_cpu(data->common_cfg->queue_size);
if (max_queue_size < virtqueue->num) {
LOG_ERR(
"virtio pci device doesn't support queue %d bigger than %d, tried to set one with size %d",
virtqueue_n,
max_queue_size,
virtqueue->num
);
return -EINVAL;
}
data->common_cfg->queue_size = sys_cpu_to_le16(virtqueue->num);
virtio_pci_write64(
k_mem_phys_addr(virtqueue->desc), (void *)&data->common_cfg->queue_desc
);
virtio_pci_write64(
k_mem_phys_addr(virtqueue->avail), (void *)&data->common_cfg->queue_driver
);
virtio_pci_write64(
k_mem_phys_addr(virtqueue->used), (void *)&data->common_cfg->queue_device
);
data->common_cfg->queue_msix_vector = sys_cpu_to_le16(VIRTIO_PCI_MSIX_NO_VECTOR);
data->common_cfg->queue_enable = sys_cpu_to_le16(1);
return 0;
}
static int virtio_pci_init_virtqueues(
const struct device *dev, uint16_t num_queues, virtio_enumerate_queues cb, void *opaque)
{
struct virtio_pci_data *data = dev->data;
uint16_t queue_count = sys_le16_to_cpu(data->common_cfg->num_queues);
if (num_queues > queue_count) {
LOG_ERR("requested more virtqueues than available");
return -EINVAL;
}
data->virtqueues = k_malloc(queue_count * sizeof(struct virtq));
if (!data->virtqueues) {
LOG_ERR("failed to allocate virtqueue array");
return -ENOMEM;
}
data->virtqueue_count = queue_count;
for (int i = 0; i < queue_count; i++) {
data->common_cfg->queue_select = sys_cpu_to_le16(i);
barrier_dmem_fence_full();
uint16_t queue_size = cb(i, sys_le16_to_cpu(data->common_cfg->queue_size), opaque);
int ret = virtq_create(&data->virtqueues[i], queue_size);
if (ret != 0) {
for (int j = 0; j < i; j++) {
virtq_free(&data->virtqueues[j]);
}
return ret;
}
ret = virtio_pci_set_virtqueue(dev, i, &data->virtqueues[i]);
if (ret != 0) {
for (int j = 0; j < i; j++) {
virtq_free(&data->virtqueues[j]);
}
return ret;
}
}
return 0;
}
static bool virtio_pci_map_cap(pcie_bdf_t bdf, struct virtio_pci_cap *cap, void **virt_ptr)
{
struct pcie_bar mbar;
if (!pcie_get_mbar(bdf, cap->bar, &mbar)) {
LOG_ERR("no mbar for capability type %d found", cap->cfg_type);
return false;
}
assert(mbar.phys_addr + cap->offset + cap->length <= mbar.phys_addr + mbar.size);
#ifdef CONFIG_MMU
k_mem_map_phys_bare(
(uint8_t **)virt_ptr, mbar.phys_addr + cap->offset, cap->length, K_MEM_PERM_RW
);
#else
*virt_ptr = (void *)(mbar.phys_addr + cap->offset);
#endif
return true;
}
static uint32_t virtio_pci_read_device_feature_word(const struct device *dev, uint32_t word_n)
{
struct virtio_pci_data *data = dev->data;
data->common_cfg->device_feature_select = sys_cpu_to_le32(word_n);
barrier_dmem_fence_full();
return sys_le32_to_cpu(data->common_cfg->device_feature);
}
static void virtio_pci_write_driver_feature_word(
const struct device *dev, uint32_t word_n, uint32_t val)
{
struct virtio_pci_data *data = dev->data;
data->common_cfg->driver_feature_select = sys_cpu_to_le32(word_n);
barrier_dmem_fence_full();
data->common_cfg->driver_feature = sys_cpu_to_le32(val);
}
static bool virtio_pci_read_device_feature_bit(const struct device *dev, int bit)
{
uint32_t word_n = bit / 32;
uint32_t mask = BIT(bit % 32);
return virtio_pci_read_device_feature_word(dev, word_n) & mask;
}
static void virtio_pci_write_driver_feature_bit(const struct device *dev, int bit, bool value)
{
uint32_t word_n = bit / 32;
uint32_t mask = BIT(bit % 32);
uint32_t word = virtio_pci_read_device_feature_word(dev, word_n);
virtio_pci_write_driver_feature_word(dev, word_n, value ? word | mask : word & ~mask);
}
static int virtio_pci_write_driver_feature_bit_range_check(
const struct device *dev, int bit, bool value)
{
if (!IN_RANGE(bit, DEV_TYPE_FEAT_RANGE_0_BEGIN, DEV_TYPE_FEAT_RANGE_0_END)
&& !IN_RANGE(bit, DEV_TYPE_FEAT_RANGE_1_BEGIN, DEV_TYPE_FEAT_RANGE_1_END)) {
return -EINVAL;
}
virtio_pci_write_driver_feature_bit(dev, bit, value);
return 0;
}
static bool virtio_pci_read_status_bit(const struct device *dev, int bit)
{
struct virtio_pci_data *data = dev->data;
uint32_t mask = BIT(bit);
barrier_dmem_fence_full();
return sys_le32_to_cpu(data->common_cfg->device_status) & mask;
}
static void virtio_pci_write_status_bit(const struct device *dev, int bit)
{
struct virtio_pci_data *data = dev->data;
uint32_t mask = BIT(bit);
barrier_dmem_fence_full();
data->common_cfg->device_status |= sys_cpu_to_le32(mask);
}
static int virtio_pci_init_common(const struct device *dev)
{
const struct virtio_pci_config *conf = dev->config;
struct virtio_pci_data *data = dev->data;
struct virtio_pci_cap vpc;
struct virtio_pci_notify_cap vpnc = { .notify_off_multiplier = 0 };
if (conf->pcie->bdf == PCIE_BDF_NONE) {
LOG_ERR("no virtio pci device with id 0x%x on the bus", conf->pcie->id);
return 1;
}
LOG_INF(
"found virtio pci device with id 0x%x and bdf 0x%x", conf->pcie->id, conf->pcie->bdf
);
if (virtio_pci_read_cap(conf->pcie->bdf, VIRTIO_PCI_CAP_COMMON_CFG, &vpc, sizeof(vpc))) {
if (!virtio_pci_map_cap(conf->pcie->bdf, &vpc, (void **)&data->common_cfg)) {
return 1;
}
} else {
LOG_ERR(
"no VIRTIO_PCI_CAP_COMMON_CFG for the device with id 0x%x and bdf 0x%x, legacy device?",
conf->pcie->id,
conf->pcie->bdf
);
return 1;
}
if (virtio_pci_read_cap(conf->pcie->bdf, VIRTIO_PCI_CAP_ISR_CFG, &vpc, sizeof(vpc))) {
if (!virtio_pci_map_cap(conf->pcie->bdf, &vpc, (void **)&data->isr_status)) {
return 1;
}
} else {
LOG_ERR(
"no VIRTIO_PCI_CAP_ISR_CFG for the device with id 0x%x and bdf 0x%x",
conf->pcie->id,
conf->pcie->bdf
);
return 1;
}
if (virtio_pci_read_cap(conf->pcie->bdf, VIRTIO_PCI_CAP_NOTIFY_CFG, &vpnc, sizeof(vpnc))) {
if (!virtio_pci_map_cap(
conf->pcie->bdf, (struct virtio_pci_cap *)&vpnc,
(void **)&data->notify_cfg)) {
return 1;
}
data->notify_off_multiplier = sys_le32_to_cpu(vpnc.notify_off_multiplier);
} else {
LOG_ERR(
"no VIRTIO_PCI_CAP_NOTIFY_CFG for the device with id 0x%x and bdf 0x%x",
conf->pcie->id,
conf->pcie->bdf
);
return 1;
}
/*
* Some of the device types may present VIRTIO_PCI_CAP_DEVICE_CFG capabilities as per spec
* 4.1.4.6. It states that there may be more than one such capability per device, however
* none of the devices specified in the Device Types (chapter 5) state that they need more
* than one (it's always one or zero virtio_devtype_config structs), so we are just trying to
* read the first one
*/
if (virtio_pci_read_cap(conf->pcie->bdf, VIRTIO_PCI_CAP_DEVICE_CFG, &vpc, sizeof(vpc))) {
if (!virtio_pci_map_cap(
conf->pcie->bdf, &vpc, (void **)&data->device_specific_cfg)) {
return 1;
}
} else {
data->device_specific_cfg = NULL;
LOG_INF(
"no VIRTIO_PCI_CAP_DEVICE_CFG for the device with id 0x%x and bdf 0x%x",
conf->pcie->id,
conf->pcie->bdf
);
}
/*
* The device initialization goes as follows (see 3.1.1):
* - first we have to reset the device
* - then we have to write ACKNOWLEDGE bit
* - then we have to write DRIVER bit
* - after that, negotiation of feature flags takes place; currently this driver only needs
* VIRTIO_F_VERSION_1, the rest of the flags are left for the specific device drivers to
* negotiate via this driver's api, which must be finalized with commit_feature_bits() that
* writes the FEATURES_OK bit
* - next the virtqueues have to be set, again via this driver's api (init_virtqueues())
* - initialization is finalized by writing DRIVER_OK bit, which is done by
* finalize_init() from api
*/
virtio_pci_reset(dev);
virtio_pci_write_status_bit(dev, DEVICE_STATUS_ACKNOWLEDGE);
virtio_pci_write_status_bit(dev, DEVICE_STATUS_DRIVER);
LOG_INF(
"virtio pci device with id 0x%x and bdf 0x%x advertised "
"feature bits: 0x%.8x%.8x%.8x%.8x",
conf->pcie->id,
conf->pcie->bdf,
virtio_pci_read_device_feature_word(dev, 3),
virtio_pci_read_device_feature_word(dev, 2),
virtio_pci_read_device_feature_word(dev, 1),
virtio_pci_read_device_feature_word(dev, 0)
);
/*
* In case of PCI this should never happen because legacy device would've been caught
* earlier in VIRTIO_PCI_CAP_COMMON_CFG check as this capability shouldn't be present
* in legacy devices, but we are leaving it here as a sanity check
*/
if (!virtio_pci_read_device_feature_bit(dev, VIRTIO_F_VERSION_1)) {
LOG_ERR(
"virtio pci device with id 0x%x and bdf 0x%x doesn't advertise "
"VIRTIO_F_VERSION_1 feature support",
conf->pcie->id,
conf->pcie->bdf
);
return 1;
}
virtio_pci_write_driver_feature_bit(dev, VIRTIO_F_VERSION_1, 1);
return 0;
}
struct virtq *virtio_pci_get_virtqueue(const struct device *dev, uint16_t queue_idx)
{
struct virtio_pci_data *data = dev->data;
return queue_idx < data->virtqueue_count ? &data->virtqueues[queue_idx] : NULL;
}
void *virtio_pci_get_device_specific_config(const struct device *dev)
{
struct virtio_pci_data *data = dev->data;
return data->device_specific_cfg;
}
void virtio_pci_finalize_init(const struct device *dev)
{
virtio_pci_write_status_bit(dev, DEVICE_STATUS_DRIVER_OK);
}
int virtio_pci_commit_feature_bits(const struct device *dev)
{
const struct virtio_pci_config *conf = dev->config;
virtio_pci_write_status_bit(dev, DEVICE_STATUS_FEATURES_OK);
if (!virtio_pci_read_status_bit(dev, DEVICE_STATUS_FEATURES_OK)) {
LOG_ERR(
"virtio pci device with id 0x%x and bdf 0x%x doesn't support selected "
"feature bits: 0x%.8x%.8x%.8x%.8x",
conf->pcie->id,
conf->pcie->bdf,
virtio_pci_read_device_feature_word(dev, 3),
virtio_pci_read_device_feature_word(dev, 2),
virtio_pci_read_device_feature_word(dev, 1),
virtio_pci_read_device_feature_word(dev, 0)
);
return -EINVAL;
}
return 0;
}
static const struct virtio_driver_api virtio_pci_driver_api = {
.get_virtqueue = virtio_pci_get_virtqueue,
.notify_virtqueue = virtio_pci_notify_queue,
.get_device_specific_config = virtio_pci_get_device_specific_config,
.read_device_feature_bit = virtio_pci_read_device_feature_bit,
.write_driver_feature_bit = virtio_pci_write_driver_feature_bit_range_check,
.commit_feature_bits = virtio_pci_commit_feature_bits,
.init_virtqueues = virtio_pci_init_virtqueues,
.finalize_init = virtio_pci_finalize_init
};
#define VIRTIO_PCI_DEFINE(inst) \
BUILD_ASSERT(DT_NODE_HAS_COMPAT(DT_INST_PARENT(inst), pcie_controller)); \
DEVICE_PCIE_INST_DECLARE(inst); \
static struct virtio_pci_data virtio_pci_data##inst; \
static struct virtio_pci_config virtio_pci_config##inst = { \
DEVICE_PCIE_INST_INIT(inst, pcie) \
}; \
static int virtio_pci_init##inst(const struct device *dev) \
{ \
IRQ_CONNECT( \
DT_INST_IRQN(inst), DT_INST_IRQ(inst, priority), virtio_pci_isr, \
DEVICE_DT_INST_GET(inst), 0 \
); \
int ret = virtio_pci_init_common(dev); \
irq_enable(DT_INST_IRQN(inst)); \
return ret; \
} \
DEVICE_DT_INST_DEFINE( \
inst, \
virtio_pci_init##inst, \
NULL, \
&virtio_pci_data##inst, \
&virtio_pci_config##inst, \
POST_KERNEL, \
0, \
&virtio_pci_driver_api \
);
DT_INST_FOREACH_STATUS_OKAY(VIRTIO_PCI_DEFINE)
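
A consumer obtains the transport device created by VIRTIO_PCI_DEFINE() through the devicetree like any other Zephyr device. A short editorial sketch, where the node label virtio_pci0 is hypothetical:

#include <errno.h>
#include <zephyr/device.h>

/* hypothetical node label of a virtio,pci instance in the board devicetree */
static const struct device *const vdev = DEVICE_DT_GET(DT_NODELABEL(virtio_pci0));

static int example_probe(void)
{
	if (!device_is_ready(vdev)) {
		return -ENODEV;
	}
	/* reset/ACKNOWLEDGE/DRIVER were already written in the device's init function;
	 * feature negotiation and virtqueue setup are left to the consumer (see virtio.h)
	 */
	return 0;
}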

drivers/virtio/virtqueue.c Normal file

@@ -0,0 +1,188 @@
/*
* Copyright (c) 2024 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/virtio/virtqueue.h>
#include <zephyr/kernel.h>
#include <zephyr/logging/log.h>
#include <zephyr/sys/__assert.h>
#include <zephyr/sys/byteorder.h>
#include <zephyr/sys/barrier.h>
#include <errno.h>
LOG_MODULE_REGISTER(virtio, CONFIG_VIRTIO_LOG_LEVEL);
/*
* Based on Virtual I/O Device (VIRTIO) Version 1.3 specification:
* https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.pdf
*/
/*
* The maximum queue size is 2^15 (see 2.7),
* so any 16bit value larger than that can be used as a sentinel in the next field
*/
#define VIRTQ_DESC_NEXT_SENTINEL 0xffff
/* According to the spec 2.7.5.2 the maximum size of descriptor chain is 4GB */
#define MAX_DESCRIPTOR_CHAIN_LENGTH ((uint64_t)1 << 32)
int virtq_create(struct virtq *v, size_t size)
{
__ASSERT(IS_POWER_OF_TWO(size), "size of virtqueue must be a power of 2");
__ASSERT(size <= KB(32), "size of virtqueue must be at most 32KB");
/*
* For sizes and alignments see table in spec 2.7. We are supporting only modern virtio, so
* we don't have to adhere to additional constraints from spec 2.7.2
*/
size_t descriptor_table_size = 16 * size;
size_t available_ring_size = 2 * size + 6;
size_t used_ring_pad = (descriptor_table_size + available_ring_size) % 4;
size_t used_ring_size = 8 * size + 6;
size_t shared_size =
descriptor_table_size + available_ring_size + used_ring_pad + used_ring_size;
size_t v_size = shared_size + sizeof(struct virtq_receive_callback_entry) * size;
uint8_t *v_area = k_aligned_alloc(16, v_size);
if (!v_area) {
LOG_ERR("unable to allocate virtqueue");
return -ENOMEM;
}
v->num = size;
v->desc = (struct virtq_desc *)v_area;
v->avail = (struct virtq_avail *)((uint8_t *)v->desc + descriptor_table_size);
v->used = (struct virtq_used *)((uint8_t *)v->avail + available_ring_size + used_ring_pad);
v->recv_cbs = (struct virtq_receive_callback_entry *)((uint8_t *)v->used + used_ring_size);
/*
* At the beginning of the descriptor table, the available ring and the used ring have to be
* set to zero. It's the case for both PCI (4.1.5.1.3) and MMIO (4.2.3.2) transport options.
* It's unspecified for channel I/O (chapter 4.3), but that transport is only used on platforms
* not supported by Zephyr, so we don't have to handle it here
*/
memset(v_area, 0, v_size);
v->last_used_idx = 0;
k_stack_alloc_init(&v->free_desc_stack, size);
for (uint16_t i = 0; i < size; i++) {
k_stack_push(&v->free_desc_stack, i);
}
v->free_desc_n = size;
return 0;
}
void virtq_free(struct virtq *v)
{
k_free(v->desc);
k_stack_cleanup(&v->free_desc_stack);
}
static int virtq_add_available(struct virtq *v, uint16_t desc_idx)
{
uint16_t new_idx_le = sys_cpu_to_le16(sys_le16_to_cpu(v->avail->idx) % v->num);
v->avail->ring[new_idx_le] = sys_cpu_to_le16(desc_idx);
barrier_dmem_fence_full();
v->avail->idx = sys_cpu_to_le16(sys_le16_to_cpu(v->avail->idx) + 1);
return 0;
}
int virtq_add_buffer_chain(
struct virtq *v, struct virtq_buf *bufs, uint16_t bufs_size,
uint16_t device_readable_count, virtq_receive_callback cb, void *cb_opaque,
k_timeout_t timeout)
{
uint64_t total_len = 0;
for (int i = 0; i < bufs_size; i++) {
total_len += bufs[i].len;
}
if (total_len > MAX_DESCRIPTOR_CHAIN_LENGTH) {
LOG_ERR("buffer chain is longer than 2^32 bytes");
return -EINVAL;
}
k_spinlock_key_t key = k_spin_lock(&v->lock);
if (v->free_desc_n < bufs_size && !K_TIMEOUT_EQ(timeout, K_FOREVER)) {
/* we don't have enough free descriptors to push all buffers to the queue */
k_spin_unlock(&v->lock, key);
return -EBUSY;
}
uint16_t prev_desc = VIRTQ_DESC_NEXT_SENTINEL;
uint16_t head = VIRTQ_DESC_NEXT_SENTINEL;
for (uint16_t buf_n = 0; buf_n < bufs_size; buf_n++) {
uint16_t desc;
/*
* we've checked before that we have enough free descriptors
* and the queue is locked, so popping from stack is guaranteed
* to succeed and we don't have to check its return value
*/
virtq_get_free_desc(v, &desc, timeout);
uint16_t desc_le = sys_cpu_to_le16(desc);
if (head == VIRTQ_DESC_NEXT_SENTINEL) {
head = desc;
}
v->desc[desc_le].addr = k_mem_phys_addr(bufs[buf_n].addr);
v->desc[desc_le].len = bufs[buf_n].len;
if (buf_n < device_readable_count) {
v->desc[desc_le].flags = 0;
} else {
v->desc[desc_le].flags = VIRTQ_DESC_F_WRITE;
}
if (buf_n < bufs_size - 1) {
v->desc[desc_le].flags |= VIRTQ_DESC_F_NEXT;
} else {
v->desc[desc_le].next = 0;
}
if (prev_desc != VIRTQ_DESC_NEXT_SENTINEL) {
uint16_t prev_desc_le = sys_cpu_to_le16(prev_desc);
v->desc[prev_desc_le].next = desc_le;
}
prev_desc = desc;
}
v->recv_cbs[head].cb = cb;
v->recv_cbs[head].opaque = cb_opaque;
virtq_add_available(v, head);
k_spin_unlock(&v->lock, key);
return 0;
}
int virtq_get_free_desc(struct virtq *v, uint16_t *desc_idx, k_timeout_t timeout)
{
stack_data_t desc;
int ret = k_stack_pop(&v->free_desc_stack, &desc, timeout);
if (ret == 0) {
*desc_idx = (uint16_t)desc;
v->free_desc_n--;
}
return ret;
}
void virtq_add_free_desc(struct virtq *v, uint16_t desc_idx)
{
k_stack_push(&v->free_desc_stack, desc_idx);
v->free_desc_n++;
}


@@ -0,0 +1,8 @@
# Copyright (c) 2024 Antmicro <www.antmicro.com>
# SPDX-License-Identifier: Apache-2.0
description: VIRTIO over PCI
compatible: "virtio,pci"
include: [base.yaml, pcie-device.yaml]


@@ -0,0 +1,169 @@
/*
* Copyright (c) 2024 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_VIRTIO_VIRTIO_H_
#define ZEPHYR_VIRTIO_VIRTIO_H_
#include <zephyr/device.h>
#include "virtqueue.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Callback used during virtqueue enumeration
*
* @param queue_idx index of currently inspected queue
* @param max_queue_size maximum permitted size of currently inspected queue
* @param opaque pointer to user provided data
* @return the queue size to set for the currently inspected virtqueue
*/
typedef uint16_t (*virtio_enumerate_queues)(
uint16_t queue_idx, uint16_t max_queue_size, void *opaque
);
/**
* @brief Virtio api structure
*/
__subsystem struct virtio_driver_api {
struct virtq *(*get_virtqueue)(const struct device *dev, uint16_t queue_idx);
void (*notify_virtqueue)(const struct device *dev, uint16_t queue_idx);
void *(*get_device_specific_config)(const struct device *dev);
bool (*read_device_feature_bit)(const struct device *dev, int bit);
int (*write_driver_feature_bit)(const struct device *dev, int bit, bool value);
int (*commit_feature_bits)(const struct device *dev);
int (*init_virtqueues)(
const struct device *dev, uint16_t num_queues, virtio_enumerate_queues cb,
void *opaque
);
void (*finalize_init)(const struct device *dev);
};
/**
* Returns virtqueue at given idx
*
* @param dev virtio device it operates on
* @param queue_idx index of virtqueue to get
* @return pointer to virtqueue or NULL if not present
*/
static inline struct virtq *virtio_get_virtqueue(const struct device *dev, uint16_t queue_idx)
{
const struct virtio_driver_api *api = dev->api;
return api->get_virtqueue(dev, queue_idx);
}
/**
* Notifies virtqueue
*
* Note that according to spec 2.7.13.3 the device may access the buffers as soon
* as the avail->idx is increased, which is done by virtq_add_buffer_chain, so the
* device may access the buffers even without notifying it with virtio_notify_virtqueue
*
* @param dev virtio device it operates on
* @param queue_idx virtqueue to be notified
*/
static inline void virtio_notify_virtqueue(const struct device *dev, uint16_t queue_idx)
{
const struct virtio_driver_api *api = dev->api;
api->notify_virtqueue(dev, queue_idx);
}
/**
* Returns device specific config
*
* @param dev virtio device it operates on
* @return pointer to the device specific config or NULL if it's not present
*/
static inline void *virtio_get_device_specific_config(const struct device *dev)
{
const struct virtio_driver_api *api = dev->api;
return api->get_device_specific_config(dev);
}
/**
* Returns feature bit offered by virtio device
*
* @param dev virtio device it operates on
* @param bit selected bit
* @return value of the offered feature bit
*/
static inline bool virtio_read_device_feature_bit(const struct device *dev, int bit)
{
const struct virtio_driver_api *api = dev->api;
return api->read_device_feature_bit(dev, bit);
}
/**
* Sets feature bit
*
* @param dev virtio device it operates on
* @param bit selected bit
* @param value bit value to write
* @return 0 on success or negative error code on failure
*/
static inline int virtio_write_driver_feature_bit(const struct device *dev, int bit, bool value)
{
const struct virtio_driver_api *api = dev->api;
return api->write_driver_feature_bit(dev, bit, value);
}
/**
* Commits feature bits
*
* @param dev virtio device it operates on
* @return 0 on success or negative error code on failure
*/
static inline int virtio_commit_feature_bits(const struct device *dev)
{
const struct virtio_driver_api *api = dev->api;
return api->commit_feature_bits(dev);
}
/**
* Initializes virtqueues
*
* @param dev virtio device it operates on
* @param num_queues number of queues to initialize
* @param cb callback called for each available virtqueue
* @param opaque pointer to user provided data that will be passed to the callback
* @return 0 on success or negative error code on failure
*/
static inline int virtio_init_virtqueues(
const struct device *dev, uint16_t num_queues, virtio_enumerate_queues cb, void *opaque)
{
const struct virtio_driver_api *api = dev->api;
return api->init_virtqueues(dev, num_queues, cb, opaque);
}
/**
* Finalizes initialization of the virtio device
*
* @param dev virtio device it operates on
*/
static inline void virtio_finalize_init(const struct device *dev)
{
const struct virtio_driver_api *api = dev->api;
api->finalize_init(dev);
}
/**
* @}
*/
#ifdef __cplusplus
}
#endif
#endif /* ZEPHYR_VIRTIO_VIRTIO_H_ */
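
virtio_get_device_specific_config() returns a raw pointer into the transport's device-specific configuration space; the consumer casts it to the layout defined for its device type and converts fields from little-endian itself. A hedged editorial sketch with a hypothetical config layout:

#include <errno.h>
#include <zephyr/sys/byteorder.h>
#include <zephyr/virtio/virtio.h>

/* hypothetical device-specific config layout; fields are little-endian for modern devices */
struct example_dev_config {
	uint32_t some_le32_field;
};

static int example_read_config(const struct device *vdev, uint32_t *out)
{
	volatile struct example_dev_config *cfg = virtio_get_device_specific_config(vdev);

	if (cfg == NULL) {
		/* e.g. the PCI transport found no VIRTIO_PCI_CAP_DEVICE_CFG capability */
		return -ENODEV;
	}

	*out = sys_le32_to_cpu(cfg->some_le32_field);
	return 0;
}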


@@ -0,0 +1,259 @@
/*
* Copyright (c) 2024 Antmicro <www.antmicro.com>
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_VIRTIO_VIRTQUEUE_H_
#define ZEPHYR_VIRTIO_VIRTQUEUE_H_
#include <stdint.h>
#include <stddef.h>
#include <zephyr/kernel.h>
/*
* Based on Virtual I/O Device (VIRTIO) Version 1.3 specification:
* https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.pdf
*/
/**
* used in virtq_desc::flags, enables chaining descriptor via virtq_desc::next
*/
#define VIRTQ_DESC_F_NEXT 1
/**
* used in virtq_desc::flags, makes descriptor device writeable
*/
#define VIRTQ_DESC_F_WRITE 2
/**
* @brief virtqueue descriptor
*
* Describes a single buffer
*/
struct virtq_desc {
/**
* physical address of the buffer
*/
uint64_t addr;
/**
* length of the buffer
*/
uint32_t len;
/**
* buffer flags
*/
uint16_t flags;
/**
* chaining next descriptor, valid if flags & VIRTQ_DESC_F_NEXT
*/
uint16_t next;
};
/**
* @brief virtqueue available ring
*
* Used to pass descriptors to the virtio device. Driver writeable, device readable
*/
struct virtq_avail {
/**
* ring flags, e.g. VIRTQ_AVAIL_F_NO_INTERRUPT, currently unused
*/
uint16_t flags;
/**
* head of the ring, by increasing it newly added descriptors are committed
*/
uint16_t idx;
/**
* ring with indexes of descriptors
*/
uint16_t ring[];
};
/**
* @brief used descriptor chain
*
* Describes a single descriptor chain returned by the virtio device
*/
struct virtq_used_elem {
/**
* index of the head of descriptor chain
*/
uint32_t id;
/**
* total amount of bytes written to descriptor chain by the virtio device
*/
uint32_t len;
};
/**
* @brief virtqueue used ring
*
* Used to receive descriptors from the virtio device. Driver readable, device writeable
*/
struct virtq_used {
/**
* ring flags, e.g. VIRTQ_USED_F_NO_NOTIFY, currently unused
*/
uint16_t flags;
/**
* head of the ring
*/
uint16_t idx;
/**
* ring of struct virtq_used_elem
*/
struct virtq_used_elem ring[];
};
/**
* @brief receive callback function type
*
* @param opaque argument passed to the callback
* @param used_len total amount of bytes written to the descriptor chain by the virtio device
*/
typedef void (*virtq_receive_callback)(void *opaque, uint32_t used_len);
/**
* @brief callback descriptor
*
* contains the callback function and its argument, invoked after the virtio device returns
* the descriptor chain it's associated with
*/
struct virtq_receive_callback_entry {
/**
* callback function pointer
*/
virtq_receive_callback cb;
/**
* argument passed to the callback function
*/
void *opaque;
};
/**
* @brief virtqueue
*
* contains structures required for virtqueue operation
*/
struct virtq {
/**
* lock used to synchronize operations on virtqueue
*/
struct k_spinlock lock;
/**
* size of virtqueue
*/
uint16_t num;
/**
* array with descriptors
*/
struct virtq_desc *desc;
/**
* available ring
*/
struct virtq_avail *avail;
/**
* used ring
*/
struct virtq_used *used;
/**
* last seen idx in used ring, used to determine first descriptor to process
* after receiving virtqueue interrupt
*/
uint16_t last_used_idx;
/**
* Stack containing indexes of free descriptors. Because virtio devices are
* not required to use received descriptors in order (see 2.7.9) unless
* VIRTIO_F_IN_ORDER was offered, we can't treat the descriptor array as another
* ring buffer and simply take the next descriptor. This is an auxiliary structure to
* easily determine the next free descriptor
*/
struct k_stack free_desc_stack;
/**
* amount of free descriptors in the free_desc_stack
*/
uint16_t free_desc_n;
/**
* array with callbacks invoked after receiving buffers back from the device
*/
struct virtq_receive_callback_entry *recv_cbs;
};
/**
* @brief creates virtqueue
*
* @param v virtqueue to be created
* @param size size of the virtqueue
* @return 0 or error code on failure
*/
int virtq_create(struct virtq *v, size_t size);
/**
* @brief frees virtqueue
*
* @param v virtqueue to be freed
*/
void virtq_free(struct virtq *v);
/**
* @brief single buffer passed to virtq_add_buffer_chain
*/
struct virtq_buf {
/**
* virtual address of the buffer
*/
void *addr;
/**
* length of the buffer
*/
uint32_t len;
};
/**
* @brief adds chain of buffers to the virtqueue
*
* Note that according to spec 2.7.13.3 the device may access the buffers as soon
* as the avail->idx is increased, which is done at the end of this function, so
* the device may access the buffers without notifying it with virtio_notify_virtqueue
*
* @param v virtqueue it operates on
* @param bufs array of buffers to be added to the virtqueue
* @param bufs_size number of buffers
* @param device_readable_count number of buffers readable by the device; the first
* device_readable_count buffers will be set as device readable
* @param cb callback to be invoked after device returns the buffer chain, can be NULL
* @param cb_opaque opaque value that will be passed to the cb
* @param timeout amount of time it will wait for free descriptors, with K_NO_WAIT it
* can be called from isr
* @return 0 or error code on failure
*/
int virtq_add_buffer_chain(
struct virtq *v, struct virtq_buf *bufs, uint16_t bufs_size,
uint16_t device_readable_count, virtq_receive_callback cb, void *cb_opaque,
k_timeout_t timeout
);
/**
* @brief adds free descriptor back
*
* @param v virtqueue it operates on
* @param desc_idx index of returned descriptor
*/
void virtq_add_free_desc(struct virtq *v, uint16_t desc_idx);
/**
* @brief gets next free descriptor
*
* @param v virtqueue it operates on
* @param desc_idx address where index of descriptor will be stored
* @param timeout amount of time it will wait for free descriptor, with K_NO_WAIT it
* can be called from isr
* @return 0 or error code on failure
*/
int virtq_get_free_desc(struct virtq *v, uint16_t *desc_idx, k_timeout_t timeout);
#endif /* ZEPHYR_VIRTIO_VIRTQUEUE_H_ */
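
To illustrate the device_readable_count convention of virtq_add_buffer_chain(): the first device_readable_count buffers become device-readable descriptors (e.g. a request header) and the rest become device-writable (e.g. a response). An editorial sketch with hypothetical request/response buffers, assuming they sit in memory k_mem_phys_addr() can translate and that a semaphore is used to wait for completion:

#include <zephyr/virtio/virtio.h>
#include <zephyr/virtio/virtqueue.h>

static void example_done_cb(void *opaque, uint32_t used_len)
{
	struct k_sem *done = opaque;

	/* used_len is the number of bytes the device wrote to the writable buffers */
	ARG_UNUSED(used_len);
	k_sem_give(done);
}

static int example_submit(const struct device *vdev, struct virtq *vq, uint16_t vq_idx,
			  void *req, uint32_t req_len, void *resp, uint32_t resp_len,
			  struct k_sem *done)
{
	struct virtq_buf bufs[] = {
		{ .addr = req, .len = req_len },   /* device-readable part of the chain */
		{ .addr = resp, .len = resp_len }, /* device-writable part of the chain */
	};
	int ret = virtq_add_buffer_chain(vq, bufs, 2, 1, example_done_cb, done, K_NO_WAIT);

	if (ret != 0) {
		return ret;
	}

	virtio_notify_virtqueue(vdev, vq_idx);
	k_sem_take(done, K_FOREVER);

	return 0;
}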