赞
踩
这段时间又重新revisit了一遍virtio,把笔记整理一下贴出来。大部分内容都是网上找的,再加上我个人的一些理解。
我们首先关注virtio设备的配置空间,virtio设备本身是基于PCI总线的,因此本质上就是一个PCI设备,和所有其他PCI设备一样,virtio也有自己的vendor ID 0x1AF4,device ID从0x1000 - 0x103F,subsystem device ID如下:
Subsystem Device ID | Virtio Device |
---|---|
1 | Network card |
2 | Block device |
3 | Console |
4 | Entropy source |
5 | Memory ballooning |
6 | IoMemory |
7 | Rpmsg |
8 | SCSI host |
9 | 9P transport |
10 | Mac80211 wlan |
virtio设备的第一块IO region(BAR0指向的空间?)用来存放virtio设备的配置空间,如下所示:
Bits | 32 | 32 | 32 | 16 | 16 | 16 | 8 | 8 |
---|---|---|---|---|---|---|---|---|
R/W | R | R+W | R+W | R | R+W | R+W | R+W | R |
Purpose | Device Features | Guest Features | Queue Address | Queue Size | Queue Select | Queue Notify | Device Status | ISR Status |
Bits | 16 | 16 |
---|---|---|
R/W | R+W | R+W |
Purpose(MSI-X) | Configuration Vector | Queue Vector |
如果配置空间包含了后面两个域,即CONFIG_VECTOR以及QUEUE_VECTOR,表明这个PCI设备开启了MSI-X中断,否则后面两个域不会在配置空间中。内核定义了一个VIRTIO_PCI_CONFIG宏,用于计算配置空间的大小,如果开启了MSI-X中断则是24字节,否则是20字节
-
/*
 * Size in bytes of the common virtio header at the start of the I/O region:
 * 24 when MSI-X is enabled (the two 16-bit vector registers are present),
 * 20 otherwise.  The remaining space is defined by each driver as the
 * per-driver configuration space.
 */
#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES	0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES	4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN		8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM		12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL		14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY		16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS		18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR			19

/* The bit of the ISR which indicates a device configuration change. */
#define VIRTIO_PCI_ISR_CONFIG		0x2

/* MSI-X registers: only enabled if MSI-X is enabled. */
/* A 16-bit vector for configuration changes. */
#define VIRTIO_MSI_CONFIG_VECTOR	20
/* A 16-bit vector for selected queue notifications. */
#define VIRTIO_MSI_QUEUE_VECTOR		22
/* Vector value used to disable MSI for queue */
#define VIRTIO_MSI_NO_VECTOR		0xffff
关于PCI的规范和细节,可以参考如下的文章
http://blog.chinaunix.net/uid-618506-id-204331.html
http://blog.sina.com.cn/s/blog_6472c4cc0100qnht.html
http://blog.csdn.net/yayong/article/details/4013299
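按照我的理解,这里virtio设备的配置空间,和PCI设备的配置空间是完全不同的概念:virtio自己的配置实际上是通过BAR0指向的一块IO区域来完成的。对于传统的PCI设备,其配置空间是由PCI规范严格定义好的,普通PCI设备是256个字节,PCIe设备扩展到4K(4096)个字节,其中前64个字节称为PCI配置空间头。需要注意,下表给出的是Type 1(PCI-to-PCI桥)配置空间头的布局;普通设备使用的Type 0头与之共享前16个字节(0x00-0x0F)以及BAR0/BAR1,但0x18-0x27处是BAR2-BAR5,而不是桥的总线号和地址窗口寄存器。其定义如下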
register (offset) | bits 31-24 | bits 23-16 | bits 15-8 | bits 7-0 |
---|---|---|---|---|
00 | Device ID | Vendor ID | ||
04 | Status | Command | ||
08 | Class code | Subclass | Prog IF | Revision ID |
0C | BIST | Header type | Latency Timer | Cache Line Size |
10 | Base address #0 (BAR0) | |||
14 | Base address #1 (BAR1) | |||
18 | Secondary Latency Timer | Subordinate Bus Number | Secondary Bus Number | Primary Bus Number |
1C | Secondary Status | I/O Limit | I/O Base | |
20 | Memory Limit | Memory Base | ||
24 | Prefetchable Memory Limit | Prefetchable Memory Base | ||
28 | Prefetchable Base Upper 32 Bits | |||
2C | Prefetchable Limit Upper 32 Bits | |||
30 | I/O Limit Upper 16 Bits | I/O Base Upper 16 Bits | ||
34 | Reserved | Capability Pointer | ||
38 | Expansion ROM base address | |||
3C | Bridge Control | Interrupt PIN | Interrupt Line |
pci设备的配置空间可以通过pio或者mmio来访问,其中pio主要用于系统启动时的pci设备枚举:x86上通过CONFIG_ADDRESS(IO端口0xCF8)和CONFIG_DATA(IO端口0xCFC)这一对寄存器,按bus/device/function/register逐个读取配置空间来完成枚举,细节请参考相关资料。
host/guest的feature bits需要host和guest通过协商确定,相关的feature bit根据具体的virtio设备不同而不同,e.g. virtio_net, virtio_blk, virtio_balloon都有自己特定的feature bit;同时第28-31位的feature bit保留给所使用的传输层(例如virtio_ring),不属于具体设备
-
/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
 * transport being used (eg. virtio_ring), the rest are per-device feature
 * bits.  START is the first reserved bit, END is one past the last. */
#define VIRTIO_TRANSPORT_F_START	28
#define VIRTIO_TRANSPORT_F_END		32
目前用到的transport features,是VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX
device status目前有如下几类
-
/* Status byte for guest to report progress, and synchronize features. */
/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
#define VIRTIO_CONFIG_S_ACKNOWLEDGE	1
/* We have found a driver for the device. */
#define VIRTIO_CONFIG_S_DRIVER		2
/* Driver has used its parts of the config, and is happy */
#define VIRTIO_CONFIG_S_DRIVER_OK	4
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED		0x80
-
static
struct virtio_config_ops virtio_pci_config_ops = {
-
.get = vp_get,
-
.
set = vp_set,
-
.get_status = vp_get_status,
-
.set_status = vp_set_status,
-
.reset = vp_reset,
-
.find_vqs = vp_find_vqs,
-
.del_vqs = vp_del_vqs,
-
.get_features = vp_get_features,
-
.finalize_features = vp_finalize_features,
-
};
-
/* virtio config->get() implementation */
-
static void vp_get(struct virtio_device *vdev, unsigned offset,
-
void *buf,
unsigned len)
-
{
-
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
void __iomem *ioaddr = vp_dev->ioaddr +
-
VIRTIO_PCI_CONFIG(vp_dev) + offset;
-
u8 *ptr = buf;
-
int i;
-
-
for (i =
0; i < len; i++)
-
ptr[i] = ioread8(ioaddr + i);
-
}
-
/* the config->set() implementation. it's symmetric to the config->get()
-
* implementation */
-
static void vp_set(struct virtio_device *vdev, unsigned offset,
-
const
void *buf,
unsigned len)
-
{
-
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
void __iomem *ioaddr = vp_dev->ioaddr +
-
VIRTIO_PCI_CONFIG(vp_dev) + offset;
-
const u8 *ptr = buf;
-
int i;
-
-
for (i =
0; i < len; i++)
-
iowrite8(ptr[i], ioaddr + i);
-
}
-
/* config->{get,set}_status() implementations */
-
static u8 vp_get_status(struct virtio_device *vdev)
-
{
-
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
-
}
-
-
/*
 * Writes @status to the VIRTIO_PCI_STATUS register.  A zero status is
 * rejected with BUG_ON() here; vp_reset() is the function that writes 0.
 */
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* We should never be setting status to 0. */
	BUG_ON(status == 0);
	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
-
static void vp_reset(struct virtio_device *vdev)
-
{
-
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
/* 0 status means a reset. */
-
iowrite8(
0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
-
}
virtio pci设备同样需要按照系统通用的pci初始化方式注册,初始化时调用pci_register_driver,结束时调用pci_unregister_driver
-
static
struct pci_driver virtio_pci_driver = {
-
.name =
"virtio-pci",
-
.id_table = virtio_pci_id_table,
-
.probe = virtio_pci_probe,
-
.remove = virtio_pci_remove,
-
#ifdef CONFIG_PM
-
.driver.pm = &virtio_pci_pm_ops,
-
#endif
-
};
-
-
static
int __
init virtio_pci_init(void)
-
{
-
return pci_register_driver(&virtio_pci_driver);
-
}
-
-
module_init(virtio_pci_init);
-
-
static
void __
exit virtio_pci_exit(void)
-
{
-
pci_unregister_driver(&virtio_pci_driver);
-
}
-
-
module_exit(virtio_pci_exit);
下面来看看virtqueue,在virtio的机制中,前端和后端通过virtqueue来进行数据交换,virtqueue的初始化通过config->find_vqs来进行
-
static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-
struct virtqueue *vqs[],
-
vq_callback_t *callbacks[],
-
const char *names[])
-
{
-
int err;
-
-
/* Try MSI-X with one vector per queue. */
-
err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
true,
true);
-
if (!err)
-
return
0;
-
/* Fallback: MSI-X with one vector for config, one shared for queues. */
-
err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-
true,
false);
-
if (!err)
-
return
0;
-
/* Finally fall back to regular interrupts. */
-
return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-
false,
false);
-
}
1. 如果没有开启msix模式,则调用vp_request_intx申请一个中断,中断处理函数是vp_interrupt
-
if (!use_msix) {
-
/* Old style: one normal interrupt for change and all vqs. */
-
err = vp_request_intx(vdev);
-
if (err)
-
goto error_request;
-
}
else {
vp_interrupt实际调用的是vp_vring_interrupt(配置变更的中断除外)
-
static irqreturn_t vp_interrupt(int irq, void *opaque)
-
{
-
struct virtio_pci_device *vp_dev = opaque;
-
u8 isr;
-
-
/* reading the ISR has the effect of also clearing it so it's very
-
* important to save off the value. */
-
isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
-
-
/* It's definitely not us if the ISR was not high */
-
if (!isr)
-
return IRQ_NONE;
-
-
/* Configuration change? Tell driver if it wants to know. */
-
if (isr & VIRTIO_PCI_ISR_CONFIG)
-
vp_config_changed(irq, opaque);
-
-
return vp_vring_interrupt(irq, opaque);
-
}
-
irqreturn_t vring_interrupt(
int irq,
void *_vq)
-
{
-
struct vring_virtqueue *vq = to_vvq(_vq);
-
-
if (!more_used(vq)) {
-
pr_debug(
"virtqueue interrupt with no work for %p\n", vq);
-
return IRQ_NONE;
-
}
-
-
if (unlikely(vq->broken))
-
return IRQ_HANDLED;
-
-
pr_debug(
"virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
-
if (vq->vq.callback)
-
vq->vq.callback(&vq->vq);
-
-
return IRQ_HANDLED;
-
}
2. 开启了msix模式,还要区分不同的模式,要么是所有virtqueue共享一个中断,要么是每个virtqueue独立一个中断,无论是哪种模式,都需要调用vp_request_msix_vectors去申请irq中断资源。还要对每个virtqueue,调用setup_vq来完成初始化
vp_request_msix_vectors用于申请nvectors个中断,其中至少有一个config changed中断,处理函数为vp_config_changed,其余如果是共享模式,则所有队列共享一个msix中断,中断处理函数是vp_vring_interrupt
-
}
else {
-
if (per_vq_vectors) {
-
/* Best option: one for change interrupt, one per vq. */
-
nvectors =
1;
-
for (i =
0; i < nvqs; ++i)
-
if (callbacks[i])
-
++nvectors;
-
}
else {
-
/* Second best: one for change, shared for all vqs. */
-
nvectors =
2;
-
}
-
-
err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
-
if (err)
-
goto error_request;
-
}
-
vp_dev->per_vq_vectors = per_vq_vectors;
-
allocated_vectors = vp_dev->msix_used_vectors;
-
for (i =
0; i < nvqs; ++i) {
-
if (!callbacks[i] || !vp_dev->msix_enabled)
-
msix_vec = VIRTIO_MSI_NO_VECTOR;
-
else
if (vp_dev->per_vq_vectors)
-
msix_vec = allocated_vectors++;
-
else
-
msix_vec = VP_MSIX_VQ_VECTOR;
-
vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
-
if (IS_ERR(vqs[i])) {
-
err = PTR_ERR(vqs[i]);
-
goto error_find;
-
}
-
-
if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
-
continue;
-
-
/* allocate per-vq irq if available and necessary */
-
snprintf(vp_dev->msix_names[msix_vec],
-
sizeof *vp_dev->msix_names,
-
"%s-%s",
-
dev_name(&vp_dev->vdev.dev), names[i]);
-
err = request_irq(vp_dev->msix_entries[msix_vec].
vector,
-
vring_interrupt,
0,
-
vp_dev->msix_names[msix_vec],
-
vqs[i]);
-
if (err) {
-
vp_del_vq(vqs[i]);
-
goto error_find;
-
}
-
}
-
return
0;
-
static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
-
void (*callback)
(struct virtqueue *vq),
-
const char *name,
-
u16 msix_vec)
-
{
-
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
struct virtio_pci_vq_info *info;
-
struct virtqueue *vq;
-
unsigned
long flags, size;
-
u16 num;
-
int err;
-
-
/* Select the queue we're interested in */
/* 把要配置的queue的index写入配置空间地址 */
-
iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
-
/* Check if queue is either not available or already active. */
/* num=0说明queue不可用,否则说明地址非空,已经被占用了 */
-
num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
-
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
-
return ERR_PTR(-ENOENT);
-
-
/* allocate and fill out our structure the represents an active
-
* queue */
-
info = kmalloc(
sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
-
if (!info)
-
return ERR_PTR(-ENOMEM);
-
-
info->queue_index = index;
/* 队列index */
-
info->num = num;
/* vring size, vring_desc个数 */
-
info->msix_vector = msix_vec;
-
-
size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
-
info->
queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
/* vring分配空间 */
-
if (info->
queue ==
NULL) {
-
err = -ENOMEM;
-
goto out_info;
-
}
-
-
/* activate the queue */
/* 把vring的地址写入pci配置空间,触发trap使得qemu可以通知到 */
-
iowrite32(virt_to_phys(info->
queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
-
vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
-
-
/* create the vring */
/* 创建vring_virqueue,把vring封装在virtqueue里面 */
-
vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-
vdev, info->
queue, vp_notify, callback, name);
-
if (!vq) {
-
err = -ENOMEM;
-
goto out_activate_queue;
-
}
-
-
vq->priv = info;
/* virtqueue->priv指向virtio_pci_vq_info */
-
info->vq = vq;
/* virtio_pci_vq_info->vq指向新创建的virtqueue */
-
-
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-
iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
-
err = -EBUSY;
-
goto out_assign;
-
}
-
}
-
-
spin_lock_irqsave(&vp_dev->lock, flags);
-
list_add(&info->node, &vp_dev->virtqueues);
-
spin_unlock_irqrestore(&vp_dev->lock, flags);
-
-
return vq;
-
-
out_assign:
-
vring_del_virtqueue(vq);
-
out_activate_queue:
-
iowrite32(
0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
-
free_pages_exact(info->
queue, size);
-
out_info:
-
kfree(info);
-
return ERR_PTR(err);
-
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。