A virtio device can be implemented on top of different buses; this article covers the virtio-pci device, i.e. virtio implemented on PCI. Taking virtio-blk as the example, it first introduces the PCI configuration space and the capability mechanism, which is the hardware foundation of the virtio-pci implementation, and then analyzes the initialization of the PCI device and of the virtio-pci device.
The capabilities list bit in the status field marks that the device has additional register groups beyond the 64-byte predefined configuration space. In that case the capabilities pointer field (offset 0x34) holds the head of the register-group list, expressed as an offset within the device's configuration space.

The capabilities list has the following format: the first byte holds the capability ID, identifying which kind of capability the following configuration space implements, and the second byte holds the offset of the next capability. Capability IDs are listed in Appendix H of the PCI spec 3.0. virtio-blk implements two kinds of capability: MSI-X (Message Signaled Interrupts - Extension), ID 0x11, and Vendor Specific, ID 0x09. The latter exists precisely so that vendors can implement their own functionality, and the virtio-blk implementation builds on it. Each of these vendor-specific capabilities points to a region of the device's physical address space (one of its BARs); the core functionality of a virtio-pci device, that is initialization, front-end/back-end notification and data transfer, is implemented through these five capabilities.

virtio_pci_common_cfg is the main bridge between the virtio front end and back end. The common config is split into two parts: the first is used for device configuration, the second for virtqueue operation. The virtio driver uses the first part during initialization to negotiate with the back end, for example the supported features (guest_feature), the device status during initialization (device_status), and the number of virtqueues the device has (num_queues). The second part is used to set up data transfer between front end and back end. The roles of both parts in virtio initialization and data transfer are described in detail later. The virtio_pci_common_cfg structure is laid out as follows.
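For reference, this is the structure as defined in include/uapi/linux/virtio_pci.h (reproduced from the upstream kernel headers):

struct virtio_pci_common_cfg {
	/* About the whole device. */
	__le32 device_feature_select;	/* read-write */
	__le32 device_feature;		/* read-only */
	__le32 guest_feature_select;	/* read-write */
	__le32 guest_feature;		/* read-write */
	__le16 msix_config;		/* read-write */
	__le16 num_queues;		/* read-only */
	__u8 device_status;		/* read-write */
	__u8 config_generation;		/* read-only */

	/* About a specific virtqueue. */
	__le16 queue_select;		/* read-write */
	__le16 queue_size;		/* read-write, power of 2. */
	__le16 queue_msix_vector;	/* read-write */
	__le16 queue_enable;		/* read-write */
	__le16 queue_notify_off;	/* read-only */
	__le32 queue_desc_lo;		/* read-write */
	__le32 queue_desc_hi;		/* read-write */
	__le32 queue_avail_lo;		/* read-write */
	__le32 queue_avail_hi;		/* read-write */
	__le32 queue_used_lo;		/* read-write */
	__le32 queue_used_hi;		/* read-write */
};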
To enumerate PCI devices, x86 accesses the configuration space through a pair of I/O ports: the kernel writes the bus/slot/function address (plus the register offset) to port 0xCF8, then reads or writes the data through the I/O port at 0xCFC. The setup of the 0xCF8 access method happens in pci_arch_init.
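As a minimal sketch of this mechanism (the real kernel code lives in arch/x86/pci/direct.c; the helper below is purely illustrative and not a kernel API), reading one 32-bit configuration register could look like:

#include <linux/types.h>
#include <linux/io.h>

#define PCI_CONF_ADDR	0xCF8	/* CONFIG_ADDRESS port */
#define PCI_CONF_DATA	0xCFC	/* CONFIG_DATA port */

/* Illustrative helper: read one config dword of bus/slot/func at offset reg. */
static u32 conf_read_dword(u8 bus, u8 slot, u8 func, u8 reg)
{
	/* bit 31 enables the access; bus/slot/func/reg select the register */
	u32 addr = 0x80000000u | ((u32)bus << 16) | ((u32)slot << 11) |
		   ((u32)func << 8) | (reg & 0xFC);

	outl(addr, PCI_CONF_ADDR);	/* select the register via 0xCF8 */
	return inl(PCI_CONF_DATA);	/* read its value via 0xCFC */
}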
The enumeration itself starts from pci_subsys_init:

pci_subsys_init
    x86_init.pci.init => x86_default_pci_init
        pci_legacy_init
            pcibios_scan_root
                x86_pci_root_bus_resources    // allocate resources for the host bridge; normally the 64K I/O space and the memory space are carved up here
                pci_scan_root_bus             // enumerate the devices on the bus tree
                    pci_create_root_bus       // create the host bridge
                    pci_scan_child_bus        // scan every device on the bus tree; recurse if a PCI bridge is found
                        pci_scan_slot
                            pci_scan_single_device   // scan the device, read its vendor id and device id
                                pci_scan_device
                                    pci_setup_device
                                        pci_read_bases
                                            __pci_read_base   // read the size of each BAR
/*
* Resources are tree-like, allowing
* nesting etc..
*/
struct resource {
resource_size_t start;
resource_size_t end;
const char *name;
unsigned long flags;
unsigned long desc;
struct resource *parent, *sibling, *child;
};
A struct resource represents a resource, which can be a range of I/O addresses or a range of memory addresses. For every device enumerated on the bus tree, the host bridge allocates a suitable resource to that PCI device according to the size of its BAR space; the resource here is a physical address range in I/O or memory space. The value later written into a PCI device's BAR register comes from this allocation. The request flow is as follows:
pci_read_bases
    /* iterate over each BAR register, read its contents, and request a physical address range for it */
    for (pos = 0; pos < howmany; pos++) {
        struct resource *res = &dev->resource[pos]; // the requested address range is stored here
        reg = PCI_BASE_ADDRESS_0 + (pos << 2);
        pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
    }

__pci_read_base
    region.start = l64;
    region.end = l64 + sz64;
    /* request the resource; the result goes into res, while region holds the PCI device's BAR range */
    pcibios_bus_to_resource(dev->bus, res, &region);
Looking at this resource-request function: it first fetches the host bridge that the PCI device belongs to. The pci_host_bridge.windows list maintains all the resources managed by the host bridge; the function walks this windows list, finds the window that covers the device's region, and uses it to produce the resource handed to the PCI device, as sketched below.
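A simplified sketch of this step, modeled on pcibios_bus_to_resource() in drivers/pci/host-bridge.c (details trimmed, so treat it as an approximation rather than the exact upstream code):

void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
			     struct pci_bus_region *region)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
	struct resource_entry *window;
	resource_size_t offset = 0;

	/* walk every window the host bridge manages */
	resource_list_for_each_entry(window, &bridge->windows) {
		struct pci_bus_region bus_region;

		if (resource_type(res) != resource_type(window->res))
			continue;

		/* express the window in bus (PCI-domain) addresses */
		bus_region.start = window->res->start - window->offset;
		bus_region.end = window->res->end - window->offset;

		/* pick the window that contains the device's BAR region */
		if (bus_region.start <= region->start &&
		    bus_region.end >= region->end) {
			offset = window->offset;
			break;
		}
	}

	/* translate the bus region into a CPU-domain resource */
	res->start = region->start + offset;
	res->end = region->end + offset;
}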
At this point the PCI device has a physical address in the PCI domain. Once scanning finishes, the kernel configures this physical address into each PCI device one by one.
/*
 * The pci_dev structure is used to describe PCI devices.
 */
struct pci_dev {
	struct list_head bus_list;	/* node in per-bus list */
	struct pci_bus	*bus;		/* bus this device is on */
	struct pci_bus	*subordinate;	/* bus this device bridges to */

	void		*sysdata;	/* hook for sys-specific extension */
	struct proc_dir_entry *procent;	/* device entry in /proc/bus/pci */
	struct pci_slot	*slot;		/* Physical slot this device is in */

	unsigned int	devfn;		/* encoded device & function index */
	unsigned short	vendor;
	unsigned short	device;
	unsigned short	subsystem_vendor;
	unsigned short	subsystem_device;
	unsigned int	class;		/* 3 bytes: (base,sub,prog-if) */
	u8		revision;	/* PCI revision, low byte of class word */
	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER
	u16		aer_cap;	/* AER capability offset */
#endif
	u8		pcie_cap;	/* PCIe capability offset */
	u8		msi_cap;	/* MSI capability offset */
	u8		msix_cap;	/* MSI-X capability offset */
	u8		pcie_mpss:3;	/* PCIe Max Payload Size Supported */
	u8		rom_base_reg;	/* which config register controls the ROM */
	u8		pin;		/* which interrupt pin this device uses */
	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
	unsigned long	*dma_alias_mask;/* mask of enabled devfn aliases */

	struct pci_driver *driver;	/* which driver has allocated this device */
	u64		dma_mask;	/* Mask of the bits of bus address this
					   device implements.  Normally this is
					   0xffffffff.  You only need to change
					   this if your device has broken DMA
					   or supports 64-bit transfers.  */
	......
};
The kernel then writes the start of each allocated address range into the corresponding BAR register, completing the configuration. The flow is as follows:

pci_subsys_init
    pcibios_resource_survey
        pcibios_allocate_bus_resources(&pci_root_buses);  // first split the resources into per-bus chunks
        pcibios_allocate_resources(0);                    // check that the resources are consistent and do not conflict
        pcibios_allocate_resources(1);
        pcibios_assign_resources();                       // write the addresses into the BAR registers
            pci_assign_resource
                _pci_assign_resource
                    __pci_assign_resource
                        pci_bus_alloc_resource
                pci_update_resource
                    pci_std_update_resource
                        pci_write_config_dword(dev, reg, new)   // write the start address into the BAR register
At first glance the address written into the BAR register looks like an ordinary system physical address, but it is actually different from a CPU-domain physical address: it is a PCI-domain physical address. Addresses in the two domains have to be translated by the host bridge. On x86 the host bridge takes a shortcut and uses a 1:1 mapping, so the two address spaces look identical; on other architectures (PowerPC, for example) the two addresses differ.
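The place where the CPU-domain resource is converted back into a PCI-domain (bus) address before being written is pci_std_update_resource(); a stripped-down sketch (64-bit BAR and ROM handling omitted, so this only approximates the upstream code in drivers/pci/setup-res.c):

static void pci_std_update_resource(struct pci_dev *dev, int resno)
{
	struct pci_bus_region region;
	struct resource *res = dev->resource + resno;
	int reg = PCI_BASE_ADDRESS_0 + 4 * resno;
	u32 new;

	/* translate the CPU-domain resource into a bus (PCI-domain) region;
	 * on x86 the offset is 0, so both look the same */
	pcibios_resource_to_bus(dev->bus, &region, res);

	/* keep the low flag bits (I/O vs memory, prefetchable, ...) */
	new = region.start | (res->flags & PCI_REGION_FLAG_MASK);

	/* write the bus address into the BAR register */
	pci_write_config_dword(dev, reg, new);
}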
The assigned ranges can be inspected from inside the guest via /proc, or from the host via the QEMU monitor:

cat /proc/ioports
cat /proc/iomem
virsh qemu-monitor-command vm --hmp info pci
The PCI bus itself is registered with the driver core; the bus type is described by pci_bus_type:

struct bus_type pci_bus_type = {
.name = "pci",
.match = pci_bus_match,
.uevent = pci_uevent,
.probe = pci_device_probe,
.remove = pci_device_remove,
......
};
pci_driver_init
    bus_register(&pci_bus_type)                // register the data structure of the pci bus
        priv->subsys.kobj.kset = bus_kset;     // point at the kset representing the top-level bus directory
        priv->devices_kset = kset_create_and_add("devices", NULL, &priv->subsys.kobj);
        priv->drivers_kset = kset_create_and_add("drivers", NULL, &priv->subsys.kobj);
This produces the pci directory in sysfs, with devices and drivers subdirectories created under it.
The virtio-pci driver is registered on this bus as follows:

static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
	......
};

module_pci_driver(virtio_pci_driver)
    pci_register_driver
        __pci_register_driver

int __pci_register_driver(struct pci_driver *drv, struct module *owner,
			  const char *mod_name)
{
	/* initialize common driver fields */
	drv->driver.name = drv->name;
	drv->driver.bus = &pci_bus_type;	/* point the driver's bus at pci_bus_type */
	drv->driver.owner = owner;
	drv->driver.mod_name = mod_name;
	drv->driver.groups = drv->groups;

	spin_lock_init(&drv->dynids.lock);
	INIT_LIST_HEAD(&drv->dynids.list);

	/* register with core */
	return driver_register(&drv->driver);	/* register with the driver core */
}
driver_register
    driver_find                                         // check whether the same driver already exists on the bus, to prevent double registration
    bus_add_driver
        driver_create_file(drv, &driver_attr_uevent)    // create the uevent attribute file under the virtio-pci directory
        add_bind_files(drv)                             // create the bind/unbind attribute files under the virtio-pci directory
/* the driver_attr_uevent variable is defined by the following macro; the other driver attribute files are similar */
static DRIVER_ATTR_WO(uevent)

#define DRIVER_ATTR_WO(_name) \
	struct driver_attribute driver_attr_##_name = __ATTR_WO(_name)
Let us focus on the creation of the unbind attribute file. It gives user space an interface for detaching a driver: when the user writes a PCI device's address into the unbind file (for example, echo 0000:00:05.0 > /sys/bus/pci/drivers/virtio-pci/unbind, where the address is just an illustrative value), the kernel unbinds that device from its driver, the inverse of binding. The corresponding store handler is unbind_store:
/* Manually detach a device from its associated driver. */
static ssize_t unbind_store(struct device_driver *drv, const char *buf,
			    size_t count)
{
	struct bus_type *bus = bus_get(drv->bus);	/* find the bus the driver sits on */
	struct device *dev;
	int err = -ENODEV;

	dev = bus_find_device_by_name(bus, NULL, buf);	/* use the device name in buf to find the corresponding device in the kernel */
	if (dev && dev->driver == drv) {		/* confirm the device's driver really is this driver */
		if (dev->parent)	/* Needed for USB */
			device_lock(dev->parent);
		device_release_driver(dev);		/* unbind! */
		if (dev->parent)
			device_unlock(dev->parent);
		err = count;
	}
	put_device(dev);
	bus_put(bus);
	return err;
}
static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store)

#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
	struct driver_attribute driver_attr_##_name = \
		__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
Coming back to the main path: once the whole virtio-pci registration flow has run, sysfs gains a directory and attribute files for the virtio-pci driver.
Whenever a device is added to a bus, the driver core walks the drivers registered on that bus with bus_for_each_drv and asks the bus to match each one against the device:

int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
		     void *data, int (*fn)(struct device_driver *, void *));
For the PCI bus the match callback is pci_bus_match, which ends up in pci_match_one_device:

pci_bus_match
    pci_match_device
        pci_match_id
            pci_match_one_device

static inline const struct pci_device_id *
pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
	    !((id->class ^ dev->class) & id->class_mask))
		return id;

	return NULL;
}
In pci_match_one_device, the first argument is the ID structure hard-coded by the driver at registration time and the second is the PCI device. If the driver specifies PCI_ANY_ID for a field, that field matches any value. Looking at the virtio_pci_id_table set up when virtio_pci_driver registers (below), the driver only specifies the vendor id, so any device whose vendor id is 0x1af4 matches. The vendor id was already read from the device's configuration space when the system enumerated PCI devices. Therefore, whatever kind of virtio device it is, it will successfully bind to the virtio-pci driver.

static const struct pci_device_id virtio_pci_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
{ 0 }
};
#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
#define PCI_DEVICE(vend,dev) \
.vendor = (vend), .device = (dev), \
.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
After the PCI bus successfully matches the device and the driver, the driver core points the driver field of the device structure at this driver, linking the two, and then calls the probe function of the device_driver structure to probe the PCI device. Here that is the virtio_pci_probe function specified by virtio_pci_driver. The main steps of probe are:
virtio_pci_driver.probe
    virtio_pci_probe
        pci_enable_device

pci_enable_device
    pci_enable_device_flags(dev, IORESOURCE_MEM | IORESOURCE_IO)   // enable memory and I/O access
        do_pci_enable_device
            pcibios_enable_device
                pci_enable_resources
                    pci_write_config_word(dev, PCI_COMMAND, cmd)   // set the enable bits in the command register
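pci_enable_resources is where the I/O and memory enable bits actually get set; a sketch close to the upstream implementation (error checks on unassigned resources trimmed) looks like:

int pci_enable_resources(struct pci_dev *dev, int mask)
{
	u16 cmd, old_cmd;
	struct resource *r;
	int i;

	pci_read_config_word(dev, PCI_COMMAND, &cmd);
	old_cmd = cmd;

	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
		if (!(mask & (1 << i)))
			continue;

		r = &dev->resource[i];
		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
			continue;

		/* turn on decoding of the matching address space */
		if (r->flags & IORESOURCE_IO)
			cmd |= PCI_COMMAND_IO;
		if (r->flags & IORESOURCE_MEM)
			cmd |= PCI_COMMAND_MEMORY;
	}

	if (cmd != old_cmd)
		pci_write_config_word(dev, PCI_COMMAND, cmd);

	return 0;
}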
The capability probing entry point is virtio_pci_modern_probe; in legacy mode the entry point is virtio_pci_legacy_probe. We take the modern probe as the example here.
virtio_pci_probe
    virtio_pci_modern_probe
        virtio_pci_find_capability
            pci_find_capability(dev, PCI_CAP_ID_VNDR)
                pos = __pci_bus_find_cap_start   // determine the entry point; for an ordinary pci device this returns 0x34, where the offset of the head of the capability list is stored
                pos = __pci_find_next_cap        // walk the capabilities in turn, find the first one of type PCI_CAP_ID_VNDR, and return its offset in configuration space
                    __pci_find_next_cap_ttl
The virtio_pci_find_capability function first uses pci_find_capability to locate a capability of type PCI_CAP_ID_VNDR (0x09), the vendor-specific capability type defined by the PCI spec. Before searching, it determines the entry point of the capability list in configuration space: it checks whether the PCI device implements capabilities at all, and if so, for an ordinary device or a PCI bridge the list pointer sits at offset 0x34 of the configuration space, while for a CardBus bridge it sits at offset 0x14. Having found the start of the list, it walks the capabilities one by one, checking each capability's ID byte, until it finds one of type PCI_CAP_ID_VNDR, and returns that capability's offset within the configuration space. The key code of the whole process is shown below.
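The inner list walk happens in __pci_find_next_cap_ttl; a sketch close to the implementation in drivers/pci/pci.c (the exact signature varies slightly across kernel versions):

static int __pci_find_next_cap_ttl(struct pci_bus *bus, unsigned int devfn,
				   u8 pos, int cap, int *ttl)
{
	u8 id;
	u16 ent;

	/* read the pointer to the first (or next) capability */
	pci_bus_read_config_byte(bus, devfn, pos, &pos);

	while ((*ttl)--) {
		if (pos < 0x40)		/* capabilities live after the 64-byte header */
			break;
		pos &= ~3;
		/* read cap ID (low byte) and next pointer (high byte) in one go */
		pci_bus_read_config_word(bus, devfn, pos, &ent);

		id = ent & 0xff;
		if (id == 0xff)		/* end of list */
			break;
		if (id == cap)		/* found the requested capability type */
			return pos;
		pos = (ent >> 8);
	}
	return 0;
}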
/**
 * virtio_pci_find_capability - walk capabilities to find device info.
 * @dev: the pci device
 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
 *
 * Returns offset of the capability, or 0.
 */
static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
					     u32 ioresource_types, int *bars)
{
	int pos;

	/* find the offset of the cap structure within the configuration space */
	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
	     pos > 0;
	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
		u8 type, bar;

		/* read the cfg_type member of struct virtio_pci_cap */
		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
							 cfg_type),
				     &type);
		/* read the bar member of struct virtio_pci_cap */
		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
							 bar),
				     &bar);

		/* Ignore structures with reserved BAR values */
		if (bar > 0x5)
			continue;

		/* if this is the type we want, return the cap's offset in config space */
		if (type == cfg_type) {
			if (pci_resource_len(dev, bar) &&
			    pci_resource_flags(dev, bar) & ioresource_types) {
				*bars |= (1 << bar);
				return pos;
			}
		}
	}
	return 0;
}
/* This is the PCI capability header: */
struct virtio_pci_cap {
__u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
__u8 cap_next; /* Generic PCI field: next ptr. */
__u8 cap_len; /* Generic PCI field: capability length */
__u8 cfg_type; /* Identifies the structure. */
__u8 bar; /* Where to find it. */
__u8 padding[3]; /* Pad to full dword. */
__le32 offset; /* Offset within bar. */
__le32 length; /* Length of the structure, in bytes. */
};
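The notify capability extends this header with one extra field, the multiplier used to compute each queue's notification address; as defined in the same uapi header:

struct virtio_pci_notify_cap {
	struct virtio_pci_cap cap;
	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
};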
The device capability points at the device-specific configuration region; for virtio-blk this region holds struct virtio_blk_config.
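As a sketch, the leading fields of that structure as defined in include/uapi/linux/virtio_blk.h (abridged; the full structure carries more optional, feature-gated fields):

struct virtio_blk_config {
	/* The capacity (in 512-byte sectors). */
	__u64 capacity;
	/* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */
	__u32 size_max;
	/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
	__u32 seg_max;
	/* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
	struct virtio_blk_geometry {
		__u16 cylinders;
		__u8 heads;
		__u8 sectors;
	} geometry;
	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
	__u32 blk_size;
	/* ... further feature-gated fields omitted ... */
} __attribute__((packed));

virtio_pci_modern_probe then looks up each of the four virtio capabilities: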
common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
map_capability maps the BAR space into the kernel's virtual address space (on a 32-bit kernel this is the 3 GB-4 GB region):

vp_dev->common = map_capability(pci_dev, common,
				sizeof(struct virtio_pci_common_cfg), 4,
				0, sizeof(struct virtio_pci_common_cfg),
				NULL);
vp_dev->device = map_capability(pci_dev, device, 0, 4,
				0, PAGE_SIZE,
				&vp_dev->device_len);
map_capability
    pci_iomap_range(dev, bar, offset, length)
        if (flags & IORESOURCE_IO)      // if the BAR implements I/O space, map it into the CPU's I/O address space
            return __pci_ioport_map(dev, start, len);
        if (flags & IORESOURCE_MEM)     // if the BAR implements memory space, map it into the CPU's memory address space
            return ioremap(start, len);
/* Again, we don't know how much we should map, but PAGE_SIZE
 * is more than enough for all existing devices. */
if (device) {
	vp_dev->device = map_capability(pci_dev, device, 0, 4,
					0, PAGE_SIZE,
					&vp_dev->device_len);
	if (!vp_dev->device)
		goto err_map_device;

	vp_dev->vdev.config = &virtio_pci_config_ops;	/* register the config space operations */
} else {
	vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
}

vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;	/* register the virtqueue setup function */
vp_dev->del_vq = del_vq;
The register_virtio_device function registers the device on the virtio bus, which triggers the match operation on that bus and then the probing of the virtio device. Taking virtio-blk as the example, the flow is:

virtio_pci_probe
    pci_enable_device
    virtio_pci_modern_probe
    register_virtio_device
        dev->dev.bus = &virtio_bus                            // set virtio_device.dev.bus to the virtio bus!
        dev->config->reset(dev)                               // reset the virtio device
        virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE)   // set the device status to ACKNOWLEDGE, meaning we have noticed this virtio device
        device_register(&dev->dev)                            // register the device on the virtio bus, triggering the bus's match operation
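On the virtio bus, the match callback compares the virtio device ID against the driver's id_table; a sketch following the implementation in drivers/virtio/virtio.c:

static inline int virtio_id_match(const struct virtio_device *dev,
				  const struct virtio_device_id *id)
{
	if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID)
		return 0;

	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
}

static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
{
	unsigned int i;
	struct virtio_device *dev = dev_to_virtio(_dv);
	const struct virtio_device_id *ids;

	ids = drv_to_virtio(_dr)->id_table;
	for (i = 0; ids[i].device; i++)
		if (virtio_id_match(dev, &ids[i]))
			return 1;
	return 0;
}

For modern virtio-pci devices the virtio device ID is derived from the PCI device ID (virtio_pci_modern_probe sets it to pci_dev->device - 0x1040), which is why the modern PCI device IDs below start at 0x1041.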
1af4:1041  network device (modern)
1af4:1042  block device (modern)
1af4:1043  console device (modern)
1af4:1044  entropy generator device (modern)
1af4:1045  balloon device (modern)
1af4:1048  SCSI host bus adapter device (modern)
1af4:1049  9p filesystem device (modern)
1af4:1050  virtio gpu device (modern)
1af4:1052  virtio input device (modern)

legacy:
#define PCI_DEVICE_ID_VIRTIO_NET      0x1000
#define PCI_DEVICE_ID_VIRTIO_BLOCK    0x1001
#define PCI_DEVICE_ID_VIRTIO_BALLOON  0x1002
#define PCI_DEVICE_ID_VIRTIO_CONSOLE  0x1003
#define PCI_DEVICE_ID_VIRTIO_SCSI     0x1004
#define PCI_DEVICE_ID_VIRTIO_RNG      0x1005
#define PCI_DEVICE_ID_VIRTIO_9P       0x1009
#define PCI_DEVICE_ID_VIRTIO_VSOCK    0x1012
#define PCI_DEVICE_ID_VIRTIO_PMEM     0x1013
#define PCI_DEVICE_ID_VIRTIO_IOMMU    0x1014
#define PCI_DEVICE_ID_VIRTIO_MEM      0x1015