1. How are vhost_set_vring_kick and vhost_set_vring_call of vhost_dev's VhostOps implemented? In the kernel-based vhost_net mode, vhost_set_vring_kick and vhost_set_vring_call rely on ioctl() calls on /dev/vhost-net.
There are two ways to set up the connection: 1. the guest (QEMU) is the server and the DPDK vhost-user process is the client; 2. the guest (QEMU) is the client and the DPDK vhost-user process is the server.
The VHOST_SET_VRING_CALL and VHOST_SET_VRING_KICK implementation has two sides:
qemu vhost-user (sends the requests): VhostOps user_ops
dpdk (handles them): vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX]
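Both backends ship the same two pieces of information: a vring index and an eventfd. The kernel backend can hand the fd to an ioctl directly; vhost-user must pass it as SCM_RIGHTS ancillary data on the UNIX socket. Below is a minimal sketch of the sender side. The header layout and the request code (12) come from the vhost-user spec; send_vring_kick itself is a made-up helper, not QEMU's actual function:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define VHOST_USER_SET_VRING_KICK 12   /* request code from the vhost-user spec */

typedef struct VhostUserMsg {
    uint32_t request;
    uint32_t flags;   /* bits 0..1 carry the protocol version (0x1) */
    uint32_t size;    /* size of the payload that follows */
    uint64_t u64;     /* bits 0..7: vring index; bit 8: "no fd attached" */
} __attribute__((packed)) VhostUserMsg;

/* Send the message over the vhost-user socket, attaching kick_fd. */
static int send_vring_kick(int sock, unsigned int vring_idx, int kick_fd)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_VRING_KICK,
        .flags   = 0x1,
        .size    = sizeof(msg.u64),
        .u64     = vring_idx,
    };
    struct iovec iov = { .iov_base = &msg, .iov_len = sizeof(msg) };
    char cmsgbuf[CMSG_SPACE(sizeof(int))];
    struct msghdr mh = {
        .msg_iov        = &iov,
        .msg_iovlen     = 1,
        .msg_control    = cmsgbuf,
        .msg_controllen = sizeof(cmsgbuf),
    };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mh);

    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type  = SCM_RIGHTS;         /* pass the eventfd itself */
    cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(cmsg), &kick_fd, sizeof(int));

    return sendmsg(sock, &mh, 0) == sizeof(msg) ? 0 : -1;
}

VHOST_USER_SET_VRING_CALL (request 13) is sent the same way, only carrying the irqfd instead of the ioeventfd.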
2. How does vhost-user implement vhost_dev's const VhostOps *vhost_ops?
hw/virtio/vhost-backend.c:294:         static const VhostOps kernel_ops = {
hw/virtio/vhost-user.c:2357:           const VhostOps user_ops = {
include/hw/virtio/vhost-backend.h:175: extern const VhostOps user_ops;
3. How does vhost-user implement vhost_dev's struct vhost_virtqueue?
4. vhost_kernel_ioctl handles VHOST_SET_VRING_CALL and VHOST_SET_VRING_KICK; see the abridged code below.
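For reference, this is (abridged) how QEMU's kernel backend forwards the two requests; dev->opaque holds the fd of the open /dev/vhost-net:

/* Abridged from QEMU hw/virtio/vhost-backend.c */
static int vhost_kernel_call(struct vhost_dev *dev,
                             unsigned long int request, void *arg)
{
    int fd = (int)(uintptr_t)dev->opaque;   /* the open /dev/vhost-net fd */
    return ioctl(fd, request, arg);
}

static int vhost_kernel_set_vring_kick(struct vhost_dev *dev,
                                       struct vhost_vring_file *file)
{
    return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_kernel_set_vring_call(struct vhost_dev *dev,
                                       struct vhost_vring_file *file)
{
    return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file);
}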
5. Sending and receiving (a forwarding-loop sketch follows the prototypes below)
/* Forward count packets from the host to the guest. */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
                                 struct rte_mbuf **pkts, uint16_t count);

/* Receive up to count packets from the guest and store them in pkts. */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
                                 struct rte_mempool *mbuf_pool,
                                 struct rte_mbuf **pkts, uint16_t count);
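A sketch of how the pair is typically used in a polling switch loop between one NIC port and one vhost device. VIRTIO_RXQ/VIRTIO_TXQ follow the virtio queue-pair layout (as in the DPDK vhost sample); switch_once is an illustrative helper, not a DPDK API:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>

#define BURST      32
#define VIRTIO_RXQ 0   /* guest RX vring: the host enqueues into it */
#define VIRTIO_TXQ 1   /* guest TX vring: the host dequeues from it */

/* One polling iteration between physical port `port` and vhost dev `vid`. */
static void switch_once(uint16_t port, int vid, struct rte_mempool *pool)
{
    struct rte_mbuf *pkts[BURST];
    uint16_t nb, sent, i;

    /* NIC -> guest. enqueue_burst copies the data into the vring,
     * so the caller still owns (and must free) all mbufs. */
    nb = rte_eth_rx_burst(port, 0, pkts, BURST);
    rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb);
    for (i = 0; i < nb; i++)
        rte_pktmbuf_free(pkts[i]);

    /* Guest -> NIC. dequeue_burst copies into fresh mbufs from `pool`;
     * free whatever the NIC did not accept. */
    nb = rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, pool, pkts, BURST);
    sent = rte_eth_tx_burst(port, 0, pkts, nb);
    for (i = sent; i < nb; i++)
        rte_pktmbuf_free(pkts[i]);
}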
struct vhost_dev {
    MemoryListener memory_listener;  /* callback set for physical-memory operations */
    struct vhost_memory *mem;
    int n_mem_sections;
    MemoryRegionSection *mem_sections;
    struct vhost_virtqueue *vqs;     /* array of vhost_virtqueues and its size */
    int nvqs;
    /* the first virtqueue which would be used by this vhost dev */
    int vq_index;
    unsigned long long features;         /* features supported by the vhost device */
    unsigned long long acked_features;   /* features acked by the guest */
    unsigned long long backend_features; /* features supported by the backend, e.g. a tap device */
    bool started;
    bool log_enabled;
    vhost_log_chunk_t *log;
    unsigned long long log_size;
    Error *migration_blocker;
    bool force;
    bool memory_changed;
    hwaddr mem_changed_start_addr;
    hwaddr mem_changed_end_addr;
    /* VhostOps has separate implementations for the kernel and user backends;
     * the kernel implementation ultimately ends in ioctl() calls */
    const VhostOps *vhost_ops;
    void *opaque;
};
static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
    [VHOST_USER_NONE] = NULL,
    [VHOST_USER_GET_FEATURES] = vhost_user_get_features,
    [VHOST_USER_SET_FEATURES] = vhost_user_set_features,
    [VHOST_USER_SET_OWNER] = vhost_user_set_owner,
    [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner,
    [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table,
    [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base,
    [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd,
    [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num,
    [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr,
    [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base,
    [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base,
    [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick,
    [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call,
    [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err,
    [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features,
    [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features,
    [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num,
    [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable,
    [VHOST_USER_SEND_RARP] = vhost_user_send_rarp,
    [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
    [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
    [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
    [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
    [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
    [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
    [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd,
    [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd,
};
const VhostOps user_ops = {
    .backend_type = VHOST_BACKEND_TYPE_USER,
    .vhost_backend_init = vhost_user_backend_init,
    .vhost_backend_cleanup = vhost_user_backend_cleanup,
    .vhost_backend_memslots_limit = vhost_user_memslots_limit,
    .vhost_set_log_base = vhost_user_set_log_base,
    .vhost_set_mem_table = vhost_user_set_mem_table,
    .vhost_set_vring_addr = vhost_user_set_vring_addr,
    .vhost_set_vring_endian = vhost_user_set_vring_endian,
    .vhost_set_vring_num = vhost_user_set_vring_num,
    .vhost_set_vring_base = vhost_user_set_vring_base,
    .vhost_get_vring_base = vhost_user_get_vring_base,
    .vhost_set_vring_kick = vhost_user_set_vring_kick,
    .vhost_set_vring_call = vhost_user_set_vring_call,
    .vhost_set_features = vhost_user_set_features,
    .vhost_get_features = vhost_user_get_features,
    .vhost_set_owner = vhost_user_set_owner,
    .vhost_reset_device = vhost_user_reset_device,
    .vhost_get_vq_index = vhost_user_get_vq_index,
    .vhost_set_vring_enable = vhost_user_set_vring_enable,
    .vhost_requires_shm_log = vhost_user_requires_shm_log,
    .vhost_migration_done = vhost_user_migration_done,
    .vhost_backend_can_merge = vhost_user_can_merge,
    .vhost_net_set_mtu = vhost_user_net_set_mtu,
    .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
    .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
    .vhost_get_config = vhost_user_get_config,
    .vhost_set_config = vhost_user_set_config,
    .vhost_crypto_create_session = vhost_user_crypto_create_session,
    .vhost_crypto_close_session = vhost_user_crypto_close_session,
    .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
    .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
    .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};
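QEMU's generic vring setup code never cares which backend sits behind the ops table; it simply calls through it. Abridged from vhost_virtqueue_start() in QEMU's hw/virtio/vhost.c:

struct vhost_vring_file file;

file.index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
/* the ioeventfd that the guest's MMIO/PIO "kick" is wired to */
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
/* kernel_ops resolves this to ioctl(vhost_fd, VHOST_SET_VRING_KICK, &file);
 * user_ops sends a VHOST_USER_SET_VRING_KICK message plus the fd. */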
The goal of vhost-user is to implement such a Virtio transport, staying as close as possible to the vhost paradigm of using shared memory, ioeventfds and irqfds. A UNIX domain socket based mechanism allows setting up the resources used by a number of Vrings shared between two userspace processes; the Vrings themselves are placed in shared memory. The mechanism also configures the necessary eventfds to signal when a Vring gets a kick event from either side.
Vhost-user has been implemented in QEMU via a set of patches, giving the option to pass any virtio_net Vrings directly to another userspace process, implementing a virtio_net backend outside QEMU. This way, direct Snabbswitch to a QEMU guest virtio_net communication can be realized.
QEMU already implements the vhost interface for a fast zero-copy guest to host kernel data path. Configuration of this interface relies on a series of ioctls that define the control plane. In this scenario, the QEMU network backend invoked is the “tap” netdev. A usual way to run it is:
$ qemu -netdev type=tap,script=/etc/kvm/kvm-ifup,id=net0,vhost=on \
       -device virtio-net-pci,netdev=net0
The purpose of the vhost-user patches for QEMU is to provide the infrastructure and implementation of a user space vhost interface. The fundamental additions of this implementation are:
Added an option to -mem-path to allocate guest RAM as memory that can be shared with another process.
Use a Unix domain socket to communicate between QEMU and the user space vhost implementation.
The user space application will receive file descriptors for the pre-allocated shared guest RAM. It will directly access the related vrings in the guest's memory space.
Overall architecture of vhost-user
In the target implementation the vhost client is in QEMU. The target backend is Snabbswitch.
A version of QEMU patched with the latest vhost-user patches can be retrieved from the Virtual Open Systems repository at https://github.com/virtualopensystems/qemu (a standard QEMU tree with the KVM for ARM patches), branch vhost-user-v5.
To clone it:
$ git clone -b vhost-user-v5 https://github.com/virtualopensystems/qemu.git
Compilation is straightforward:
$ mkdir qemu/obj
$ cd qemu/obj/
$ ../configure --target-list=x86_64-softmmu
$ make -j
This will build QEMU as qemu/obj/x86_64-softmmu/qemu-system-x86_64.
To run QEMU with the vhost-user backend, one has to provide the named UNIX domain socket, which needs to have already been opened by the backend:
$ qemu -m 1024 -mem-path /hugetlbfs,prealloc=on,share=on \
       -netdev type=vhost-user,id=net0,file=/path/to/socket \
       -device virtio-net-pci,netdev=net0
The vhost library implements a user-space virtio network server that lets the user manipulate the virtio rings directly. In other words, it allows the user to fetch and send packets through a VM's virtio network device. To achieve this, a vhost library implementation needs to:
Access the guest's memory:
For QEMU, this is achieved with the -object memory-backend-file,share=on,... option, which makes QEMU create a file to serve as guest RAM. The share=on option allows another process to map that file, which means that process can access the guest RAM.
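On the backend side this shows up as the VHOST_USER_SET_MEM_TABLE message: each region arrives with a file descriptor (passed via SCM_RIGHTS) that the backend simply mmap()s. A sketch, with the region entry reduced to the fields used here (the field set mirrors the message's region layout):

#include <stdint.h>
#include <sys/mman.h>

struct mem_region {              /* one entry of the mem-table payload    */
    uint64_t guest_phys_addr;    /* GPA of the region                     */
    uint64_t memory_size;
    uint64_t userspace_addr;     /* QEMU's virtual address of the region  */
    uint64_t mmap_offset;        /* offset of the region inside the file  */
};

/* Map one region; returns the host VA corresponding to guest_phys_addr. */
static void *map_region(const struct mem_region *r, int fd)
{
    uint8_t *base = mmap(NULL, r->memory_size + r->mmap_offset,
                         PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (base == MAP_FAILED)
        return NULL;
    /* Later: host_va = (base + mmap_offset) + (gpa - guest_phys_addr) */
    return base + r->mmap_offset;
}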
Know all the necessary information about the vring:
For example, where the memory holding the available ring lives. Vhost defines a set of messages (passed over the Unix socket) through which the frontend tells the backend everything it needs to know in order to operate the vring.
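For example, the payload of VHOST_USER_SET_VRING_ADDR has the same layout as struct vhost_vring_addr in linux/vhost.h; the three ring addresses are QEMU user-space virtual addresses that the backend translates through the memory table above:

struct vhost_vring_addr {
    unsigned int index;        /* which vring this describes             */
    unsigned int flags;        /* e.g. VHOST_VRING_F_LOG                 */
    uint64_t desc_user_addr;   /* descriptor table                       */
    uint64_t used_user_addr;   /* used ring                              */
    uint64_t avail_user_addr;  /* available ring                         */
    uint64_t log_guest_addr;   /* for dirty-page logging during migration */
};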
Below is an overview of some key vhost API functions:
rte_vhost_driver_register(path, flags)
This function registers a vhost driver into the system. path specifies the file path of the Unix domain socket. (A registration sketch follows the flag list below.)
The currently supported flags include:
RTE_VHOST_USER_CLIENT
When this flag is used, DPDK vhost-user acts as the client. See the notes below.
RTE_VHOST_USER_NO_RECONNECT
When DPDK vhost-user acts as the client, it keeps trying to reconnect to the server (QEMU) until it succeeds. This is useful in two cases: when QEMU is not started yet, and when QEMU restarts (for example, after a guest OS reboot).
This reconnect behaviour is enabled by default; it can be turned off by setting this flag.
RTE_VHOST_USER_DEQUEUE_ZERO_COPY
Setting this flag enables dequeue zero copy. It is disabled by default.
When setting this flag, the following principles need to be kept in mind:
Zero copy is not good for small packets (smaller than 512 bytes).
Zero copy is best for the VM2VM case: for iperf between two VMs, the performance boost can be as high as 70% (when TSO is enabled).
For the VM2NIC case, nb_tx_desc must be small enough: <= 64 if the virtio indirect-descriptor feature is not enabled, <= 128 otherwise.
This is because when dequeue zero copy is enabled, the vring used by the guest TX queue is only updated after the corresponding mbuf is freed. Hence nb_tx_desc has to be small enough that the PMD driver runs out of available TX descriptors and frees the mbufs in time. Otherwise the guest TX vring runs dry, with no free entries left.
Guest memory should be backed by huge pages for better performance; 1 GB pages are best.
When dequeue zero copy is enabled, a mapping between guest physical addresses and host physical addresses must be established. Using non-huge pages means many more page segments. For simplicity, DPDK vhost does a linear search over these segments, so the fewer the segments, the faster the translation. Note: a tree search may be used in the future to speed this up.
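Putting the flags together, registration looks roughly like this. The socket path is a placeholder, and RTE_VHOST_USER_DEQUEUE_ZERO_COPY exists only in DPDK releases that still ship the feature:

#include <rte_vhost.h>

static int register_vhost_port(void)
{
    const char *path = "/tmp/vhost-user0.sock";   /* placeholder path   */
    uint64_t flags = RTE_VHOST_USER_CLIENT        /* QEMU is the server */
                   | RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

    return rte_vhost_driver_register(path, flags);
}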
rte_vhost_driver_set_features(path, features)
This function sets the feature bits the vhost-user driver supports. The vhost-user driver could be vhost-user net, but it could also be something else, for example vhost-user SCSI.
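A sketch of the usual bring-up sequence around it. rte_vhost_driver_get_features, rte_vhost_driver_callback_register and rte_vhost_driver_start are real DPDK entry points; the callbacks and the masked feature bit are just examples:

#include <linux/virtio_net.h>
#include <rte_vhost.h>

static int  on_new_device(int vid)     { (void)vid; return 0; } /* guest connected */
static void on_destroy_device(int vid) { (void)vid; }           /* guest gone */

static const struct vhost_device_ops ops = {
    .new_device     = on_new_device,
    .destroy_device = on_destroy_device,
};

static int start_vhost_port(const char *path)
{
    uint64_t features;

    rte_vhost_driver_get_features(path, &features);
    features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);  /* example: drop one bit */
    rte_vhost_driver_set_features(path, features);

    rte_vhost_driver_callback_register(path, &ops);
    return rte_vhost_driver_start(path);  /* spawns the session thread */
}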