The initialization entry point for the vhost-user NIC driver is net_init_vhost_user, and for the tap device driver it is net_init_tap. Each driver keeps its initialization state in a State structure. Both vhost-user and tap devices offload the data plane via the vhost protocol, so both contain a vhost_net field that abstracts the vhost-related state. Its most important member is the VhostOps structure, which abstracts the operations implementing the vhost protocol. For a tap device the data plane can be offloaded to the kernel, and kernel_ops implements the vhost protocol against the kernel; for a DPDK NIC the data plane is offloaded to user-space DPDK, and user_ops implements the vhost protocol interface. A vhost-user NIC needs a socket-type character device at initialization time; its listener field implements the socket setup, i.e. creating the socket and binding it to the path the server listens on. Once that is done, the listener blocks in accept waiting for a client connection. The messages exchanged over this socket are described by the VhostUserMsg structure, shown below:
/* vhost-user protocol header */
typedef struct {
    VhostUserRequest request;   /* request type */
    uint32_t flags;             /* marks whether this msg is a backend reply and whether a reply is required */
    uint32_t size;              /* the following payload size */
} QEMU_PACKED VhostUserHeader;

/* vhost msg payload; each request type uses a different member */
typedef union {
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
        VhostUserInflight inflight;
} VhostUserPayload;

/* vhost msg structure */
typedef struct VhostUserMsg {
    VhostUserHeader hdr;        /* header */
    VhostUserPayload payload;   /* payload */
} QEMU_PACKED VhostUserMsg;
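To make the framing concrete, here is a minimal sketch of how a front-end could send a VHOST_USER_GET_FEATURES request over the connected Unix socket and read the backend's u64 reply. The vu_msg struct and the helper are illustrative assumptions for this article, not QEMU's code; QEMU's actual send/receive paths are vhost_user_write()/vhost_user_read() in hw/virtio/vhost-user.c.

#include <stdint.h>
#include <unistd.h>

#define VHOST_USER_GET_FEATURES 1u   /* request code from the vhost-user spec */
#define VHOST_USER_VERSION      0x1u /* protocol version carried in flags bits 0-1 */

/* simplified wire format: 12-byte header followed by a u64 payload */
struct vu_msg {
    uint32_t request;
    uint32_t flags;
    uint32_t size;    /* payload bytes following the header */
    uint64_t u64;
} __attribute__((packed));

/* ask the backend for its feature bits; assumes sock_fd is already connected */
static int vu_get_features(int sock_fd, uint64_t *features)
{
    struct vu_msg msg = {
        .request = VHOST_USER_GET_FEATURES,
        .flags   = VHOST_USER_VERSION,
        .size    = 0,                      /* GET_FEATURES carries no payload */
    };

    if (write(sock_fd, &msg, 12) != 12) {  /* send the header only */
        return -1;
    }
    /* the reply reuses the same framing with an 8-byte u64 payload;
     * a real implementation would loop on short reads */
    if (read(sock_fd, &msg, sizeof(msg)) < (ssize_t)(12 + 8)) {
        return -1;
    }
    *features = msg.u64;
    return 0;
}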
typedef struct NetVhostUserState {
    NetClientState nc;          /* generic state of the NIC's network endpoint */
    CharBackend chr;            /* only queue index 0 */
    VhostUserState *vhost_user;
    /* QEMU's generic abstraction of vhost device state; alias of struct vhost_net */
    VHostNetState *vhost_net;
    guint watch;
    /* saves vhost_dev's acked_features when the vhost-user device is stopped */
    uint64_t acked_features;
    /* whether the NIC has been started */
    bool started;
} NetVhostUserState;
struct vhost_dev {
    VirtIODevice *vdev;
    MemoryListener memory_listener;
    MemoryListener iommu_listener;
    struct vhost_memory *mem;
    int n_mem_sections;
    MemoryRegionSection *mem_sections;
    int n_tmp_sections;
    MemoryRegionSection *tmp_sections;
    struct vhost_virtqueue *vqs;
    int nvqs;
    /* the first virtqueue which would be used by this vhost dev */
    int vq_index;
    uint64_t features;
    uint64_t acked_features;
    uint64_t backend_features;
    uint64_t protocol_features;
    uint64_t max_queues;
    bool started;
    bool log_enabled;
    uint64_t log_size;
    Error *migration_blocker;
    const VhostOps *vhost_ops;
    void *opaque;
    struct vhost_log *log;
    QLIST_ENTRY(vhost_dev) entry;
    QLIST_HEAD(, vhost_iommu) iommu_list;
    IOMMUNotifier n;
    const VhostDevConfigOps *config_ops;
};
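The VhostOps table referenced by vhost_ops above (declared in hw/virtio/vhost-backend.h) is a plain table of function pointers, which is how the generic vhost layer stays agnostic of the backend. Below is a cut-down, illustrative sketch of the dispatch pattern; the type name, fields and stub bodies are simplifications for this article, not QEMU's real definitions:

#include <stdint.h>

/* a two-entry stand-in for QEMU's much larger VhostOps table */
struct vhost_ops_sketch {
    int (*set_features)(void *dev, uint64_t features);
    int (*set_mem_table)(void *dev, void *mem);
};

/* kernel backend: would issue ioctls such as VHOST_SET_FEATURES on /dev/vhost-net */
static int kernel_set_features(void *dev, uint64_t f) { (void)dev; (void)f; return 0; }
static int kernel_set_mem_table(void *dev, void *m)   { (void)dev; (void)m; return 0; }
static const struct vhost_ops_sketch kernel_ops_sketch = {
    .set_features  = kernel_set_features,
    .set_mem_table = kernel_set_mem_table,
};

/* user backend: would send VHOST_USER_SET_FEATURES etc. over the unix socket */
static int user_set_features(void *dev, uint64_t f) { (void)dev; (void)f; return 0; }
static int user_set_mem_table(void *dev, void *m)   { (void)dev; (void)m; return 0; }
static const struct vhost_ops_sketch user_ops_sketch = {
    .set_features  = user_set_features,
    .set_mem_table = user_set_mem_table,
};

/* the generic layer dispatches through the table without knowing which backend it is */
static int set_features_sketch(const struct vhost_ops_sketch *ops,
                               void *dev, uint64_t features)
{
    return ops->set_features(dev, features);
}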
The vhost_net structure that embeds this vhost_dev:

struct vhost_net {
    struct vhost_dev dev;       /* the underlying vhost device */
    struct vhost_virtqueue vqs[2];
    int backend;
    NetClientState *nc;
};

A vhost-user NIC is configured with the following QEMU command-line options:
-chardev socket,id=charnet1,path=/run/openvswitch/vhu1,server
-netdev vhost-user,chardev=charnet1,queues=2,id=hostnet1
-device virtio-net-pci,mrg_rxbuf=on,mq=on,vectors=6,netdev=hostnet1,id=net1,mac=52:54:00:3f:8f:56,bus=pci.0,addr=0x4
The -device option's driver is virtio-net-pci. The device's NIC can point to back-ends with different implementations: a vhost NIC whose data plane is offloaded to the backend via the vhost protocol, or a plain virtio NIC without offload. Here it points to the vhost NIC hostnet1. The vhost NIC's driver is vhost-user, which needs a character device to communicate with the slave side; here it points to charnet1. The chardev's driver is socket, meaning the chardev talks to the slave over a socket: acting as the server, it listens on the path given by the path attribute and waits for a client to connect.

Character device initialization is completed in the qemu_create_early_backends function which, as the name suggests, mainly initializes backends and dependencies needed by other devices. The flow is as follows:

qemu_init
    qemu_create_early_backends
        /* parse each chardev's options and initialize it via chardev_init_func */
        qemu_opts_foreach(qemu_find_opts("chardev"),
                          chardev_init_func, NULL, &error_fatal);
            chardev_init_func
                qemu_chr_new_from_opts(opts, NULL, &local_err)
                    qemu_chardev_new
                        chardev_new
                            /* open the chardev; for a socket chardev this means
                             * opening the socket path and listening on it */
                            qemu_char_open
                            /* if the chardev is already open, invoke the callbacks
                             * registered for the event */
                            qemu_chr_be_event
For a socket character device, the open routine is qmp_chardev_open_socket; the process is as follows:

qmp_chardev_open_socket
    /* if the socket chardev is the server side, it is responsible for listening on the socket */
    if (s->is_listen) {
        /* open the socket chardev as a server */
        if (qmp_chardev_open_socket_server(chr, is_telnet || is_tn3270,
                                           is_waitconnect, errp) < 0)
            ......
        /* if this socket chardev should wait for a client connection */
        if (is_waitconnect)
            /* wait synchronously */
            tcp_chr_accept_server_sync(chr);
    } else {
        /* the socket chardev is the client side */
        if (qmp_chardev_open_socket_client(chr, reconnect, errp) < 0)
            ......
    }
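In POSIX terms, the server-side "open" above boils down to the standard socket/bind/listen sequence, after which accept() can block for a client. Here is a standalone sketch of that sequence for a Unix stream socket; it shows what qmp_chardev_open_socket_server ultimately arranges, but it is not QEMU's code:

#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

/* create a unix socket, bind it to `path`, and listen: the server-side setup */
static int unix_server_listen(const char *path)
{
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        return -1;
    }
    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
    unlink(path);                             /* remove a stale socket file, if any */
    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        listen(fd, 1) < 0) {
        close(fd);
        return -1;
    }
    return fd;                                /* accept() on this fd blocks for a client */
}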
The main job of tcp_chr_accept_server_sync is to wait for a client connection; once the connection is established it stops listening and then invokes the event callback registered on the character device:

static void tcp_chr_accept_server_sync(Chardev *chr)
{
    SocketChardev *s = SOCKET_CHARDEV(chr);
    QIOChannelSocket *sioc;

    /* set the chardev state to "connecting" */
    tcp_chr_change_state(s, TCP_CHARDEV_STATE_CONNECTING);
    /* wait synchronously; once connected, the client's fd is saved in sioc */
    sioc = qio_net_listener_wait_client(s->listener);
    tcp_chr_set_client_ioc_name(chr, sioc);
    /* invoke the event callback */
    tcp_chr_new_client(chr, sioc);
}
The implementation of qio_net_listener_wait_client:

QIOChannelSocket *qio_net_listener_wait_client(QIONetListener *listener)
{
    /* create a GLib event loop context */
    GMainContext *ctxt = g_main_context_new();
    GMainLoop *loop = g_main_loop_new(ctxt, TRUE);
    GSource **sources;
    ......
    sources = g_new0(GSource *, listener->nsioc);
    for (i = 0; i < listener->nsioc; i++) {
        /* wrap each server-side socket fd into an event source */
        sources[i] = qio_channel_create_watch(QIO_CHANNEL(listener->sioc[i]),
                                              G_IO_IN);
        /* set qio_net_listener_wait_client_func as the callback run when the source fires */
        g_source_set_callback(sources[i],
                              (GSourceFunc)qio_net_listener_wait_client_func,
                              &data, NULL);
        /* attach the event source to the loop context */
        g_source_attach(sources[i], ctxt);
    }
    /* block until an event source fires */
    g_main_loop_run(loop);
    /* a source fired and qio_net_listener_wait_client_func has finished */

    /* unregister the event sources */
    for (i = 0; i < listener->nsioc; i++) {
        g_source_unref(sources[i]);
    }
    g_free(sources);
    g_main_loop_unref(loop);
    g_main_context_unref(ctxt);
    ......
    /* return the client fd wrapped in a sioc */
    return data.sioc;
}
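The wait here is ordinary GLib: wrap the fd in a GSource, attach it to a private GMainContext, and run a loop that the callback quits. A minimal standalone version of the same pattern, assuming GLib with its Unix extensions is available (this is not QEMU's code):

#include <glib.h>
#include <glib-unix.h>

/* callback: fires when fd becomes readable, then quits the loop */
static gboolean on_ready(gint fd, GIOCondition cond, gpointer user_data)
{
    g_main_loop_quit((GMainLoop *)user_data);
    return G_SOURCE_REMOVE;
}

/* block the caller until fd is readable, using a private context and loop */
static void wait_readable(int fd)
{
    GMainContext *ctx = g_main_context_new();
    GMainLoop *loop = g_main_loop_new(ctx, FALSE);
    GSource *src = g_unix_fd_source_new(fd, G_IO_IN);

    g_source_set_callback(src, (GSourceFunc)on_ready, loop, NULL);
    g_source_attach(src, ctx);      /* register the source with our private context */
    g_main_loop_run(loop);          /* block until the fd becomes readable */

    g_source_unref(src);
    g_main_loop_unref(loop);
    g_main_context_unref(ctx);
}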
After the connection is accepted, tcp_chr_new_client delivers the open event:

tcp_chr_new_client
    tcp_chr_connect
        /* set the chardev state to "connected" */
        tcp_chr_change_state(s, TCP_CHARDEV_STATE_CONNECTED)
        /* invoke the callback registered for the event, net_vhost_user_event */
        qemu_chr_be_event(chr, CHR_EVENT_OPENED);
            chr_be_event
                /* at this stage the vhost-user NIC has not been initialized yet,
                 * so no event callback is registered and this returns immediately */
                be->chr_event
Every NIC device is represented in QEMU by a NetClientState structure, linked into the global list net_clients through its next field; individual devices are distinguished by netdev->id. Initialization consists of constructing the NetClientState object and performing whatever setup the backend requires; for example, if the netdev points to a chardev, the socket connection has to be prepared. NIC initialization runs after character device initialization, in the main thread's qemu_create_late_backends function, as follows:

qemu_init
    qemu_create_late_backends
        net_init_clients
            /* call the initialization function net_init_netdev for every NIC device */
            qemu_opts_foreach(qemu_find_opts("netdev"),
                              net_init_netdev, NULL, errp))
                net_client_init
                    net_client_init1
                        /* look up the initialization function for the NIC type and call it;
                         * the type here is vhost-user, so net_init_vhost_user is called */
                        net_client_init_fun[netdev->type](netdev, netdev->id, peer, errp) <=> net_init_vhost_user
                            net_vhost_user_init
The net_vhost_user_init function:

static int net_vhost_user_init(NetClientState *peer, const char *device,
                               const char *name, Chardev *chr,
                               int queues)
{
    Error *err = NULL;
    NetClientState *nc, *nc0 = NULL;
    NetVhostUserState *s = NULL;
    VhostUserState *user;

    /* allocate the VhostUserState that will be associated with the NetVhostUserState */
    user = g_new0(struct VhostUserState, 1);
    for (i = 0; i < queues; i++) {
        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
                 i, chr->label);
        nc->queue_index = i;
        if (!nc0) {
            nc0 = nc;
            s = DO_UPCAST(NetVhostUserState, nc, nc);
            /* bind the chardev backend ... */
            if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
                /* ... and set it as the vhost-user device's backend channel */
                !vhost_user_init(user, &s->chr, &err)) {
                error_report_err(err);
                goto err;
            }
        }
        s = DO_UPCAST(NetVhostUserState, nc, nc);
        /* the vhost-user device serves as the backend implementation of the
         * vhost device; associate the NetVhostUserState with it */
        s->vhost_user = user;
    }

    s = DO_UPCAST(NetVhostUserState, nc, nc0);
    do {
        /* wait for the client to connect */
        if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
            error_report_err(err);
            goto err;
        }
        /* register the callback for chardev events; for a socket chardev,
         * this is the callback invoked when events occur on the fd the
         * socket is listening on */
        qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                                 net_vhost_user_event, NULL, nc0->name, NULL,
                                 true);
            qemu_chr_fe_set_handlers_full
                qemu_chr_be_event(s, CHR_EVENT_OPENED)
    /* this condition is important: the loop only exits once the vhost-user
     * device has been marked as started */
    } while (!s->started);
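The DO_UPCAST calls above recover the NetVhostUserState that embeds a given NetClientState as its first member; this is the classic container_of pattern. A generic standalone sketch (the type and macro names here are illustrative, not QEMU's):

#include <stddef.h>

struct net_client {
    int queue_index;
};

struct net_vhost_user {
    struct net_client nc;   /* embedded as the first member */
    int started;
};

/* recover the containing structure from a pointer to one of its members */
#define UPCAST(type, member, ptr) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static struct net_vhost_user *to_vhost_user(struct net_client *nc)
{
    return UPCAST(struct net_vhost_user, nc, nc);
}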
The logic of qemu_chr_fe_wait_connected is simply to call the socket chardev's synchronous wait interface until a client arrives:

qemu_chr_fe_wait_connected
    qemu_chr_wait_connected
        cc->chr_wait_connected <=> tcp_chr_wait_connected
            /* if the client has not connected yet, keep waiting */
            while (s->state != TCP_CHARDEV_STATE_CONNECTED) {
                if (s->is_listen)
                    tcp_chr_accept_server_sync(chr);
            }
The net_vhost_user_event function:

static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
    switch (event) {
    /* when the connection opens, start the vhost-user device if it is not
     * already started; the main work is initializing the vhost-user network
     * interface, i.e. the initialization described under the basic principles */
    case CHR_EVENT_OPENED:
        if (vhost_user_start(queues, ncs, s->vhost_user) < 0) {
            qemu_chr_fe_disconnect(&s->chr);
            return;
        }
        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
                                         net_vhost_user_watch, s);
        qmp_set_link(name, true, &err);
        /* mark the vhost-user device as started */
        s->started = true;
        break;
The type information of the virtio-net-pci device is described by virtio_net_pci_info. An ordinary device type is defined with a TypeInfo; virtio-net-pci is special and uses a VirtioPCIDeviceTypeInfo, as follows:

static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = {
.base_name = TYPE_VIRTIO_NET_PCI,
.generic_name = "virtio-net-pci",
.transitional_name = "virtio-net-pci-transitional",
.non_transitional_name = "virtio-net-pci-non-transitional",
.instance_size = sizeof(VirtIONetPCI),
.instance_init = virtio_net_pci_instance_init,
.class_init = virtio_net_pci_class_init,
};
When a VirtioPCIDeviceTypeInfo does not specify a parent class, the parent defaults to TYPE_VIRTIO_PCI. virtio_net_pci_instance_init is the instance-init function called when QEMU instantiates a virtio-net-pci device; it runs the common virtio instance initialization on the embedded device, giving it the type TYPE_VIRTIO_NET, as follows:

static void virtio_net_pci_instance_init(Object *obj)
{
VirtIONetPCI *dev = VIRTIO_NET_PCI(obj);
virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
TYPE_VIRTIO_NET);
......
}
The parent class of TYPE_VIRTIO_NET is TYPE_VIRTIO_DEVICE; the relationship is as follows:

static const TypeInfo virtio_net_info = {
.name = TYPE_VIRTIO_NET,
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VirtIONet),
.instance_init = virtio_net_instance_init,
.class_init = virtio_net_class_init,
};
When the guest driver writes the device status field in the virtio common configuration space, QEMU handles the access in virtio_pci_common_write, which is where the analysis starts:

virtio_pci_common_write
    switch (addr) {
    case VIRTIO_PCI_COMMON_STATUS:
        virtio_set_status(vdev, val & 0xFF);
            k->set_status <=> virtio_net_set_status
                virtio_net_vhost_status
                    vhost_net_start
The vhost_net_start function:

vhost_net_start
    /* for each queue of the device, find the vhost device it is attached to and start it */
    for (i = 0; i < total_queues; i++) {
        peer = qemu_get_peer(ncs, i);
        vhost_net_start_one(get_vhost_net(peer), dev);
            vhost_dev_start
        if (peer->vring_enable)
            /* enable the vring */
            vhost_set_vring_enable(peer, peer->vring_enable); /* VHOST_USER_SET_VRING_ENABLE */
    }
The vhost_dev_start function:

vhost_dev_start
    /* set the backend's features */
    vhost_dev_set_features
        vhost_ops->vhost_set_features
    /* collect the guest's memory layout and pass it to the backend */
    vhost_ops->vhost_set_mem_table
    /* start each queue */
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_start(hdev, vdev, hdev->vqs + i, hdev->vq_index + i);
            /* get the address of the descriptor ring */
            virtio_queue_get_desc_addr
            /* set the depth of the shared queue */
            vhost_ops->vhost_set_vring_num
            /* set the base of the shared queue */
            vhost_ops->vhost_set_vring_base
            vhost_ops->vhost_set_vring_kick
            vhost_ops->vhost_set_vring_call
            ......
    }
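On the wire, each vhost_ops-> call above turns into one vhost-user message to the backend. As an illustration, here is a hedged sketch of sending the two vring_state-typed requests (SET_VRING_NUM and SET_VRING_BASE) for one queue, using the same 12-byte-header framing as the VhostUserMsg shown earlier. The request codes come from the vhost-user specification; the struct and helper names are this article's own, not QEMU's:

#include <stdint.h>
#include <unistd.h>

/* request codes from the vhost-user specification */
enum {
    VHOST_USER_SET_VRING_NUM  = 8,
    VHOST_USER_SET_VRING_BASE = 10,
};

/* header plus a vhost_vring_state payload (queue index + value) */
struct vu_vring_msg {
    uint32_t request;
    uint32_t flags;
    uint32_t size;
    struct { uint32_t index; uint32_t num; } state;
} __attribute__((packed));

/* send one vring_state-typed request on the connected socket */
static int vu_send_vring_state(int fd, uint32_t req, uint32_t index, uint32_t num)
{
    struct vu_vring_msg msg = {
        .request = req,
        .flags   = 0x1,                  /* protocol version 1 */
        .size    = sizeof(msg.state),
        .state   = { .index = index, .num = num },
    };
    return write(fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
}

/* mirroring vhost_virtqueue_start for queue qidx with a 256-entry ring */
static int start_queue_sketch(int fd, uint32_t qidx)
{
    if (vu_send_vring_state(fd, VHOST_USER_SET_VRING_NUM, qidx, 256) < 0)
        return -1;
    return vu_send_vring_state(fd, VHOST_USER_SET_VRING_BASE, qidx, 0);
}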
On the host side, a typical OVS-DPDK setup binds the physical NIC to vfio-pci and adds it to a userspace-datapath bridge together with a vhost-user client port:

driverctl set-override {phy_nic} vfio-pci
lspci | grep Eth | grep {phy_nic}
ovs-vsctl add-br {br_name} -- set bridge {br_name} datapath_type=netdev
ovs-vsctl add-port {br_name} {port_name} -- set Interface {port_name} type=dpdk options:dpdk-devargs={bus:device:function}
ovs-vsctl --may-exist add-port {br_name} {vm_port} -- set interface {vm_port} type=dpdkvhostuserclient options:vhost-server-path=/path/to/server.socket
Since the OVS port type is dpdkvhostuserclient, OVS connects as the vhost-user client and QEMU must act as the server; in the libvirt domain XML this corresponds to mode='server' on the interface source, together with hugepage-backed guest memory:

<!-- back the guest memory with hugepages -->
<memoryBacking>
  <hugepages>
    <page size='1048576' unit='KiB'/>
  </hugepages>
</memoryBacking>

<!-- give the guest a vhostuser NIC -->
<interface type='vhostuser'>
  <mac address='52:54:00:55:bc:ee'/>
  <source type='unix' path='/path/to/server.socket' mode='server'/>
  <model type='virtio'/>
</interface>
Backtrace of the initialization path, stopped in vhost_user_set_vring_call while the netdev is being set up:

#0  vhost_user_set_vring_call (dev=0x5618b79dbaa0, file=0x7fff05ce4b10) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/vhost-user.c:806
#1  0x00005618b5b2bb07 in vhost_virtqueue_init (n=<optimized out>, vq=0x5618b79dbd00, dev=0x5618b79dbaa0) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/vhost.c:1191
#2  vhost_dev_init (hdev=hdev@entry=0x5618b79dbaa0, opaque=<optimized out>, backend_type=<optimized out>, busyloop_timeout=0) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/vhost.c:1242
#3  0x00005618b5c70038 in vhost_net_init (options=options@entry=0x7fff05ce4bc0) at hw/net/vhost_net.c:176
#4  0x00005618b5cfbeb8 in vhost_user_start (be=0x7f37049b7010, ncs=0x7fff05ce4be0, queues=2) at net/vhost-user.c:88
#5  net_vhost_user_event (opaque=0x5618b79dc350, event=<optimized out>) at net/vhost-user.c:275
#6  0x00005618b5dbbf95 in qemu_chr_fe_set_handlers (b=b@entry=0x5618b79db7d8, fd_can_read=fd_can_read@entry=0x0, fd_read=fd_read@entry=0x0, fd_event=fd_event@entry=0x5618b5cfbd20 <net_vhost_user_event>, be_change=be_change@entry=0x0, opaque=<optimized out>, context=0x0, set_open=true) at chardev/char-fe.c:304
#7  0x00005618b5cfc602 in net_vhost_user_init (device=0x5618b5f76ebf "vhost_user", queues=<optimized out>, chr=<optimized out>, name=<optimized out>, peer=0x0) at net/vhost-user.c:351
#8  net_init_vhost_user (netdev=<optimized out>, name=<optimized out>, peer=0x0, errp=<optimized out>) at net/vhost-user.c:449
#9  0x00005618b5cf4e18 in net_client_init1 (object=0x5618b79df090, is_netdev=is_netdev@entry=true, errp=errp@entry=0x7fff05ce6e70) at net/net.c:1055
#10 0x00005618b5cf5522 in net_client_init (opts=<optimized out>, is_netdev=<optimized out>, errp=0x7fff05ce7030) at net/net.c:1161
#11 0x00005618b5e37d3a in qemu_opts_foreach (list=<optimized out>, func=func@entry=0x5618b5cf55c0 <net_init_netdev>, opaque=opaque@entry=0x0, errp=errp@entry=0x7fff05ce7030) at util/qemu-option.c:1170
#12 0x00005618b5cf67d2 in net_init_clients (errp=errp@entry=0x7fff05ce7030) at net/net.c:1549
#13 0x00005618b5b60dcf in qemu_init (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/softmmu/vl.c:4309
#14 0x00005618b5a6e759 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/softmmu/main.c:48
Backtrace of the start path, stopped in vhost_user_set_mem_table as the VM is set running (QMP cont, then vm_start):

#0  vhost_user_set_mem_table (dev=0x55c085afbaa0, mem=0x55c085cbbff0) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/vhost-user.c:545
#1  0x000055c084b36847 in vhost_dev_start (hdev=hdev@entry=0x55c085afbaa0, vdev=vdev@entry=0x55c0869f8f20) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/vhost.c:1631
#2  0x000055c084c7a69b in vhost_net_start_one (dev=0x55c0869f8f20, net=0x55c085afbaa0) at hw/net/vhost_net.c:236
#3  vhost_net_start (dev=dev@entry=0x55c0869f8f20, ncs=0x55c086a13090, total_queues=total_queues@entry=2) at hw/net/vhost_net.c:338
#4  0x000055c084b1355c in virtio_net_vhost_status (status=<optimized out>, n=0x55c0869f8f20) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/net/virtio-net.c:250
#5  virtio_net_set_status (vdev=0x55c0869f8f20, status=15 '\017') at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/net/virtio-net.c:331
#6  0x000055c084b2ed8b in virtio_set_status (vdev=vdev@entry=0x55c0869f8f20, val=<optimized out>) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/virtio.c:1956
#7  0x000055c084b2ef5b in virtio_vmstate_change (opaque=0x55c0869f8f20, running=1, state=<optimized out>) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/hw/virtio/virtio.c:3216
#8  0x000055c084b6611f in vm_state_notify (running=running@entry=1, state=state@entry=RUN_STATE_RUNNING) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/softmmu/vl.c:1284
#9  0x000055c084ac1ccd in vm_prepare_start () at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/cpus.c:2148
#10 0x000055c084ac1d19 in vm_start () at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/cpus.c:2154
#11 0x000055c084cf7942 in qmp_cont (errp=errp@entry=0x7ffc40419010) at monitor/qmp-cmds.c:160
#12 0x000055c084d1c0e2 in qmp_marshal_cont (args=<optimized out>, ret=<optimized out>, errp=0x7ffc40419058) at qapi/qapi-commands-misc.c:594
#13 0x000055c084de4a93 in qmp_dispatch (cmds=0x55c0854c34e0 <qmp_commands>, request=<optimized out>, allow_oob=<optimized out>) at qapi/qmp-dispatch.c:155
#14 0x000055c084cf4351 in monitor_qmp_dispatch (mon=0x55c085b1a210, req=<optimized out>) at monitor/qmp.c:145
#15 0x000055c084cf4b30 in monitor_qmp_bh_dispatcher (data=<optimized out>) at monitor/qmp.c:234
#16 0x000055c084e2c267 in aio_bh_call (bh=0x55c08595ea00) at util/async.c:136
#17 aio_bh_poll (ctx=ctx@entry=0x55c08595d560) at util/async.c:164
#18 0x000055c084e2f96e in aio_dispatch (ctx=0x55c08595d560) at util/aio-posix.c:380
#19 0x000055c084e2c14e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:306
#20 0x00007f0b575eaea4 in g_main_context_dispatch () from target:/usr/lib64/libglib-2.0.so.0
#21 0x000055c084e2ebaa in glib_pollfds_poll () at util/main-loop.c:219
#22 os_host_main_loop_wait (timeout=997000000) at util/main-loop.c:242
#23 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518
#24 0x000055c084b669f1 in qemu_main_loop () at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/softmmu/vl.c:1710
#25 0x000055c084a7875e in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at /usr/src/debug/qemu-kvm-5.0.0-19.zy_2.0_3.cp3.ctl2.ctl2.x86_64/softmmu/main.c:49