当前位置:   article > 正文

4.2 APIC 虚拟化_读取 local apic register

读取 local apic register


每个逻辑处理器都有自己的local apic,当guest读取local apic寄存器时将返回物理cpu的值,当guest写时将写入物理寄存器。例如可以通过ICR寄存器向其他cpu发送IPI.Local APIC虚拟化有两种方法:

  (1) EPT机制管理MMIO 或MSR-bitmap方式

  (2) Intel VT 的virtual-APIC方式; 本节将重点讨论第二种方式

 

4.2.1 Local APIC 模块初始化

(1) 用户空间初始化

 pc_new_cpu ==> apic_init(env,env->cpuid_apic_id) ==> qdev_create(NULL, "kvm-apic");

/*
 * Type description for the "kvm-apic" device: its parent type is
 * TYPE_APIC_COMMON, an instance is sized as APICCommonState, and class
 * setup is performed by kvm_apic_class_init.
 */
static TypeInfo kvm_apic_info = {
    .name          = "kvm-apic",
    .parent        = TYPE_APIC_COMMON,
    .instance_size = sizeof(APICCommonState),
    .class_init    = kvm_apic_class_init,
};

/*
 * Instance initialisation for the "kvm-apic" device (the article locates
 * this in hw/apic.c).
 *
 * Initialises s->io_memory as an I/O memory region named "kvm-apic-msi" of
 * MSI_SPACE_SIZE bytes, passing kvm_apic_io_ops as the access callbacks and
 * s as the opaque pointer.
 */
static void kvm_apic_init(APICCommonState *s)
{
    memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi",
                          MSI_SPACE_SIZE);
}

用于处理msi中断的case, 5.2节将讨论msi interrupt.

 

apic_init_common(hw\apic-common.c) ==> sysbus_create_simple("kvmvapic", -1, NULL);

vapic_init (hw\kvm-vapic.c) ==> memory_region_init_io(&s->io, &vapic_ops, s, "kvmvapic", 2);

sysbus_add_io(dev, VAPIC_IO_PORT,&s->io);

sysbus_init_ioports(dev,VAPIC_IO_PORT, 2); //处理port 0x7E的操作

 

(2) 内核空间初始化

vmx_create_vcpu ==》 kvm_vcpu_init ==》 kvm_arch_vcpu_init==》 kvm_create_lapic

 a. 建立一个hrtimer,回调为apic_timer_fn

 b. apic_base 默认设为0xfee00000

 c. kvm_lapic_reset设置虚拟寄存器的default值

vmx_create_vcpu ==》

        if(vm_need_virtualize_apic_accesses(kvm)) { // flexpriority_enabled默认为1

       err = alloc_apic_access_page(kvm);

        }

alloc_apic_access_page:

    kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;

    kvm_userspace_mem.flags = 0;

    kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;

    kvm_userspace_mem.memory_size = PAGE_SIZE;

    r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); //单独用一个slot来管理apic accesspage

    page = gfn_to_page(kvm, 0xfee00);

    kvm->arch.apic_access_page = page;

 

(3) virtual-apic相关的VMCS寄存器设置

kvm_lapic_reset ==》

   if(kvm_vcpu_is_bsp(vcpu))

       kvm_lapic_set_base(vcpu,

              vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);

 kvm_lapic_set_base ==》 kvm_x86_ops->set_virtual_x2apic_mode

/*
 * Switch the secondary VM-execution controls between the two local-APIC
 * access styles. The two bits are toggled as a pair: setting one always
 * clears the other.
 *
 * @vcpu: target vCPU (not referenced by the statements shown in this
 *        abridged excerpt)
 * @set:  true  -> set VIRTUALIZE_X2APIC_MODE, clear VIRTUALIZE_APIC_ACCESSES
 *        false -> set VIRTUALIZE_APIC_ACCESSES, clear VIRTUALIZE_X2APIC_MODE
 */
static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
    u32 sec_exec_control;

    sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);

    if (set) {
        sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
        sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
    } else {
        sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
        sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
    }

    /* Write the updated control word back to the VMCS. */
    vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
}

local apic两种访问方式的选择memory 和msr; 本节将只分析memory方式

当SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE设为1时,通过msr 800h-8ffh来访问local apic register; SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES为1时将启用apic-access page.

 

setup_vmcs_config ==> 设置如下bit

    SECONDARY_EXEC_APIC_REGISTER_VIRT :启用virtual-page access访问local apic寄存器

    SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY: 对pending的virtual interrupt进行评估,允许后通过guest IDT提交

   CPU_BASED_VM_EXEC_CONTROL寄存器CPU_BASED_TPR_SHADOW设为1,当guest访问apic-access page偏移80h时将访问到VTPR.

    PIN_BASED_VM_EXEC_CONTROL寄存器PIN_BASED_POSTED_INTR设为1,当处理器接收到通知的外部中断时不产生vm-exit,而是将posted-interrupt descriptor复制到virtual-apic page内的VIRR,形成虚拟中断请求。

 

vmx_vcpu_reset ==>  

       a. kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);

       b. if(cpu_has_vmx_tpr_shadow()) {

       vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);

       if (vm_need_tpr_shadow(vmx->vcpu.kvm))

           vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,

                  __pa(vmx->vcpu.arch.apic->regs));

           vmcs_write32(TPR_THRESHOLD,0);

        }

       c.  vmcs_write64(APIC_ACCESS_ADDR,

             page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));

       virtual-apic page是apic-page的影子页面;当"virtual interrupt delivery"为1时80H, B0H, VICR可以进行写访问,但不能读,其他寄存器读不可访问。

       引入apic-page让处理器自动监控local apic

       d. if(vmx_vm_has_apicv(vcpu->kvm))

              memset(&vmx->pi_desc,0, sizeof(struct pi_desc));

当guest以线性地址访问apic-access page时,实际访问的是virtual-apic page;当以guest-physical访问apic-access page时产生vm-exit;

 

4.2.2 Local APIC access VM-Exit及其处理

 (1) APIC_Base 的维护

由于host 的cpu和guest vcpu共享apic_base,所以当vm-entry时需要重新恢复guest apic_base的值

  用户空间:VM-Entry: kvm_arch_put_registers  ==> kvm_put_apic  ==>

            kvm_put_apic_state(apic,&kapic);

            return kvm_vcpu_ioctl(env, KVM_SET_LAPIC,&kapic);

内核空间:kvm_arch_vcpu_ioctl  case KVM_SET_LAPIC

kvm_vcpu_ioctl_set_lapic  ==》 kvm_apic_post_state_restore==> kvm_lapic_set_base

 

(2) VM-Exit for apic access

读写apic-access page越界或跨寄存器边界时会产生vm-exit;guest 以物理地址(guest-physical address)访问时直接发生vm-exit;写入local apic version, isr, tmr, irr等寄存器时也会产生vm-exit。这些情况下发生的都是apic-access vm-exit

    [EXIT_REASON_APIC_ACCESS]             = handle_apic_access, //访问apic-access page产生

 

static int handle_apic_access(structkvm_vcpu *vcpu)

{

    if (likely(fasteoi)) {

       unsigned long exit_qualification =vmcs_readl(EXIT_QUALIFICATION);

       int access_type, offset;

 

       access_type = exit_qualification & APIC_ACCESS_TYPE;

       offset = exit_qualification & APIC_ACCESS_OFFSET;//apic-page内偏移

       if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&

           (offset ==APIC_EOI)) {

           kvm_lapic_set_eoi(vcpu); // call apic_reg_write(vcpu->arch.apic,APIC_EOI, 0);

           skip_emulated_instruction(vcpu);

           return 1;

       }

    }

    return emulate_instruction(vcpu, 0) == EMULATE_DONE;

}

emulate_instruction 会调用到read_write_emulator_ops ==》

vcpu_mmio_write ==》kvm_iodevice_write ==》

static const structkvm_io_device_ops apic_mmio_ops = {

    .read     = apic_mmio_read,

    .write    = apic_mmio_write,

};

apic_mmio_write ==》apic_reg_write(apic, offset & 0xff0, val);

  static int apic_reg_write(struct kvm_lapic*apic, u32 reg, u32 val)

{  。。。。。。

    caseAPIC_ID:     /* Local APIC ID */

       if (!apic_x2apic_mode(apic))

           kvm_apic_set_id(apic, val >> 24);

    case APIC_EOI:

       apic_set_eoi(apic);

       break;

    。。。。。。

}

/*
 * Store a new APIC ID and refresh the VM-wide APIC map.
 *
 * @apic: local APIC to update
 * @id:   8-bit ID; it is written into bits 31:24 of the APIC_ID register
 */
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
    apic_set_reg(apic, APIC_ID, id << 24);
    recalculate_apic_map(apic->vcpu->kvm);
}

/*
 * Write a 32-bit value into the register page of a local APIC.
 *
 * @apic:    local APIC whose register page (apic->regs) is written
 * @reg_off: byte offset of the register inside the page
 * @val:     value to store
 */
static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
    /* store into the matching location of the virtual-APIC page */
    *((u32 *) (apic->regs + reg_off)) = val;
}

 

 [EXIT_REASON_APIC_WRITE]              = handle_apic_write, //写入apic-access page

handle_apic_write ==> kvm_apic_write_nodecode(vcpu,offset);

/*
 * Replay a register write without decoding a guest instruction: the current
 * value of the register is read back and pushed through apic_reg_write().
 *
 * @vcpu:   vCPU whose local APIC is accessed
 * @offset: offset of the written register; masked with 0xff0 before use
 */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
    u32 val = 0;

    offset &= 0xff0;

    /* Fetch the 4-byte value at that offset ... */
    apic_reg_read(vcpu->arch.apic, offset, 4, &val);
    /* ... and run it through the regular register-write path. */
    apic_reg_write(vcpu->arch.apic, offset, val);
}

 

(3)主要寄存器的虚拟化

 1. EOI

static intapic_set_eoi(struct kvm_lapic *apic)

{

    int vector = apic_find_highest_isr(apic);

    .......

   

    apic_clear_isr(vector, apic); //call kvm_x86_ops->hwapic_isr_update(vcpu->kvm,

                        apic_find_highest_isr(apic)) = vmx_hwapic_isr_update

 

    apic_update_ppr(apic);

 

    kvm_ioapic_send_eoi(apic, vector);

    kvm_make_request(KVM_REQ_EVENT, apic->vcpu);

    return vector;

}

//设置VMCS guest intr 寄存器

/*
 * Abridged excerpt of vmx_hwapic_isr_update(): store @isr into the upper
 * byte of the 16-bit GUEST_INTR_STATUS VMCS field, leaving the lower byte
 * untouched. The VMCS is only written when the value actually changes.
 *
 * @kvm: owning VM (not referenced by the statements shown here)
 * @isr: new value for the upper byte
 */
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
{
    /* ... declarations and preliminary checks elided in this excerpt ... */

    status = vmcs_read16(GUEST_INTR_STATUS);
    old = status >> 8;              /* current upper byte */
    if (isr != old) {
        status &= 0xff;             /* keep the lower byte */
        status |= isr << 8;
        vmcs_write16(GUEST_INTR_STATUS, status);
    }
}

GUEST_INTR_STATUS 该寄存器分为两个8bit, RVI记录最高优先级的 virtual-interrupt向量号,

SVI记录正在执行的virtual-interrupt向量号; EOI命令完成后将返回到之前被中断的服务继续执行, 因此新的SVI 等于之前被中断的服务向量号. apic_find_highest_isr返回之前的最优先的中断向量号

 

2 TPR

 apic_reg_write ==> case APIC_TASKPRI:

       report_tpr_access(apic, true);

       apic_set_tpr(apic, val & 0xff);

 

/*
 * Abridged excerpt of __report_tpr_access(): queue a
 * KVM_REQ_REPORT_TPR_ACCESS request and record the guest RIP and the access
 * direction in run->tpr_access.
 *
 * @apic:  local APIC whose TPR was accessed
 * @write: true for a write access, false for a read
 */
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
    /* ... declarations of vcpu and run elided in this excerpt ... */

    kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
    run->tpr_access.rip = kvm_rip_read(vcpu);
    run->tpr_access.is_write = write;
}

vcpu_enter_guest

    if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {

           vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;

           r = 0;

           goto out;

       }

回到用户态后会调用kvm_handle_tpr_access来处理vm-exit

 

/*
 * Store a new task-priority value and recompute the processor priority.
 *
 * @apic: local APIC to update
 * @tpr:  value written to the APIC_TASKPRI register
 */
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
    apic_set_reg(apic, APIC_TASKPRI, tpr);
    apic_update_ppr(apic);
}

 

static voidapic_update_ppr(struct kvm_lapic *apic)

{

    ......

    old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);

    tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);

    isr = apic_find_highest_isr(apic);

    isrv = (isr != -1) ? isr : 0;

 

    if ((tpr & 0xf0) >= (isrv & 0xf0))

       ppr = tpr & 0xff;

    else

       ppr = isrv & 0xf0;

 

    if (old_ppr != ppr) {

       apic_set_reg(apic, APIC_PROCPRI, ppr);

       if (ppr < old_ppr)

           kvm_make_request(KVM_REQ_EVENT, apic->vcpu);

    }

}

PPR(processor priority register)是只读寄存器,能触发它更新的是TPR, EOI和VM-Entry

其设置如下所示

if (VTPR[7:4] >= SVI[7:4])

  VPPR = VTPR & 0xFF;

else

 VPPR = SVI & 0XF0;

VPPR[31:8] = 0;

 

3. Self-IPI

apic_reg_write ==>      

       apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));

       apic_send_ipi(apic);

static voidapic_send_ipi(struct kvm_lapic *apic)

{

    u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);

    u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);

    struct kvm_lapic_irq irq;

 

    irq.vector = icr_low & APIC_VECTOR_MASK;

    irq.delivery_mode = icr_low & APIC_MODE_MASK;

    irq.dest_mode = icr_low & APIC_DEST_MASK;

    irq.level = icr_low & APIC_INT_ASSERT;

    irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;

    irq.shorthand = icr_low & APIC_SHORT_MASK;

    if (apic_x2apic_mode(apic))

       irq.dest_id = icr_high;

    else

       irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

 

    trace_kvm_apic_ipi(icr_low, irq.dest_id);

    kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq,NULL);

}

调用kvm_irq_delivery_to_apic完成irq的delivery, 下一节将分析该过程。

 

4.2.3  虚拟中断delivery

intkvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,

       struct kvm_lapic_irq *irq, unsigned long *dest_map)

{

    int i, r = -1;

    struct kvm_vcpu *vcpu, *lowest = NULL;

 

    if (irq->dest_mode == 0 && irq->dest_id == 0xff&&

           kvm_is_dm_lowest_prio(irq)) {

       irq->delivery_mode = APIC_DM_FIXED;

    }

 

    if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r,dest_map))

       return r;

 

    kvm_for_each_vcpu(i, vcpu, kvm) {

       if (!kvm_apic_present(vcpu)) //LAPIC enable

           continue;

 

       if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,

                  irq->dest_id, irq->dest_mode)) //根据dest查找vcpu

           continue;

 

       if (!kvm_is_dm_lowest_prio(irq)) {

           if (r < 0)

              r = 0;

           r += kvm_apic_set_irq(vcpu, irq, dest_map);

       } else if (kvm_lapic_enabled(vcpu)) {

           if (!lowest)

              lowest = vcpu;

           else if (kvm_apic_compare_prio(vcpu, lowest) < 0)

              lowest = vcpu;

       }

    }

 

    if (lowest)

       r = kvm_apic_set_irq(lowest, irq, dest_map);

 

    return r;

}

 

kvm_irq_delivery_to_apic_fast对于SELF-IPI立即调用kvm_apic_set_irq(src->vcpu, irq, dest_map);而不需要搜索了。否则遍历vcpu, 接着判断virtual interrupt是否允许,允许条件如下:  RVI[7:4] > VPPR[7:4]

 

int kvm_apic_set_irq(structkvm_vcpu *vcpu, struct kvm_lapic_irq *irq,

       unsigned long *dest_map)

{

    struct kvm_lapic *apic = vcpu->arch.apic;

 

    return __apic_accept_irq(apic, irq->delivery_mode,irq->vector,

           irq->level, irq->trig_mode, dest_map);

}

 

__apic_accept_irq==> 本节分析2个case

    case APIC_DM_LOWEST:

       vcpu->arch.apic_arb_prio++;

    case APIC_DM_FIXED: // delivery 由vector指定的irq到targeprocess

      。。。。。。

       if (dest_map)

           __set_bit(vcpu->vcpu_id, dest_map);

 

       if (kvm_x86_ops->deliver_posted_interrupt)

           kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);

       else {

           apic_set_irr(vector, apic); //设置中断irr_pending = true

           kvm_make_request(KVM_REQ_EVENT, vcpu);

           kvm_vcpu_kick(vcpu);

       }

       break;

    case APIC_DM_STARTUP: //发送"start-up" IPI

       result = 1;

       apic->sipi_vector = vector;

       /* make sure sipi_vector is visible for the receiver */

       smp_wmb();

       set_bit(KVM_APIC_SIPI, &apic->pending_events);

       kvm_make_request(KVM_REQ_EVENT, vcpu);

       kvm_vcpu_kick(vcpu);

       break;

 

kvm_vcpu_kick(vcpu);让目标cpu调度执行

 

.deliver_posted_interrupt =vmx_deliver_posted_interrupt

/*
 * Post @vector to @vcpu through its posted-interrupt descriptor
 * (vmx->pi_desc).
 *
 * Nothing more is done when pi_test_and_set_pir() returns nonzero
 * (presumably the vector was already recorded - confirm against the helper).
 * Otherwise the descriptor's ON flag is test-and-set, KVM_REQ_EVENT is
 * raised, and a POSTED_INTR_VECTOR IPI is sent to vcpu->cpu only if ON was
 * previously clear and the vCPU is in IN_GUEST_MODE.
 */
static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
    struct vcpu_vmx *vmx = to_vmx(vcpu);
    int r;

    if (pi_test_and_set_pir(vector, &vmx->pi_desc))
        return;

    r = pi_test_and_set_on(&vmx->pi_desc);
    kvm_make_request(KVM_REQ_EVENT, vcpu);

    if (!r && (vcpu->mode == IN_GUEST_MODE))
        apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
                POSTED_INTR_VECTOR);
}

struct apic __read_mostly*apic = &apic_flat; //arch/x86/kernel/apic_flat_64.c

 

vcpu_enter_guest==>

if (kvm_check_request(KVM_REQ_EVENT,vcpu) || req_int_win) {

       kvm_apic_accept_events(vcpu);

       if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {

           r = 1;

           goto out;

       }

       if (inject_pending_event(vcpu, req_int_win) != 0)

           req_immediate_exit = true;

       /* enable NMI/IRQ window open exits if needed */

       else if (vcpu->arch.nmi_pending)

           kvm_x86_ops->enable_nmi_window(vcpu);

       else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)

           kvm_x86_ops->enable_irq_window(vcpu); //enable_irq_window注入中断

 

       if (kvm_lapic_enabled(vcpu)) {

           if (kvm_x86_ops->hwapic_irr_update)

              kvm_x86_ops->hwapic_irr_update(vcpu,

                  kvm_lapic_find_highest_irr(vcpu));

           update_cr8_intercept(vcpu);//64bit cpu支持CR8访问TPR

           kvm_lapic_sync_to_vapic(vcpu);

       }

}

kvm_cpu_has_injectable_intr==>kvm_apic_has_interrupt

判断是否有apic中断irr_pending 为true

 

voidkvm_apic_accept_events(struct kvm_vcpu *vcpu)

{

    ......

    pe = xchg(&apic->pending_events, 0);

 

    if (test_bit(KVM_APIC_INIT, &pe)) {

       kvm_lapic_reset(vcpu);

       kvm_vcpu_reset(vcpu);

       if (kvm_vcpu_is_bsp(apic->vcpu))

           vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

       else

           vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;

    }

    if (test_bit(KVM_APIC_SIPI, &pe) &&

        vcpu->arch.mp_state== KVM_MP_STATE_INIT_RECEIVED) {

        /* evaluatepending_events before reading the vector */

       smp_rmb();

       sipi_vector = apic->sipi_vector;

       pr_debug("vcpu %d received sipi with vector # %x\n",

            vcpu->vcpu_id,sipi_vector);

       kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);

       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

    }

}

 

voidkvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)

{

    kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);

    cs.selector = vector << 8;

    cs.base = vector << 12;

    kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);

    kvm_rip_write(vcpu, 0); //更新RIP,到VM-Entry时将在新的rip执行

}

 

 

4.2.4 IOAPIC 虚拟化

(1) 初始化

kvm_ioapic_init ==>

a)  kvm_ioapic_reset

b)  kvm_iodevice_init(&ioapic->dev,&ioapic_mmio_ops);

c)  kvm_io_bus_register_dev(kvm,KVM_MMIO_BUS, ioapic->base_address,

IOAPIC_MEM_LENGTH,&ioapic->dev);

 

/*
 * Put the virtual IOAPIC into its reset state: every redirection-table
 * entry gets its mask bit set, the base address returns to
 * IOAPIC_DEFAULT_BASE_ADDRESS, and ioregsel, irr and id are zeroed. RTC EOI
 * tracking and the handled-vector set are then refreshed.
 */
static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
    int i;

    for (i = 0; i < IOAPIC_NUM_PINS; i++)
        ioapic->redirtbl[i].fields.mask = 1;

    ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
    ioapic->ioregsel = 0;
    ioapic->irr = 0;
    ioapic->id = 0;

    rtc_irq_eoi_tracking_reset(ioapic);
    update_handled_vectors(ioapic);
}

ioapic最重要的就是redirection table register. redirtbl用于存储该寄存器: union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];

unionkvm_ioapic_redirect_entry {

    u64 bits;

    struct {

       u8 vector;

       u8 delivery_mode:3;

       u8 dest_mode:1;

       u8 delivery_status:1;

       u8 polarity:1;

       u8 remote_irr:1;

       u8 trig_mode:1;

       u8 mask:1;

       u8 reserve:7;

       u8 reserved[4];

       u8 dest_id;

    } fields;

};

ioapic_mmio_ops负责虚拟化mmio操作

static const structkvm_io_device_ops ioapic_mmio_ops = {

    .read     =ioapic_mmio_read,

    .write    =ioapic_mmio_write,

};

 

kvm_vm_ioctl_set_irqchip  ==> case KVM_IRQCHIP_IOAPIC  ==>kvm_set_ioapic

  调用kvm_ioapic_inject_all处理pending irq,

 

(2) set_irq

kvm_ioapic_set_irq ==> ioapic_set_irq(ioapic,irq, irq_level, line_status);

/*
 * Abridged excerpt of ioapic_set_irq(): latch input pin @irq in the IOAPIC
 * IRR and have it serviced.
 *
 * @ioapic:      virtual IOAPIC
 * @irq:         pin number; indexes redirtbl and selects the IRR bit
 * @irq_level:   requested line level (not used by the statements shown)
 * @line_status: forwarded to ioapic_service()
 */
static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
        int irq_level, bool line_status)
{
    union kvm_ioapic_redirect_entry entry;
    u32 mask = 1 << irq;            /* IRR bit for this pin */

    /* ... further declarations elided in this excerpt ... */

    entry = ioapic->redirtbl[irq];
    edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);

    /* ... elided in this excerpt ... */

    old_irr = ioapic->irr;
    ioapic->irr |= mask;

    ret = ioapic_service(ioapic, irq, line_status);

    /* ... remainder of the function elided in this excerpt ... */
}

 

static intioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)

a. 根据ioapic->redirtbl[irq] 给struct kvm_lapic_irq irqe;初始化

    irqe.dest_id= entry->fields.dest_id;

    irqe.vector = entry->fields.vector;

    irqe.dest_mode = entry->fields.dest_mode;

    irqe.trig_mode = entry->fields.trig_mode;

    irqe.delivery_mode = entry->fields.delivery_mode << 8;

    irqe.level = 1;

    irqe.shorthand = 0;

b. 调用kvm_irq_delivery_to_apic(ioapic->kvm,NULL, &irqe, NULL); delivery中断

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/681109
推荐阅读
相关标签
  

闽ICP备14008679号