KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space

author Sam Bobroff <sam.bobroff@au1.ibm.com>

Wed, 25 Jul 2018 06:12:02 +0000 (16:12 +1000)

committer Paul Mackerras <paulus@ozlabs.org>

Thu, 26 Jul 2018 03:23:52 +0000 (13:23 +1000)
author Sam Bobroff <sam.bobroff@au1.ibm.com>
Wed, 25 Jul 2018 06:12:02 +0000 (16:12 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Thu, 26 Jul 2018 03:23:52 +0000 (13:23 +1000)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h

index 1f345a0b6ba20b24b5408f21099fb10f68e9ff82..83a9aa3cf689172648d234da87ec7d4e95add69d 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
  #define SPLIT_HACK_MASK                        0xff000000
  #define SPLIT_HACK_OFFS                        0xfb000000
  
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+       const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+       int stride = kvm->arch.emul_smt_mode;
+       int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+       u32 packed_id;
+
+       if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+               return 0;
+       packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+       if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+               return 0;
+       return packed_id;
+}
+
  #endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index d73b29b6aaa139dcd7d34b8be3a73b1f8a7eeea8..785245e09f3213aea006891036068fa5dd2a1eac 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1816,7 +1816,7 @@ static int threads_per_vcore(struct kvm *kvm)
         return threads_per_subcore;
  }
  
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
  {
         struct kvmppc_vcore *vcore;
  
@@ -1830,7 +1830,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
         init_swait_queue_head(&vcore->wq);
         vcore->preempt_tb = TB_NIL;
         vcore->lpcr = kvm->arch.lpcr;
-       vcore->first_vcpuid = core * kvm->arch.smt_mode;
+       vcore->first_vcpuid = id;
         vcore->kvm = kvm;
         INIT_LIST_HEAD(&vcore->preempt_list);
  
@@ -1989,10 +1989,16 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
                                                    unsigned int id)
  {
         struct kvm_vcpu *vcpu;
-       int err;
+       int err = -EINVAL;
         int core;
         struct kvmppc_vcore *vcore;
  
+       if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode) &&
+           cpu_has_feature(CPU_FTR_ARCH_300)) {
+               pr_devel("DNCI: VCPU ID too high\n");
+               goto out;
+       }
+
         err = -ENOMEM;
         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
         if (!vcpu)
@@ -2048,12 +2054,21 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
         mutex_lock(&kvm->lock);
         vcore = NULL;
         err = -EINVAL;
-       core = id / kvm->arch.smt_mode;
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               BUG_ON(kvm->arch.smt_mode != 1);
+               core = kvmppc_pack_vcpu_id(kvm, id);
+       } else {
+               core = id / kvm->arch.smt_mode;
+       }
         if (core < KVM_MAX_VCORES) {
                 vcore = kvm->arch.vcores[core];
-               if (!vcore) {
+               if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+                       pr_devel("KVM: collision on id %u", id);
+                       vcore = NULL;
+               } else if (!vcore) {
                         err = -ENOMEM;
-                       vcore = kvmppc_vcore_create(kvm, core);
+                       vcore = kvmppc_vcore_create(kvm,
+                                       id & ~(kvm->arch.smt_mode - 1));
                         kvm->arch.vcores[core] = vcore;
                         kvm->arch.online_vcores++;
                 }
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c

index f9818d7d3381d8c072605534202fb6c91b7d58e4..126f02b3ffb8271e0cac825e13ecf5146de0afe8 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
         return -EBUSY;
  }
  
+static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
+{      /* vp_base offset by @server packed via kvmppc_pack_vcpu_id() */
+       return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
  static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
                              struct kvmppc_xive_src_block *sb,
                              struct kvmppc_xive_irq_state *state)
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
          */
         if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
                 xive_native_configure_irq(hw_num,
-                                         xive->vp_base + state->act_server,
+                                         xive_vp(xive, state->act_server),
                                           MASKED, state->number);
                 /* set old_p so we can track if an H_EOI was done */
                 state->old_p = true;
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
          */
         if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
                 xive_native_configure_irq(hw_num,
-                                         xive->vp_base + state->act_server,
+                                         xive_vp(xive, state->act_server),
                                           state->act_priority, state->number);
                 /* If an EOI is needed, do it here */
                 if (!state->old_p)
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
         kvmppc_xive_select_irq(state, &hw_num, NULL);
  
         return xive_native_configure_irq(hw_num,
-                                        xive->vp_base + server,
+                                        xive_vp(xive, server),
                                          prio, state->number);
  }
  
@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
          * which is fine for a never started interrupt.
          */
         xive_native_configure_irq(hw_irq,
-                                 xive->vp_base + state->act_server,
+                                 xive_vp(xive, state->act_server),
                                   state->act_priority, state->number);
  
         /*
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
  
         /* Reconfigure the IPI */
         xive_native_configure_irq(state->ipi_number,
-                                 xive->vp_base + state->act_server,
+                                 xive_vp(xive, state->act_server),
                                   state->act_priority, state->number);
  
         /*
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
                 pr_devel("Duplicate !\n");
                 return -EEXIST;
         }
-       if (cpu >= KVM_MAX_VCPUS) {
+       if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
                 pr_devel("Out of bounds !\n");
                 return -EINVAL;
         }
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
         xc->xive = xive;
         xc->vcpu = vcpu;
         xc->server_num = cpu;
-       xc->vp_id = xive->vp_base + cpu;
+       xc->vp_id = xive_vp(xive, cpu);
         xc->mfrr = 0xff;
         xc->valid = true;
author	Sam Bobroff <sam.bobroff@au1.ibm.com>
	Wed, 25 Jul 2018 06:12:02 +0000 (16:12 +1000)
committer	Paul Mackerras <paulus@ozlabs.org>
	Thu, 26 Jul 2018 03:23:52 +0000 (13:23 +1000)
arch/powerpc/include/asm/kvm_book3s.h		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_xive.c		patch \| blob \| history