From: Michael Ellerman Date: Tue, 14 Feb 2017 06:18:29 +0000 (+1100) Subject: Merge branch 'topic/ppc-kvm' into next X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=da0e7e6276968fcc61ac7484d0026cd5fdd94dc3;p=openwrt%2Fstaging%2Fblogic.git Merge branch 'topic/ppc-kvm' into next Merge the topic branch we're sharing with the kvm-ppc tree. --- da0e7e6276968fcc61ac7484d0026cd5fdd94dc3 diff --cc arch/powerpc/include/asm/head-64.h index a475711cd9c3,9bd81619d090..5067048daad4 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@@ -223,8 -217,8 +223,8 @@@ name FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define TRAMP_KVM_BEGIN(name) \ +#define TRAMP_KVM_BEGIN(name) \ - TRAMP_REAL_BEGIN(name) + TRAMP_VIRT_BEGIN(name) #else #define TRAMP_KVM_BEGIN(name) #endif diff --cc arch/powerpc/include/asm/hvcall.h index 490c4b9f4e3a,54d11b3a6bf7..3cc12a86ef5d --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@@ -276,8 -276,7 +276,9 @@@ #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C #define H_CLEAR_HPT 0x358 +#define H_RESIZE_HPT_PREPARE 0x36C +#define H_RESIZE_HPT_COMMIT 0x370 + #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 #define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET diff --cc arch/powerpc/include/asm/opal.h index 08ddea966601,16efe7406776..1ff03a6da76e --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@@ -228,11 -225,6 +225,7 @@@ int64_t opal_int_set_mfrr(uint32_t cpu int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); - int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, - uint32_t pe_num, uint32_t tce_size, - uint64_t dma_addr, uint32_t npages); +int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --cc arch/powerpc/include/asm/prom.h index 00fcfcbdd053,8af2546ea593..2c8001cc93b6 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@@ -151,11 -153,17 +153,18 @@@ struct of_drconf_cell #define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ +#define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ - #define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */ - #define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */ - #define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */ - #define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */ + #define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */ + #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ + #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ + #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ + #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ + #define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ + #define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ + #define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ + #define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ + #define OV5_MMU_SLB 0x1800 /* always use SLB */ + #define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --cc arch/powerpc/kernel/exceptions-64s.S index a6205a4a3574,76dd7738c122..857bf7c5b946 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@@ -720,14 -720,10 +720,10 @@@ hardware_interrupt_hv FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) - do_kvm_0x500: - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -EXC_REAL_END(hardware_interrupt, 0x500, 0x600) +EXC_REAL_END(hardware_interrupt, 0x500, 0x100) -EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION @@@ -735,8 -731,10 +731,10 @@@ FTR_SECTION_ELSE _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) + TRAMP_KVM(PACA_EXGEN, 0x500) + TRAMP_KVM_HV(PACA_EXGEN, 0x500) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) @@@ -884,35 -907,15 +907,15 @@@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE b system_call_common ; #endif -EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) +EXC_REAL_BEGIN(system_call, 0xc00, 0x100) - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ - #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST_PR(0xc00) - GET_SCRATCH0(r13) - #else - HMT_MEDIUM; - #endif + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID SYSCALL_PSERIES_3 -EXC_REAL_END(system_call, 0xc00, 0xd00) +EXC_REAL_END(system_call, 0xc00, 0x100) -EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) +EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) - HMT_MEDIUM + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_DIRECT SYSCALL_PSERIES_3 @@@ -926,8 -929,8 +929,8 @@@ EXC_VIRT(single_step, 0x4d00, 0x100, 0x TRAMP_KVM(PACA_EXGEN, 0xd00) EXC_COMMON(single_step_common, 0xd00, single_step_exception) -EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) -EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x4e20, 0xe00) +EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20) - EXC_VIRT_NONE(0x4e00, 0x20) ++EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) EXC_COMMON_BEGIN(h_data_storage_common) mfspr r10,SPRN_HDAR @@@ -942,8 -945,8 +945,8 @@@ b ret_from_except -EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) -EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x4e40, 0xe20) +EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20) - EXC_VIRT_NONE(0x4e20, 0x20) ++EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20) TRAMP_KVM_HV(PACA_EXGEN, 0xe20) EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) diff --cc arch/powerpc/mm/init-common.c index f2108c40e697,2be5dc242832..eb8c6c8c4851 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@@ -78,15 -79,11 +79,15 @@@ void pgtable_cache_add(unsigned shift, align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); new = kmem_cache_create(name, table_size, align, 0, ctor); + if (!new) + panic("Could not allocate pgtable cache for order %d", shift); + kfree(name); pgtable_cache[shift - 1] = new; + pr_debug("Allocated pgtable cache for order %d\n", shift); } - + EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ void pgtable_cache_init(void) { diff --cc arch/powerpc/platforms/powernv/opal-wrappers.S index 63fe1b2b1175,28799e557348..6693f75e93d1 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@@ -304,12 -296,7 +296,8 @@@ OPAL_CALL(opal_pci_get_presence_state OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); - OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); - OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); - OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); - OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); diff --cc arch/powerpc/platforms/pseries/lpar.c index c2e13a51f369,0587655aea69..251060cf1713 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@@ -611,112 -609,29 +611,135 @@@ static int __init disable_bulk_remove(c __setup("bulk_remove=", disable_bulk_remove); +#define HPT_RESIZE_TIMEOUT 10000 /* ms */ + +struct hpt_resize_state { + unsigned long shift; + int commit_rc; +}; + +static int pseries_lpar_resize_hpt_commit(void *data) +{ + struct hpt_resize_state *state = data; + + state->commit_rc = plpar_resize_hpt_commit(0, state->shift); + if (state->commit_rc != H_SUCCESS) + return -EIO; + + /* Hypervisor has transitioned the HTAB, update our globals */ + ppc64_pft_size = state->shift; + htab_size_bytes = 1UL << ppc64_pft_size; + htab_hash_mask = (htab_size_bytes >> 7) - 1; + + return 0; +} + +/* Must be called in user context */ +static int pseries_lpar_resize_hpt(unsigned long shift) +{ + struct hpt_resize_state state = { + .shift = shift, + .commit_rc = H_FUNCTION, + }; + unsigned int delay, total_delay = 0; + int rc; + ktime_t t0, t1, t2; + + might_sleep(); + + if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + return -ENODEV; + + printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n", + shift); + + t0 = ktime_get(); + + rc = plpar_resize_hpt_prepare(0, shift); + while (H_IS_LONG_BUSY(rc)) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > HPT_RESIZE_TIMEOUT) { + /* prepare with shift==0 cancels an in-progress resize */ + rc = plpar_resize_hpt_prepare(0, 0); + if (rc != H_SUCCESS) + printk(KERN_WARNING + "lpar: Unexpected error %d cancelling timed out HPT resize\n", + rc); + return -ETIMEDOUT; + } + msleep(delay); + rc = plpar_resize_hpt_prepare(0, shift); + }; + + switch (rc) { + case H_SUCCESS: + /* Continue on */ + break; + + case H_PARAMETER: + return -EINVAL; + case H_RESOURCE: + return -EPERM; + default: + printk(KERN_WARNING + "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n", + rc); + return -EIO; + } + + t1 = ktime_get(); + + rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL); + + t2 = ktime_get(); + + if (rc != 0) { + switch (state.commit_rc) { + case H_PTEG_FULL: + printk(KERN_WARNING + "lpar: Hash collision while resizing HPT\n"); + return -ENOSPC; + + default: + printk(KERN_WARNING + "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n", + state.commit_rc); + return -EIO; + }; + } + + printk(KERN_INFO + "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n", + shift, (long long) ktime_ms_delta(t1, t0), + (long long) ktime_ms_delta(t2, t1)); + + return 0; +} + + /* Actually only used for radix, so far */ + static int pseries_lpar_register_process_table(unsigned long base, + unsigned long page_size, unsigned long table_size) + { + long rc; + unsigned long flags = PROC_TABLE_NEW; + + if (radix_enabled()) + flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; + for (;;) { + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, + page_size, table_size); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc != H_SUCCESS) { + pr_err("Failed to register process table (rc=%ld)\n", rc); + BUG(); + } + return rc; + } + void __init hpte_init_pseries(void) { mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; @@@ -728,9 -643,14 +751,15 @@@ mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } + void radix_init_pseries(void) + { + pr_info("Using radix MMU under hypervisor\n"); + register_process_table = pseries_lpar_register_process_table; + } + #ifdef CONFIG_PPC_SMLPAR #define CMO_FREE_HINT_DEFAULT 1 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;