u64 reg_id_aa64mmfr2;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64zfr0;
u32 reg_id_dfr0;
u32 reg_id_isar0;
u32 reg_mvfr0;
u32 reg_mvfr1;
u32 reg_mvfr2;
+
+ /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
+ u64 reg_zcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
#define __ASM_CPUFEATURE_H
#include <asm/cpucaps.h>
+#include <asm/fpsimd.h>
#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
#include <asm/sysreg.h>
/*
return val == ID_AA64PFR0_EL0_32BIT_64BIT;
}
+static inline bool id_aa64pfr0_sve(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+
+ return val > 0;
+}
+
void __init setup_cpu_features(void);
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
return false;
}
+/*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+static inline u64 read_zcr_features(void)
+{
+ u64 zcr;
+ unsigned int vq_max;
+
+ /*
+ * Set the maximum possible VL, and write zeroes to all other
+ * bits to see if they stick.
+ */
+ sve_kernel_enable(NULL);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+ zcr = read_sysreg_s(SYS_ZCR_EL1);
+ zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+ vq_max = sve_vq_from_vl(sve_get_vl());
+ zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+ return zcr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
+extern int sve_kernel_enable(void *);
extern int __ro_after_init sve_max_vl;
extern int sve_set_vector_length(struct task_struct *task,
unsigned long vl, unsigned long flags);
+/*
+ * Probing and setup functions.
+ * Calls to these functions must be serialised with one another.
+ */
+extern void __init sve_init_vq_map(void);
+extern void sve_update_vq_map(void);
+extern int sve_verify_vq_map(void);
+extern void __init sve_setup(void);
+
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
+static inline void sve_init_vq_map(void) { }
+static inline void sve_update_vq_map(void) { }
+static inline int sve_verify_vq_map(void) { return 0; }
+static inline void sve_setup(void) { }
#endif /* ! CONFIG_ARM64_SVE */
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
+#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_zcr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
+ ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
+ ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ /* Op1 = 0, CRn = 1, CRm = 2 */
+ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+
/* Op1 = 3, CRn = 0, CRm = 0 */
{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
}
+ if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
+ sve_init_vq_map();
+ }
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
+ info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
+
/*
* If we have AArch32, we care about 32-bit features for compat.
* If the system doesn't support AArch32, don't update them.
info->reg_mvfr2, boot->reg_mvfr2);
}
+ if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
+ info->reg_zcr, boot->reg_zcr);
+
+ /* Probe vector lengths, unless we already gave up on SVE */
+ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
+ !sys_caps_initialised)
+ sve_update_vq_map();
+ }
+
/*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
}
}
+static void verify_sve_features(void)
+{
+ u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+ u64 zcr = read_zcr_features();
+
+ unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
+ unsigned int len = zcr & ZCR_ELx_LEN_MASK;
+
+ if (len < safe_len || sve_verify_vq_map()) {
+ pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+ smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Add checks on other ZCR bits here if necessary */
+}
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
verify_local_cpu_errata_workarounds();
verify_local_cpu_features(arm64_features);
verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
if (system_supports_32bit_el0())
verify_local_elf_hwcaps(compat_elf_hwcaps);
+
+ if (system_supports_sve())
+ verify_sve_features();
}
void check_local_cpu_capabilities(void)
if (system_supports_32bit_el0())
setup_elf_hwcaps(compat_elf_hwcaps);
+ sve_setup();
+
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
#include <linux/bitops.h>
#include <linux/bug.h>
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+ info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
/* Update the 32bit ID registers only if AArch32 is implemented */
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
}
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(info->reg_id_aa64pfr0))
+ info->reg_zcr = read_zcr_features();
+
cpuinfo_detect_icache_policy(info);
}
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
-static int sve_default_vl = SVE_VL_MIN;
+static int sve_default_vl = -1;
#ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = -1;
/* Set of available vector lengths, as vq_to_bit(vq): */
-static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
-extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
#endif /* ! CONFIG_ARM64_SVE */
return 0;
}
+/*
+ * Bitmap for temporary storage of the per-CPU set of supported vector lengths
+ * during secondary boot.
+ */
+static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
+
+static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
+{
+ unsigned int vq, vl;
+ unsigned long zcr;
+
+ bitmap_zero(map, SVE_VQ_MAX);
+
+ zcr = ZCR_ELx_LEN_MASK;
+ zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
+
+ for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
+ write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
+ vl = sve_get_vl();
+ vq = sve_vq_from_vl(vl); /* skip intervening lengths */
+ set_bit(vq_to_bit(vq), map);
+ }
+}
+
+void __init sve_init_vq_map(void)
+{
+ sve_probe_vqs(sve_vq_map);
+}
+
+/*
+ * If we haven't committed to the set of supported VQs yet, filter out
+ * those not supported by the current CPU.
+ */
+void sve_update_vq_map(void)
+{
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+}
+
+/* Check whether the current CPU supports all VQs in the committed set */
+int sve_verify_vq_map(void)
+{
+ int ret = 0;
+
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
+ SVE_VQ_MAX);
+ if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
+ pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
+ smp_processor_id());
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * Enable SVE for EL1.
+ * Intended for use by the cpufeatures code during CPU boot.
+ */
+int sve_kernel_enable(void *__always_unused p)
+{
+ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
+ isb();
+
+ return 0;
+}
+
+void __init sve_setup(void)
+{
+ u64 zcr;
+
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The SVE architecture mandates support for 128-bit vectors,
+ * so sve_vq_map must have at least SVE_VQ_MIN set.
+ * If something went wrong, at least try to patch it up:
+ */
+ if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
+ set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
+
+ zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+ sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
+
+ /*
+ * Sanity-check that the max VL we determined through CPU features
+ * corresponds properly to sve_vq_map. If not, do our best:
+ */
+ if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
+ sve_max_vl = find_supported_vector_length(sve_max_vl);
+
+ /*
+ * For the default VL, pick the maximum supported value <= 64.
+ * VL == 64 is guaranteed not to grow the signal frame.
+ */
+ sve_default_vl = find_supported_vector_length(64);
+
+ pr_info("SVE: maximum available vector length %u bytes per vector\n",
+ sve_max_vl);
+ pr_info("SVE: default vector length %u bytes per vector\n",
+ sve_default_vl);
+}
+
/*
* Called from the put_task_struct() path, which cannot get here
* unless dead_task is really dead and not schedulable.
* This is where we ensure that all user tasks have a valid
* vector length configured: no kernel task can become a user
* task without an exec and hence a call to this function.
+ * By the time the first call to this function is made, all
+ * early hardware probing is complete, so sve_default_vl
+ * should be valid.
* If a bug causes this to go wrong, we make some noise and
* try to fudge thread.sve_vl to a safe value here.
*/