x86: sched: Provide arch implementations using aperf/mperf
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 2 Sep 2009 11:49:18 +0000 (13:49 +0200)
committerIngo Molnar <mingo@elte.hu>
Tue, 15 Sep 2009 14:51:27 +0000 (16:51 +0200)
APERF/MPERF support for cpu_power.

APERF/MPERF is architecturally defined to be a relative scale of work
capacity per logical cpu; this is assumed to include SMT and Turbo mode.

APERF/MPERF are specified to both reset to 0 when either counter
wraps, which is highly inconvenient, since that'll give a blip
when that happens. The manual specifies writing 0 to the counters
after each read, but that's 1) too expensive, and 2) destroys the
possibility of sharing these counters with other users, so we live
with the blip - the other existing user does too.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/sched.c [new file with mode: 0644]
include/linux/sched.h

index c1f253dac1552f63c0795a31ce5977b681b98592..8dd30638fe4457ea6810bcbfd5476462b29890f9 100644 (file)
@@ -13,7 +13,7 @@ CFLAGS_common.o               := $(nostackp)
 
 obj-y                  := intel_cacheinfo.o addon_cpuid_features.o
 obj-y                  += proc.o capflags.o powerflags.o common.o
-obj-y                  += vmware.o hypervisor.o
+obj-y                  += vmware.o hypervisor.o sched.o
 
 obj-$(CONFIG_X86_32)   += bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)   += bugs_64.o
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644 (file)
index 0000000..6c00a8f
--- /dev/null
@@ -0,0 +1,55 @@
+#include <linux/sched.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_SMP
+
+static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+
+static unsigned long scale_aperfmperf(void)
+{
+       struct aperfmperf val, *old = &__get_cpu_var(old_perf); /* executing CPU's previous sample */
+       unsigned long ratio, flags;
+
+       local_irq_save(flags);          /* take the new sample with local interrupts off */
+       get_aperfmperf(&val);
+       local_irq_restore(flags);
+
+       ratio = calc_aperfmperf_ratio(old, &val);       /* computed from previous and new sample */
+       *old = val;                     /* new sample becomes the baseline for the next call */
+
+       return ratio;
+}
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+       /* Do aperf/mperf on the cpu level because it includes things
+        * like turbo mode, which are relevant to full cores.
+        * NOTE(review): scale_aperfmperf() samples the executing CPU and
+        * never sees @cpu - confirm callers only pass the local cpu. */
+       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+               return scale_aperfmperf();
+
+       /*
+        * TODO: no aperf/mperf - a cpufreq based scale could go here
+        */
+
+       return default_scale_freq_power(sd, cpu);
+}
+
+unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+       /*
+        * aperf/mperf already includes the smt gain: return plain SCHED_LOAD_SCALE
+        */
+       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+               return SCHED_LOAD_SCALE;
+
+       return default_scale_smt_power(sd, cpu);
+}
+
+#endif
index c30bf3d516d11fa180579215402b392331e3fbf0..fc4c0f9393d2c0ff488ca4925eea6e81648dea41 100644 (file)
@@ -992,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
        return 0;
 }
 
+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1003,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 }
 #endif /* !CONFIG_SMP */
 
+
 struct io_context;                     /* See blkdev.h */