application behavior. There is no guarantee that data is placed in
cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
“locked” data from cache. Power management C-states may shrink or
-power off cache. It is thus recommended to limit the processor maximum
-C-state, for example, by setting the processor.max_cstate kernel parameter.
+power off cache. Deeper C-states will automatically be restricted on
+pseudo-locked region creation.
It is required that an application using a pseudo-locked region runs
with affinity to the cores (or a subset of the cores) associated
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
+#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
return rdtgrp_match;
}
+/**
+ * struct pseudo_lock_pm_req - A power management QoS request list entry
+ * @list: Entry within the @pm_reqs list for a pseudo-locked region
+ * @req: PM QoS request
+ *
+ * One entry is allocated per CPU by pseudo_lock_cstates_constrain() and
+ * is unlinked and freed again by pseudo_lock_cstates_relax().
+ */
+struct pseudo_lock_pm_req {
+ struct list_head list;
+ struct dev_pm_qos_request req;
+};
+
+/**
+ * pseudo_lock_cstates_relax - Drop all PM QoS requests of a region
+ * @plr: pseudo-lock region whose @pm_reqs list is drained
+ *
+ * For every entry on the list the PM QoS request is removed, the entry
+ * is unlinked and its memory is freed. An empty list results in no work.
+ */
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+	struct list_head *reqs = &plr->pm_reqs;
+	struct pseudo_lock_pm_req *entry;
+
+	/* Pop entries off the head until nothing is left. */
+	while (!list_empty(reqs)) {
+		entry = list_first_entry(reqs, struct pseudo_lock_pm_req,
+					 list);
+		dev_pm_qos_remove_request(&entry->req);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/*
+ * Resume latency requested for every CPU associated with a pseudo-locked
+ * region. It has to be lower than the lowest C6 exit latency of all
+ * supported platforms as found in the cpuidle state tables in the
+ * intel_idle driver.
+ */
+#define PSEUDO_LOCK_RESUME_LATENCY	30
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ * @plr: pseudo-lock region; a PM QoS request is added for each CPU in
+ *       the cpu_mask of its domain and tracked on its @pm_reqs list
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ *
+ * Return: 0 on success, -ENOMEM if a request entry could not be allocated,
+ * or the negative error returned by dev_pm_qos_add_request(). On failure
+ * every request added so far is removed again.
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req;
+	int cpu;
+	int ret;
+
+	for_each_cpu(cpu, &plr->d->cpu_mask) {
+		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+		if (!pm_req) {
+			rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+					     &pm_req->req,
+					     DEV_PM_QOS_RESUME_LATENCY,
+					     PSEUDO_LOCK_RESUME_LATENCY);
+		/* Only negative values are treated as failure. */
+		if (ret < 0) {
+			rdt_last_cmd_printf("fail to add latency req cpu%d\n",
+					    cpu);
+			/*
+			 * The entry was never linked into @pm_reqs, free it
+			 * here and keep the error code reported by PM QoS.
+			 */
+			kfree(pm_req);
+			goto out_err;
+		}
+		list_add(&pm_req->list, &plr->pm_reqs);
+	}
+
+	return 0;
+
+out_err:
+	/* Undo the requests already registered for earlier CPUs. */
+	pseudo_lock_cstates_relax(plr);
+	return ret;
+}
+
/**
* pseudo_lock_region_init - Initialize pseudo-lock region information
* @plr: pseudo-lock region
return -ENOMEM;
init_waitqueue_head(&plr->lock_thread_wq);
+ INIT_LIST_HEAD(&plr->pm_reqs);
rdtgrp->plr = plr;
return 0;
}
if (ret < 0)
return ret;
+ ret = pseudo_lock_cstates_constrain(plr);
+ if (ret < 0) {
+ ret = -EINVAL;
+ goto out_region;
+ }
+
plr->thread_done = 0;
thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
if (IS_ERR(thread)) {
ret = PTR_ERR(thread);
rdt_last_cmd_printf("locking thread returned error %d\n", ret);
- goto out_region;
+ goto out_cstates;
}
kthread_bind(thread, plr->cpu);
* empty pseudo-locking loop.
*/
rdt_last_cmd_puts("locking thread interrupted\n");
- goto out_region;
+ goto out_cstates;
}
if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
pseudo_lock_minor_release(new_minor);
out_debugfs:
debugfs_remove_recursive(plr->debugfs_dir);
+out_cstates:
+ pseudo_lock_cstates_relax(plr);
out_region:
pseudo_lock_region_clear(plr);
out:
goto free;
}
+ pseudo_lock_cstates_relax(plr);
debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
pseudo_lock_minor_release(plr->minor);