module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
- "Number of times we overflow ARS results before abort");
-
static bool disable_vendor_specific;
module_param(disable_vendor_specific, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vendor_specific,
mutex_lock(&acpi_desc->init_mutex);
rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
- work_busy(&acpi_desc->work)
+ work_busy(&acpi_desc->dwork.work)
&& !acpi_desc->cancel ? "+\n" : "\n");
mutex_unlock(&acpi_desc->init_mutex);
}
memset(&ars_start, 0, sizeof(ars_start));
ars_start.address = spa->address;
ars_start.length = spa->length;
- ars_start.flags = acpi_desc->ars_start_flags;
+ if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+ ars_start.flags = ND_ARS_RETURN_PREV_DATA;
if (nfit_spa_type(spa) == NFIT_SPA_PM)
ars_start.type = ND_ARS_PERSISTENT;
else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
return cmd_rc;
}
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
+{
+ struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ struct nd_region *nd_region = nfit_spa->nd_region;
+ struct device *dev;
+
+ if ((ars_status->address >= spa->address && ars_status->address
+ < spa->address + spa->length)
+ || (ars_status->address < spa->address)) {
+ /*
+ * Assume that if a scrub starts at an offset from the
+ * start of nfit_spa that we are in the continuation
+ * case.
+ *
+ * Otherwise, if the scrub covers the spa range, mark
+ * any pending request complete.
+ */
+ if (ars_status->address + ars_status->length
+ >= spa->address + spa->length)
+ /* complete */;
+ else
+ return;
+ } else
+ return;
+
+ if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+ return;
+
+ if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+ return;
+
+ if (nd_region) {
+ dev = nd_region_dev(nd_region);
+ nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+ } else
+ dev = acpi_desc->dev;
+
+ dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+ test_bit(ARS_SHORT, &nfit_spa->ars_state)
+ ? "short" : "long");
+ clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+ set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
{
struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
return -ENOMEM;
rc = ars_get_status(acpi_desc);
+
if (rc < 0 && rc != -ENOSPC)
return rc;
return 0;
}
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
- struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+ int *query_rc)
{
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- unsigned int overflow_retry = scrub_overflow_abort;
- u64 init_ars_start = 0, init_ars_len = 0;
- struct device *dev = acpi_desc->dev;
- unsigned int tmo = scrub_timeout;
- int rc;
-
- if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region)
- return;
-
- rc = ars_start(acpi_desc, nfit_spa);
- /*
- * If we timed out the initial scan we'll still be busy here,
- * and will wait another timeout before giving up permanently.
- */
- if (rc < 0 && rc != -EBUSY)
- return;
+ int rc = *query_rc;
- do {
- u64 ars_start, ars_len;
-
- if (acpi_desc->cancel)
- break;
- rc = acpi_nfit_query_poison(acpi_desc);
- if (rc == -ENOTTY)
- break;
- if (rc == -EBUSY && !tmo) {
- dev_warn(dev, "range %d ars timeout, aborting\n",
- spa->range_index);
- break;
- }
+ set_bit(ARS_REQ, &nfit_spa->ars_state);
+ set_bit(ARS_SHORT, &nfit_spa->ars_state);
+ switch (rc) {
+ case 0:
+ case -EAGAIN:
+ rc = ars_start(acpi_desc, nfit_spa);
if (rc == -EBUSY) {
- /*
- * Note, entries may be appended to the list
- * while the lock is dropped, but the workqueue
- * being active prevents entries being deleted /
- * freed.
- */
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- mutex_lock(&acpi_desc->init_mutex);
- continue;
- }
-
- /* we got some results, but there are more pending... */
- if (rc == -ENOSPC && overflow_retry--) {
- if (!init_ars_len) {
- init_ars_len = acpi_desc->ars_status->length;
- init_ars_start = acpi_desc->ars_status->address;
- }
- rc = ars_continue(acpi_desc);
- }
-
- if (rc < 0) {
- dev_warn(dev, "range %d ars continuation failed\n",
- spa->range_index);
+ *query_rc = rc;
break;
- }
-
- if (init_ars_len) {
- ars_start = init_ars_start;
- ars_len = init_ars_len;
+ } else if (rc == 0) {
+ rc = acpi_nfit_query_poison(acpi_desc);
} else {
- ars_start = acpi_desc->ars_status->address;
- ars_len = acpi_desc->ars_status->length;
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ break;
}
- dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
- spa->range_index, ars_start, ars_len);
- /* notify the region about new poison entries */
- nvdimm_region_notify(nfit_spa->nd_region,
- NVDIMM_REVALIDATE_POISON);
+ if (rc == -EAGAIN)
+ clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+ else if (rc == 0)
+ ars_complete(acpi_desc, nfit_spa);
+ break;
+ case -EBUSY:
+ case -ENOSPC:
break;
- } while (1);
+ default:
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ break;
+ }
+
+ if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+ set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+ return acpi_nfit_register_region(acpi_desc, nfit_spa);
}
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
{
- struct device *dev;
- u64 init_scrub_length = 0;
struct nfit_spa *nfit_spa;
- u64 init_scrub_address = 0;
- bool init_ars_done = false;
- struct acpi_nfit_desc *acpi_desc;
- unsigned int tmo = scrub_timeout;
- unsigned int overflow_retry = scrub_overflow_abort;
-
- acpi_desc = container_of(work, typeof(*acpi_desc), work);
- dev = acpi_desc->dev;
-
- /*
- * We scrub in 2 phases. The first phase waits for any platform
- * firmware initiated scrubs to complete and then we go search for the
- * affected spa regions to mark them scanned. In the second phase we
- * initiate a directed scrub for every range that was not scrubbed in
- * phase 1. If we're called for a 'rescan', we harmlessly pass through
- * the first phase, but really only care about running phase 2, where
- * regions can be notified of new poison.
- */
- /* process platform firmware initiated scrubs */
- retry:
- mutex_lock(&acpi_desc->init_mutex);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- struct nd_cmd_ars_status *ars_status;
- struct acpi_nfit_system_address *spa;
- u64 ars_start, ars_len;
- int rc;
-
- if (acpi_desc->cancel)
- break;
-
- if (nfit_spa->nd_region)
- continue;
-
- if (init_ars_done) {
- /*
- * No need to re-query, we're now just
- * reconciling all the ranges covered by the
- * initial scrub
- */
- rc = 0;
- } else
- rc = acpi_nfit_query_poison(acpi_desc);
-
- if (rc == -ENOTTY) {
- /* no ars capability, just register spa and move on */
- acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
continue;
- }
-
- if (rc == -EBUSY && !tmo) {
- /* fallthrough to directed scrub in phase 2 */
- dev_warn(dev, "timeout awaiting ars results, continuing...\n");
- break;
- } else if (rc == -EBUSY) {
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- goto retry;
- }
+ ars_complete(acpi_desc, nfit_spa);
+ }
+}
- /* we got some results, but there are more pending... */
- if (rc == -ENOSPC && overflow_retry--) {
- ars_status = acpi_desc->ars_status;
- /*
- * Record the original scrub range, so that we
- * can recall all the ranges impacted by the
- * initial scrub.
- */
- if (!init_scrub_length) {
- init_scrub_length = ars_status->length;
- init_scrub_address = ars_status->address;
- }
- rc = ars_continue(acpi_desc);
- if (rc == 0) {
- mutex_unlock(&acpi_desc->init_mutex);
- goto retry;
- }
- }
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+ int query_rc)
+{
+ unsigned int tmo = acpi_desc->scrub_tmo;
+ struct device *dev = acpi_desc->dev;
+ struct nfit_spa *nfit_spa;
- if (rc < 0) {
- /*
- * Initial scrub failed, we'll give it one more
- * try below...
- */
- break;
- }
+ if (acpi_desc->cancel)
+ return 0;
- /* We got some final results, record completed ranges */
- ars_status = acpi_desc->ars_status;
- if (init_scrub_length) {
- ars_start = init_scrub_address;
- ars_len = ars_start + init_scrub_length;
- } else {
- ars_start = ars_status->address;
- ars_len = ars_status->length;
- }
- spa = nfit_spa->spa;
+ if (query_rc == -EBUSY) {
+ dev_dbg(dev, "ARS: ARS busy\n");
+ return min(30U * 60U, tmo * 2);
+ }
+ if (query_rc == -ENOSPC) {
+ dev_dbg(dev, "ARS: ARS continue\n");
+ ars_continue(acpi_desc);
+ return 1;
+ }
+ if (query_rc && query_rc != -EAGAIN) {
+ unsigned long long addr, end;
- if (!init_ars_done) {
- init_ars_done = true;
- dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
- ars_start, ars_len);
- }
- if (ars_start <= spa->address && ars_start + ars_len
- >= spa->address + spa->length)
- acpi_nfit_register_region(acpi_desc, nfit_spa);
+ addr = acpi_desc->ars_status->address;
+ end = addr + acpi_desc->ars_status->length;
+ dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+ query_rc);
}
- /*
- * For all the ranges not covered by an initial scrub we still
- * want to see if there are errors, but it's ok to discover them
- * asynchronously.
- */
+ ars_complete_all(acpi_desc);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- /*
- * Flag all the ranges that still need scrubbing, but
- * register them now to make data available.
- */
- if (!nfit_spa->nd_region) {
- set_bit(ARS_REQ, &nfit_spa->ars_state);
- acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+ continue;
+ if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+ int rc = ars_start(acpi_desc, nfit_spa);
+
+ clear_bit(ARS_DONE, &nfit_spa->ars_state);
+ dev = nd_region_dev(nfit_spa->nd_region);
+ dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+ nfit_spa->spa->range_index, rc);
+ if (rc == 0 || rc == -EBUSY)
+ return 1;
+ dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+ nfit_spa->spa->range_index, rc);
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
}
}
- acpi_desc->init_complete = 1;
+ return 0;
+}
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
- acpi_nfit_async_scrub(acpi_desc, nfit_spa);
- acpi_desc->scrub_count++;
- acpi_desc->ars_start_flags = 0;
- if (acpi_desc->scrub_count_state)
- sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+ struct acpi_nfit_desc *acpi_desc;
+ unsigned int tmo;
+ int query_rc;
+
+ acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+ mutex_lock(&acpi_desc->init_mutex);
+ query_rc = acpi_nfit_query_poison(acpi_desc);
+ tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+ if (tmo) {
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+ acpi_desc->scrub_tmo = tmo;
+ } else {
+ acpi_desc->scrub_count++;
+ if (acpi_desc->scrub_count_state)
+ sysfs_notify_dirent(acpi_desc->scrub_count_state);
+ }
+ memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
mutex_unlock(&acpi_desc->init_mutex);
}
nfit_spa->max_ars = ars_cap.max_ars_out;
nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+ clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+ set_bit(ARS_REQ, &nfit_spa->ars_state);
}
-
static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
{
struct nfit_spa *nfit_spa;
+ int rc, query_rc;
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- int rc, type = nfit_spa_type(nfit_spa->spa);
-
- /* PMEM and VMEM will be registered by the ARS workqueue */
- if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ switch (nfit_spa_type(nfit_spa->spa)) {
+ case NFIT_SPA_VOLATILE:
+ case NFIT_SPA_PM:
acpi_nfit_init_ars(acpi_desc, nfit_spa);
- continue;
+ break;
}
- /* BLK apertures belong to BLK region registration below */
- if (type == NFIT_SPA_BDW)
- continue;
- /* BLK regions don't need to wait for ARS results */
- rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
- if (rc)
- return rc;
}
- acpi_desc->ars_start_flags = 0;
- if (!acpi_desc->cancel)
- queue_work(nfit_wq, &acpi_desc->work);
+ /*
+ * Reap any results that might be pending before starting new
+ * short requests.
+ */
+ query_rc = acpi_nfit_query_poison(acpi_desc);
+ if (query_rc == 0)
+ ars_complete_all(acpi_desc);
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ switch (nfit_spa_type(nfit_spa->spa)) {
+ case NFIT_SPA_VOLATILE:
+ case NFIT_SPA_PM:
+ /* register regions and kick off initial ARS run */
+ rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+ if (rc)
+ return rc;
+ break;
+ case NFIT_SPA_BDW:
+ /* nothing to register */
+ break;
+ case NFIT_SPA_DCR:
+ case NFIT_SPA_VDISK:
+ case NFIT_SPA_VCD:
+ case NFIT_SPA_PDISK:
+ case NFIT_SPA_PCD:
+ /* register known regions that don't support ARS */
+ rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (rc)
+ return rc;
+ break;
+ default:
+ /* don't register unknown regions */
+ break;
+ }
+
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
return 0;
}
}
EXPORT_SYMBOL_GPL(acpi_nfit_init);
-struct acpi_nfit_flush_work {
- struct work_struct work;
- struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
- struct acpi_nfit_flush_work *flush;
-
- flush = container_of(work, typeof(*flush), work);
- complete(&flush->cmp);
-}
-
static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
{
struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
struct device *dev = acpi_desc->dev;
- struct acpi_nfit_flush_work flush;
- int rc;
- /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+ /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
device_lock(dev);
device_unlock(dev);
- /* bounce the init_mutex to make init_complete valid */
+ /* Bounce the init_mutex to complete initial registration */
mutex_lock(&acpi_desc->init_mutex);
- if (acpi_desc->cancel || acpi_desc->init_complete) {
- mutex_unlock(&acpi_desc->init_mutex);
- return 0;
- }
-
- /*
- * Scrub work could take 10s of seconds, userspace may give up so we
- * need to be interruptible while waiting.
- */
- INIT_WORK_ONSTACK(&flush.work, flush_probe);
- init_completion(&flush.cmp);
- queue_work(nfit_wq, &flush.work);
mutex_unlock(&acpi_desc->init_mutex);
- rc = wait_for_completion_interruptible(&flush.cmp);
- cancel_work_sync(&flush.work);
- return rc;
+ return 0;
}
static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
* just needs guarantees that any ars it initiates are not
* interrupted by any intervening start reqeusts from userspace.
*/
- if (work_busy(&acpi_desc->work))
+ if (work_busy(&acpi_desc->dwork.work))
return -EBUSY;
return 0;
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
{
struct device *dev = acpi_desc->dev;
+ int scheduled = 0, busy = 0;
struct nfit_spa *nfit_spa;
- if (work_busy(&acpi_desc->work))
- return -EBUSY;
-
mutex_lock(&acpi_desc->init_mutex);
if (acpi_desc->cancel) {
mutex_unlock(&acpi_desc->init_mutex);
}
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ int type = nfit_spa_type(nfit_spa->spa);
- if (nfit_spa_type(spa) != NFIT_SPA_PM)
+ if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+ continue;
+ if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
continue;
- set_bit(ARS_REQ, &nfit_spa->ars_state);
+ if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+ busy++;
+ else {
+ if (test_bit(ARS_SHORT, &flags))
+ set_bit(ARS_SHORT, &nfit_spa->ars_state);
+ scheduled++;
+ }
+ }
+ if (scheduled) {
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+ dev_dbg(dev, "ars_scan triggered\n");
}
- acpi_desc->ars_start_flags = 0;
- if (test_bit(ARS_SHORT, &flags))
- acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
- queue_work(nfit_wq, &acpi_desc->work);
- dev_dbg(dev, "ars_scan triggered\n");
mutex_unlock(&acpi_desc->init_mutex);
- return 0;
+ if (scheduled)
+ return 0;
+ if (busy)
+ return -EBUSY;
+ return -ENOTTY;
}
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
INIT_LIST_HEAD(&acpi_desc->dimms);
INIT_LIST_HEAD(&acpi_desc->list);
mutex_init(&acpi_desc->init_mutex);
- INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+ acpi_desc->scrub_tmo = 1;
+ INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
}
EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
mutex_lock(&acpi_desc->init_mutex);
acpi_desc->cancel = 1;
+ cancel_delayed_work_sync(&acpi_desc->dwork);
mutex_unlock(&acpi_desc->init_mutex);
/*