s390/dasd: fix unusable device after safe offline processing
author Stefan Haberland <sth@linux.vnet.ibm.com>
Tue, 16 May 2017 08:30:13 +0000 (10:30 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 12 Jun 2017 14:26:01 +0000 (16:26 +0200)
Like the normal offline processing, the safe offline processing needs to
be locked against multiple parallel executions. However, a normal offline
must still be able to overtake a running safe offline, so that the user
can prevent the device from waiting forever for outstanding I/O.

Unfortunately the parallel processing of safe offline and normal offline
might lead to a race situation where both threads report successful
execution to the CIO layer which in turn tries to deregister the kobject
of the device twice. This leads to a

refcount_t: underflow; use-after-free.

error, and the device cannot be set online again afterwards without
a reboot.

Correct the locking of the safe offline processing by doing the following:
- Use the cdev lock to secure all set and test operations on the
  device flags.
- Two safe offline processes are locked against each other using
  the DASD_FLAG_SAFE_OFFLINE and DASD_FLAG_SAFE_OFFLINE_RUNNING
  device flags.
  The differentiation between offline triggered and offline running
  is needed since the normal offline attribute is owned by CIO and
  we have to pass over control in between.
- The dasd_generic_set_offline process handles the offline
  processing. It is locked against parallel execution using the
  DASD_FLAG_OFFLINE.
- Only a running safe offline should be able to be overtaken by a
  single normal offline. This is ensured by clearing the
  DASD_FLAG_SAFE_OFFLINE_RUNNING flag when a normal offline
  overtakes. So this can only happen once.
- The safe offline just aborts in this case doing nothing and
  the normal offline processing finishes as usual.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
drivers/s390/block/dasd.c
drivers/s390/block/dasd_devmap.c

index 6fb3fd5efc11a2f777245255820021b269481cc3..b0c65dcb6865777dfcbae07c76e5be8ee8c14e98 100644 (file)
@@ -3562,57 +3562,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
                        else
                                pr_warn("%s: The DASD cannot be set offline while it is in use\n",
                                        dev_name(&cdev->dev));
-                       clear_bit(DASD_FLAG_OFFLINE, &device->flags);
-                       goto out_busy;
+                       rc = -EBUSY;
+                       goto out_err;
                }
        }
 
-       if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
-               /*
-                * safe offline already running
-                * could only be called by normal offline so safe_offline flag
-                * needs to be removed to run normal offline and kill all I/O
-                */
-               if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags))
-                       /* Already doing normal offline processing */
-                       goto out_busy;
-               else
-                       clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
-       } else {
-               if (test_bit(DASD_FLAG_OFFLINE, &device->flags))
-                       /* Already doing offline processing */
-                       goto out_busy;
+       /*
+        * Test if the offline processing is already running and exit if so.
+        * If a safe offline is being processed this could only be a normal
+        * offline that should be able to overtake the safe offline and
+        * cancel any I/O we do not want to wait for any longer
+        */
+       if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+               if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+                       clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING,
+                                 &device->flags);
+               } else {
+                       rc = -EBUSY;
+                       goto out_err;
+               }
        }
-
        set_bit(DASD_FLAG_OFFLINE, &device->flags);
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 
        /*
-        * if safe_offline called set safe_offline_running flag and
+        * if safe_offline is called set safe_offline_running flag and
         * clear safe_offline so that a call to normal offline
         * can overrun safe_offline processing
         */
        if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) &&
            !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+               /* need to unlock here to wait for outstanding I/O */
+               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
                /*
                 * If we want to set the device safe offline all IO operations
                 * should be finished before continuing the offline process
                 * so sync bdev first and then wait for our queues to become
                 * empty
                 */
-               /* sync blockdev and partitions */
                if (device->block) {
                        rc = fsync_bdev(device->block->bdev);
                        if (rc != 0)
                                goto interrupted;
                }
-               /* schedule device tasklet and wait for completion */
                dasd_schedule_device_bh(device);
                rc = wait_event_interruptible(shutdown_waitq,
                                              _wait_for_empty_queues(device));
                if (rc != 0)
                        goto interrupted;
+
+               /*
+                * check if a normal offline process overtook the offline
+                * processing in this case simply do nothing beside returning
+                * that we got interrupted
+                * otherwise mark safe offline as not running any longer and
+                * continue with normal offline
+                */
+               spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+               if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+                       rc = -ERESTARTSYS;
+                       goto out_err;
+               }
+               clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
        }
+       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 
        dasd_set_target_state(device, DASD_STATE_NEW);
        /* dasd_delete_device destroys the device reference. */
@@ -3624,22 +3636,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
         */
        if (block)
                dasd_free_block(block);
+
        return 0;
 
 interrupted:
        /* interrupted by signal */
-       clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
+       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
        clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
        clear_bit(DASD_FLAG_OFFLINE, &device->flags);
-       dasd_put_device(device);
-
-       return rc;
-
-out_busy:
+out_err:
        dasd_put_device(device);
        spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
-
-       return -EBUSY;
+       return rc;
 }
 EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
 
index 0ce84f0a4d7f33aeb5fce16bc40254d7f606c523..e943d9c489262924e8f3b1d9efcadc50814d4f5f 100644 (file)
@@ -950,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
 {
        struct ccw_device *cdev = to_ccwdev(dev);
        struct dasd_device *device;
+       unsigned long flags;
        int rc;
 
-       device = dasd_device_from_cdev(cdev);
+       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       device = dasd_device_from_cdev_locked(cdev);
        if (IS_ERR(device)) {
                rc = PTR_ERR(device);
+               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
                goto out;
        }
 
@@ -962,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
            test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
                /* Already doing offline processing */
                dasd_put_device(device);
+               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
                rc = -EBUSY;
                goto out;
        }
 
        set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
        dasd_put_device(device);
+       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 
        rc = ccw_device_set_offline(cdev);