target: Fix ALUA transition state race between multiple initiators
authorMike Christie <mchristi@redhat.com>
Wed, 29 Mar 2017 05:19:24 +0000 (00:19 -0500)
committerNicholas Bellinger <nab@linux-iscsi.org>
Fri, 31 Mar 2017 06:12:40 +0000 (23:12 -0700)
Multiple threads could be writing to alua_access_state at
the same time, or there could be multiple STPGs in flight
(different initiators sending them or one initiator sending
them to different ports), or a combo of both and the
core_alua_do_transition_tg_pt calls will race with each other.

Because from the last patches we no longer delay running
core_alua_do_transition_tg_pt_work, there does not seem to be
any point in running that in a workqueue. And, we always
wait for it to complete one way or another, so we can sleep
in this code path. So, this patch made over target-pending just adds a
mutex and does the work core_alua_do_transition_tg_pt_work was doing in
core_alua_do_transition_tg_pt.

There is also no need to use an atomic for the
tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
test and set it under the transition mutex. And, it is a int/32 bits
so in the other places where it is read, we will never see it partially
updated.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
drivers/target/target_core_alua.c
drivers/target/target_core_configfs.c
include/target/target_core_base.h

index fd7c16a7ca6e06ad53e6d6df54ab739550ae4a4a..fc4a9c303d559f95b1216857efd8ae6ce77b96ca 100644 (file)
@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
                /*
                 * Set the ASYMMETRIC ACCESS State
                 */
-               buf[off++] |= (atomic_read(
-                       &tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
+               buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
                /*
                 * Set supported ASYMMETRIC ACCESS State bits
                 */
@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
        spin_lock(&lun->lun_tg_pt_gp_lock);
        tg_pt_gp = lun->lun_tg_pt_gp;
-       out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+       out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
        nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
 
        // XXX: keeps using tg_pt_gp witout reference after unlock
@@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata(
 }
 
 /*
- * Called with tg_pt_gp->tg_pt_gp_md_mutex held
+ * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
  */
 static int core_alua_update_tpg_primary_metadata(
        struct t10_alua_tg_pt_gp *tg_pt_gp)
@@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
                        "alua_access_state=0x%02x\n"
                        "alua_access_status=0x%02x\n",
                        tg_pt_gp->tg_pt_gp_id,
-                       tg_pt_gp->tg_pt_gp_alua_pending_state,
+                       tg_pt_gp->tg_pt_gp_alua_access_state,
                        tg_pt_gp->tg_pt_gp_alua_access_status);
 
        snprintf(path, ALUA_METADATA_PATH_LEN,
@@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
        spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 }
 
-static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
-{
-       struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-               struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
-       struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-       bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
-                        ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
-
-       /*
-        * Update the ALUA metadata buf that has been allocated in
-        * core_alua_do_port_transition(), this metadata will be written
-        * to struct file.
-        *
-        * Note that there is the case where we do not want to update the
-        * metadata when the saved metadata is being parsed in userspace
-        * when setting the existing port access state and access status.
-        *
-        * Also note that the failure to write out the ALUA metadata to
-        * struct file does NOT affect the actual ALUA transition.
-        */
-       if (tg_pt_gp->tg_pt_gp_write_metadata) {
-               mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
-               core_alua_update_tpg_primary_metadata(tg_pt_gp);
-               mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
-       }
-       /*
-        * Set the current primary ALUA access state to the requested new state
-        */
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-                  tg_pt_gp->tg_pt_gp_alua_pending_state);
-
-       pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
-               " from primary access state %s to %s\n", (explicit) ? "explicit" :
-               "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
-               tg_pt_gp->tg_pt_gp_id,
-               core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
-               core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
-
-       core_alua_queue_state_change_ua(tg_pt_gp);
-
-       spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-       atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-       spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-       if (tg_pt_gp->tg_pt_gp_transition_complete)
-               complete(tg_pt_gp->tg_pt_gp_transition_complete);
-}
-
 static int core_alua_do_transition_tg_pt(
        struct t10_alua_tg_pt_gp *tg_pt_gp,
        int new_state,
        int explicit)
 {
-       struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-       DECLARE_COMPLETION_ONSTACK(wait);
+       int prev_state;
 
+       mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
        /* Nothing to be done here */
-       if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
+       if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
                return 0;
+       }
 
-       if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
+       if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
                return -EAGAIN;
-
-       /*
-        * Flush any pending transitions
-        */
-       if (!explicit)
-               flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
+       }
 
        /*
         * Save the old primary ALUA access state, and set the current state
         * to ALUA_ACCESS_STATE_TRANSITION.
         */
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-                       ALUA_ACCESS_STATE_TRANSITION);
+       prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
+       tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
        tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
                                ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
                                ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
 
        core_alua_queue_state_change_ua(tg_pt_gp);
 
-       if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+       if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
                return 0;
-
-       tg_pt_gp->tg_pt_gp_alua_previous_state =
-               atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-       tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+       }
 
        /*
         * Check for the optional ALUA primary state transition delay
@@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt(
                msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
 
        /*
-        * Take a reference for workqueue item
+        * Set the current primary ALUA access state to the requested new state
         */
-       spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-       atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-       spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
+       tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
 
-       schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
-       if (explicit) {
-               tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-               wait_for_completion(&wait);
-               tg_pt_gp->tg_pt_gp_transition_complete = NULL;
+       /*
+        * Update the ALUA metadata buf that has been allocated in
+        * core_alua_do_port_transition(), this metadata will be written
+        * to struct file.
+        *
+        * Note that there is the case where we do not want to update the
+        * metadata when the saved metadata is being parsed in userspace
+        * when setting the existing port access state and access status.
+        *
+        * Also note that the failure to write out the ALUA metadata to
+        * struct file does NOT affect the actual ALUA transition.
+        */
+       if (tg_pt_gp->tg_pt_gp_write_metadata) {
+               core_alua_update_tpg_primary_metadata(tg_pt_gp);
        }
 
+       pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
+               " from primary access state %s to %s\n", (explicit) ? "explicit" :
+               "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
+               tg_pt_gp->tg_pt_gp_id,
+               core_alua_dump_state(prev_state),
+               core_alua_dump_state(new_state));
+
+       core_alua_queue_state_change_ua(tg_pt_gp);
+
+       mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
        return 0;
 }
 
@@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
        }
        INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
        INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
-       mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
+       mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
        spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
        atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-       INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-                 core_alua_do_transition_tg_pt_work);
        tg_pt_gp->tg_pt_gp_dev = dev;
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-               ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
+       tg_pt_gp->tg_pt_gp_alua_access_state =
+                       ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
        /*
         * Enable both explicit and implicit ALUA support by default
         */
@@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
        dev->t10_alua.alua_tg_pt_gps_counter--;
        spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-       flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
        /*
         * Allow a struct t10_alua_tg_pt_gp_member * referenced by
         * core_alua_get_tg_pt_gp_by_name() in
@@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
                        "Primary Access Status: %s\nTG Port Secondary Access"
                        " State: %s\nTG Port Secondary Access Status: %s\n",
                        config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
-                       core_alua_dump_state(atomic_read(
-                                       &tg_pt_gp->tg_pt_gp_alua_access_state)),
+                       core_alua_dump_state(
+                               tg_pt_gp->tg_pt_gp_alua_access_state),
                        core_alua_dump_status(
                                tg_pt_gp->tg_pt_gp_alua_access_status),
                        atomic_read(&lun->lun_tg_pt_secondary_offline) ?
index 38b5025e4c7a877f9e5c0bcfa6995262b6330e32..70657fd564406b3c137b02c3f61824f387f554aa 100644 (file)
@@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
                char *page)
 {
        return sprintf(page, "%d\n",
-               atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
+                      to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
 }
 
 static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
index 730ed3055336a01915ba41aea7ba21b27218c745..ccfad0e9c2cdbd68f13c809c7ed6414b2c0c97c1 100644 (file)
@@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp {
        u16     tg_pt_gp_id;
        int     tg_pt_gp_valid_id;
        int     tg_pt_gp_alua_supported_states;
-       int     tg_pt_gp_alua_pending_state;
-       int     tg_pt_gp_alua_previous_state;
        int     tg_pt_gp_alua_access_status;
        int     tg_pt_gp_alua_access_type;
        int     tg_pt_gp_nonop_delay_msecs;
@@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp {
        int     tg_pt_gp_pref;
        int     tg_pt_gp_write_metadata;
        u32     tg_pt_gp_members;
-       atomic_t tg_pt_gp_alua_access_state;
+       int     tg_pt_gp_alua_access_state;
        atomic_t tg_pt_gp_ref_cnt;
        spinlock_t tg_pt_gp_lock;
-       struct mutex tg_pt_gp_md_mutex;
+       struct mutex tg_pt_gp_transition_mutex;
        struct se_device *tg_pt_gp_dev;
        struct config_group tg_pt_gp_group;
        struct list_head tg_pt_gp_list;
        struct list_head tg_pt_gp_lun_list;
        struct se_lun *tg_pt_gp_alua_lun;
        struct se_node_acl *tg_pt_gp_alua_nacl;
-       struct work_struct tg_pt_gp_transition_work;
-       struct completion *tg_pt_gp_transition_complete;
 };
 
 struct t10_vpd {