xhci: Fix race related to abort operation
authorOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Tue, 3 Jan 2017 16:28:51 +0000 (18:28 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 3 Jan 2017 16:37:32 +0000 (17:37 +0100)
Current abort operation has race.

    xhci_handle_command_timeout()
      xhci_abort_cmd_ring()
        xhci_write_64(CMD_RING_ABORT)
        xhci_handshake(5s)
  do {
    check CMD_RING_RUNNING
            udelay(1)
 ...
 COMP_CMD_ABORT event
 COMP_CMD_STOP event
 xhci_handle_stopped_cmd_ring()
   restart cmd_ring
                                           CMD_RING_RUNNING become 1 again
  } while ()
          return -ETIMEDOUT
        xhci_write_64(CMD_RING_ABORT)
        /* can abort random command */

To do abort operation correctly, we have to wait both of COMP_CMD_STOP
event and negation of CMD_RING_RUNNING.

But like above, while timeout handler is waiting negation of
CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout
handler never be notice negation of CMD_RING_RUNNING, and retry of
CMD_RING_ABORT can abort random command (BTW, I guess retry of
CMD_RING_ABORT was workaround of this race).

To fix this race, this moves xhci_handle_stopped_cmd_ring() to
xhci_abort_cmd_ring().  And timeout handler waits COMP_CMD_STOP event.

At this point, timeout handler is owner of cmd_ring, and safely
restart cmd_ring by using xhci_handle_stopped_cmd_ring().

[FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE
operation]

[locks edited as patch is rebased on other locking fixes -Mathias]
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.h

index 57c038c5afa5b90c54f53b2cd147e8d5b5d24d5a..8414ed2a02de98b08ccadbe375d75b93da59cd35 100644 (file)
@@ -2378,6 +2378,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 
        /* init command timeout work */
        INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout);
+       init_completion(&xhci->cmd_ring_stop_completion);
 
        page_size = readl(&xhci->op_regs->page_size);
        xhci_dbg_trace(xhci, trace_xhci_dbg_init,
index fdff6a4cf2068c0254d810d927ab74632423929b..7a14e9ad664d8e806dc1035d6207eb11fb40e477 100644 (file)
@@ -284,23 +284,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
        return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
 }
 
-static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
+static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
+{
+       return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command,
+                                       cmd_list);
+}
+
+/*
+ * Turn all commands on command ring with status set to "aborted" to no-op trbs.
+ * If there are other commands waiting then restart the ring and kick the timer.
+ * This must be called with command ring stopped and xhci->lock held.
+ */
+static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
+                                        struct xhci_command *cur_cmd)
+{
+       struct xhci_command *i_cmd;
+       u32 cycle_state;
+
+       /* Turn all aborted commands in list to no-ops, then restart */
+       list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) {
+
+               if (i_cmd->status != COMP_CMD_ABORT)
+                       continue;
+
+               i_cmd->status = COMP_CMD_STOP;
+
+               xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
+                        i_cmd->command_trb);
+               /* get cycle state from the original cmd trb */
+               cycle_state = le32_to_cpu(
+                       i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
+               /* modify the command trb to no-op command */
+               i_cmd->command_trb->generic.field[0] = 0;
+               i_cmd->command_trb->generic.field[1] = 0;
+               i_cmd->command_trb->generic.field[2] = 0;
+               i_cmd->command_trb->generic.field[3] = cpu_to_le32(
+                       TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
+
+               /*
+                * caller waiting for completion is called when command
+                *  completion event is received for these no-op commands
+                */
+       }
+
+       xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
+
+       /* ring command ring doorbell to restart the command ring */
+       if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
+           !(xhci->xhc_state & XHCI_STATE_DYING)) {
+               xhci->current_cmd = cur_cmd;
+               xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
+               xhci_ring_cmd_db(xhci);
+       }
+}
+
+/* Must be called with xhci->lock held, releases and aquires lock back */
+static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
 {
        u64 temp_64;
        int ret;
 
        xhci_dbg(xhci, "Abort command ring\n");
 
-       temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
-       xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+       reinit_completion(&xhci->cmd_ring_stop_completion);
 
-       /*
-        * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
-        * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
-        * but the completion event in never sent. Use the cmd timeout timer to
-        * handle those cases. Use twice the time to cover the bit polling retry
-        */
-       xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT);
+       temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
        xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
                        &xhci->op_regs->cmd_ring);
 
@@ -320,17 +368,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
                udelay(1000);
                ret = xhci_handshake(&xhci->op_regs->cmd_ring,
                                     CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
-               if (ret == 0)
-                       return 0;
-
-               xhci_err(xhci, "Stopped the command ring failed, "
-                               "maybe the host is dead\n");
-               cancel_delayed_work(&xhci->cmd_timer);
-               xhci->xhc_state |= XHCI_STATE_DYING;
-               xhci_halt(xhci);
-               return -ESHUTDOWN;
+               if (ret < 0) {
+                       xhci_err(xhci, "Stopped the command ring failed, "
+                                "maybe the host is dead\n");
+                       xhci->xhc_state |= XHCI_STATE_DYING;
+                       xhci_halt(xhci);
+                       return -ESHUTDOWN;
+               }
+       }
+       /*
+        * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+        * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+        * but the completion event in never sent. Wait 2 secs (arbitrary
+        * number) to handle those cases after negation of CMD_RING_RUNNING.
+        */
+       spin_unlock_irqrestore(&xhci->lock, flags);
+       ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion,
+                                         msecs_to_jiffies(2000));
+       spin_lock_irqsave(&xhci->lock, flags);
+       if (!ret) {
+               xhci_dbg(xhci, "No stop event for abort, ring start fail?\n");
+               xhci_cleanup_command_queue(xhci);
+       } else {
+               xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci));
        }
-
        return 0;
 }
 
@@ -1212,64 +1273,12 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
                xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT);
 }
 
-/*
- * Turn all commands on command ring with status set to "aborted" to no-op trbs.
- * If there are other commands waiting then restart the ring and kick the timer.
- * This must be called with command ring stopped and xhci->lock held.
- */
-static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
-                                        struct xhci_command *cur_cmd)
-{
-       struct xhci_command *i_cmd, *tmp_cmd;
-       u32 cycle_state;
-
-       /* Turn all aborted commands in list to no-ops, then restart */
-       list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list,
-                                cmd_list) {
-
-               if (i_cmd->status != COMP_CMD_ABORT)
-                       continue;
-
-               i_cmd->status = COMP_CMD_STOP;
-
-               xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
-                        i_cmd->command_trb);
-               /* get cycle state from the original cmd trb */
-               cycle_state = le32_to_cpu(
-                       i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
-               /* modify the command trb to no-op command */
-               i_cmd->command_trb->generic.field[0] = 0;
-               i_cmd->command_trb->generic.field[1] = 0;
-               i_cmd->command_trb->generic.field[2] = 0;
-               i_cmd->command_trb->generic.field[3] = cpu_to_le32(
-                       TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
-
-               /*
-                * caller waiting for completion is called when command
-                *  completion event is received for these no-op commands
-                */
-       }
-
-       xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
-
-       /* ring command ring doorbell to restart the command ring */
-       if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
-           !(xhci->xhc_state & XHCI_STATE_DYING)) {
-               xhci->current_cmd = cur_cmd;
-               xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
-               xhci_ring_cmd_db(xhci);
-       }
-       return;
-}
-
-
 void xhci_handle_command_timeout(struct work_struct *work)
 {
        struct xhci_hcd *xhci;
        int ret;
        unsigned long flags;
        u64 hw_ring_state;
-       bool second_timeout = false;
 
        xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer);
 
@@ -1283,18 +1292,17 @@ void xhci_handle_command_timeout(struct work_struct *work)
                spin_unlock_irqrestore(&xhci->lock, flags);
                return;
        }
-
        /* mark this command to be cancelled */
-       if (xhci->current_cmd->status == COMP_CMD_ABORT)
-               second_timeout = true;
        xhci->current_cmd->status = COMP_CMD_ABORT;
 
        /* Make sure command ring is running before aborting it */
        hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
        if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
            (hw_ring_state & CMD_RING_RUNNING))  {
+               /* Prevent new doorbell, and start command abort */
+               xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
                xhci_dbg(xhci, "Command timeout\n");
-               ret = xhci_abort_cmd_ring(xhci);
+               ret = xhci_abort_cmd_ring(xhci, flags);
                if (unlikely(ret == -ESHUTDOWN)) {
                        xhci_err(xhci, "Abort command ring failed\n");
                        xhci_cleanup_command_queue(xhci);
@@ -1308,9 +1316,9 @@ void xhci_handle_command_timeout(struct work_struct *work)
                goto time_out_completed;
        }
 
-       /* command ring failed to restart, or host removed. Bail out */
-       if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
-               xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+       /* host removed. Bail out */
+       if (xhci->xhc_state & XHCI_STATE_REMOVING) {
+               xhci_dbg(xhci, "host removed, ring start fail?\n");
                xhci_cleanup_command_queue(xhci);
 
                goto time_out_completed;
@@ -1360,7 +1368,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 
        /* If CMD ring stopped we own the trbs between enqueue and dequeue */
        if (cmd_comp_code == COMP_CMD_STOP) {
-               xhci_handle_stopped_cmd_ring(xhci, cmd);
+               complete_all(&xhci->cmd_ring_stop_completion);
                return;
        }
 
index b54c486df956342569554fc4bff76032c75869c7..2d7b6374b58d0c092528dff89e761716552735d2 100644 (file)
@@ -1569,6 +1569,7 @@ struct xhci_hcd {
        struct list_head        cmd_list;
        unsigned int            cmd_ring_reserved_trbs;
        struct delayed_work     cmd_timer;
+       struct completion       cmd_ring_stop_completion;
        struct xhci_command     *current_cmd;
        struct xhci_ring        *event_ring;
        struct xhci_erst        erst;