ChangeSet 1.1234, 2003/06/18 16:48:52-07:00, david-b@pacbell.net

[PATCH] USB: ehci i/o watchdog

This patch adds a new "I/O watchdog" role to the existing
timer code, and cleans it up a bit.  If you want to run
EHCI without IRQs, it's now simple:  disable them, and
tweak the timer appropriately.

The patch should help with these reported problems:

    (a) Bulk I/O sometimes seems to stop progressing.  Not
        trouble in itself, but usb-storage and scsi could
        wedge deeply because of bugs in their fault recovery;
        and then the problems could break khubd and rmmod...

    (b) Some periodic transfers need to be "jumpstarted".
        Usually seen with a high speed hub.


 drivers/usb/host/ehci-hcd.c |   24 ++++++++++++--------
 drivers/usb/host/ehci-q.c   |   35 +++++++++++-------------------
 drivers/usb/host/ehci.h     |   51 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 77 insertions(+), 33 deletions(-)


diff -Nru a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
--- a/drivers/usb/host/ehci-hcd.c	Wed Jun 18 17:35:37 2003
+++ b/drivers/usb/host/ehci-hcd.c	Wed Jun 18 17:35:37 2003
@@ -118,8 +118,10 @@
 #define	EHCI_TUNE_MULT_TT	1
 #define	EHCI_TUNE_FLS		2	/* (small) 256 frame schedule */
 
-#define EHCI_WATCHDOG_JIFFIES	(HZ/100)	/* arbitrary; ~10 msec */
+#define EHCI_IAA_JIFFIES	(HZ/100)	/* arbitrary; ~10 msec */
+#define EHCI_IO_JIFFIES		(HZ/10)		/* io watchdog > irq_thresh */
 #define EHCI_ASYNC_JIFFIES	(HZ/20)		/* async idle timeout */
+#define EHCI_SHRINK_JIFFIES	(HZ/200)	/* async qh unlink delay */
 
 /* Initial IRQ latency:  lower than default */
 static int log2_irq_thresh = 0;		// 0 to 6
@@ -268,16 +270,13 @@
 		}
 	}
 
+ 	/* stop async processing after it's idled a bit */
+	if (test_bit (TIMER_ASYNC_OFF, &ehci->actions))
+ 		start_unlink_async (ehci, ehci->async);
+
+	/* ehci could run by timer, without IRQs ... */
 	ehci_work (ehci, NULL);
-	if (ehci->reclaim && !timer_pending (&ehci->watchdog))
-		mod_timer (&ehci->watchdog,
-				jiffies + EHCI_WATCHDOG_JIFFIES);
 
- 	/* stop async processing after it's idled a while */
-	else if (ehci->async_idle) {
- 		start_unlink_async (ehci, ehci->async);
- 		ehci->async_idle = 0;
-	}
 	spin_unlock_irqrestore (&ehci->lock, flags);
 }
 
@@ -660,11 +659,18 @@
  */
 static void ehci_work (struct ehci_hcd *ehci, struct pt_regs *regs)
 {
+	timer_action_done (ehci, TIMER_IO_WATCHDOG);	/* so the re-arm below takes effect */
 	if (ehci->reclaim_ready)
 		end_unlink_async (ehci, regs);
 	scan_async (ehci, regs);
 	if (ehci->next_uframe != -1)
 		scan_periodic (ehci, regs);
+
+	/* the IO watchdog guards against hardware or driver bugs that
+	 * misplace IRQs, and should let us run completely without IRQs.
+	 */
+	if ((ehci->async->qh_next.ptr != 0) || (ehci->periodic_sched != 0))
+		timer_action (ehci, TIMER_IO_WATCHDOG);
 }
 
 /*-------------------------------------------------------------------------*/
diff -Nru a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
--- a/drivers/usb/host/ehci-q.c	Wed Jun 18 17:35:37 2003
+++ b/drivers/usb/host/ehci-q.c	Wed Jun 18 17:35:37 2003
@@ -741,8 +741,7 @@
 
 	/* (re)start the async schedule? */
 	head = ehci->async;
-	if (ehci->async_idle)
-		del_timer (&ehci->watchdog);
+	timer_action_done (ehci, TIMER_ASYNC_OFF);
 	if (!head->qh_next.qh) {
 		u32	cmd = readl (&ehci->regs->command);
 
@@ -773,8 +772,6 @@
 
 	qh->qh_state = QH_STATE_LINKED;
 	/* qtd completions reported later by interrupt */
-
-	ehci->async_idle = 0;
 }
 
 /*-------------------------------------------------------------------------*/
@@ -955,7 +952,7 @@
 	struct ehci_qh		*qh = ehci->reclaim;
 	struct ehci_qh		*next;
 
-	del_timer (&ehci->watchdog);
+	timer_action_done (ehci, TIMER_IAA_WATCHDOG);
 
 	// qh->hw_next = cpu_to_le32 (qh->qh_dma);
 	qh->qh_state = QH_STATE_IDLE;
@@ -980,12 +977,8 @@
 		 * active but idle for a while once it empties.
 		 */
 		if (HCD_IS_RUNNING (ehci->hcd.state)
-				&& ehci->async->qh_next.qh == 0
-				&& !timer_pending (&ehci->watchdog)) {
-			ehci->async_idle = 1;
-			mod_timer (&ehci->watchdog,
-					jiffies + EHCI_ASYNC_JIFFIES);
-		}
+				&& ehci->async->qh_next.qh == 0)
+			timer_action (ehci, TIMER_ASYNC_OFF);
 	}
 
 	if (next)
@@ -1020,6 +1013,7 @@
 			wmb ();
 			// handshake later, if we need to
 		}
+		timer_action_done (ehci, TIMER_ASYNC_OFF);
 		return;
 	} 
 
@@ -1045,9 +1039,8 @@
 	ehci->reclaim_ready = 0;
 	cmd |= CMD_IAAD;
 	writel (cmd, &ehci->regs->command);
-	/* posted write need not be known to HC yet ... */
-
-	mod_timer (&ehci->watchdog, jiffies + EHCI_WATCHDOG_JIFFIES);
+	(void) readl (&ehci->regs->command);
+	timer_action (ehci, TIMER_IAA_WATCHDOG);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -1056,10 +1049,11 @@
 scan_async (struct ehci_hcd *ehci, struct pt_regs *regs)
 {
 	struct ehci_qh		*qh;
-	int			unlink_delay = 0;
+	enum ehci_timer_action	action = TIMER_IO_WATCHDOG;
 
 	if (!++(ehci->stamp))
 		ehci->stamp++;
+	timer_action_done (ehci, TIMER_ASYNC_SHRINK);
 rescan:
 	qh = ehci->async->qh_next.qh;
 	if (likely (qh != 0)) {
@@ -1091,17 +1085,14 @@
 			 */
 			if (list_empty (&qh->qtd_list)) {
 				if (qh->stamp == ehci->stamp)
-					unlink_delay = 1;
-				else if (!ehci->reclaim) {
+					action = TIMER_ASYNC_SHRINK;
+				else if (!ehci->reclaim)
 					start_unlink_async (ehci, qh);
-					unlink_delay = 0;
-				}
 			}
 
 			qh = qh->qh_next.qh;
 		} while (qh);
 	}
-
-	if (unlink_delay && !timer_pending (&ehci->watchdog))
-		mod_timer (&ehci->watchdog, jiffies + EHCI_WATCHDOG_JIFFIES/2);
+	if (action == TIMER_ASYNC_SHRINK)
+		timer_action (ehci, TIMER_ASYNC_SHRINK);
 }
diff -Nru a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
--- a/drivers/usb/host/ehci.h	Wed Jun 18 17:35:37 2003
+++ b/drivers/usb/host/ehci.h	Wed Jun 18 17:35:37 2003
@@ -52,8 +52,7 @@
 	/* async schedule support */
 	struct ehci_qh		*async;
 	struct ehci_qh		*reclaim;
-	int			reclaim_ready : 1,
-				async_idle : 1;
+	int			reclaim_ready : 1;
 
 	/* periodic schedule support */
 #define	DEFAULT_I_TDPS		1024		/* some HCs can do less */
@@ -83,6 +82,7 @@
 
 	struct timer_list	watchdog;
 	struct notifier_block	reboot_notifier;
+	unsigned long		actions;
 	unsigned		stamp;
 
 	/* irq statistics */
@@ -99,6 +99,53 @@
 
 /* NOTE:  urb->transfer_flags expected to not use this bit !!! */
 #define EHCI_STATE_UNLINK	0x8000		/* urb being unlinked */
+
+enum ehci_timer_action {	/* values are bit numbers in ehci->actions */
+	TIMER_IO_WATCHDOG,	/* missed-IRQ guard; EHCI_IO_JIFFIES */
+	TIMER_IAA_WATCHDOG,	/* armed after CMD_IAAD; EHCI_IAA_JIFFIES */
+	TIMER_ASYNC_SHRINK,	/* qh unlink delay; EHCI_SHRINK_JIFFIES */
+	TIMER_ASYNC_OFF,	/* async idle timeout; EHCI_ASYNC_JIFFIES */
+};
+
+static inline void
+timer_action_done (struct ehci_hcd *ehci, enum ehci_timer_action action)
+{
+	clear_bit (action, &ehci->actions);	/* flag only; timer is not touched */
+}
+
+/* mark "action" pending; if it wasn't already, (re)arm the shared watchdog */
+static inline void
+timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action)
+{
+	if (!test_and_set_bit (action, &ehci->actions)) {
+		unsigned long t;
+
+		switch (action) {
+		case TIMER_IAA_WATCHDOG:
+			t = EHCI_IAA_JIFFIES;
+			break;
+		case TIMER_IO_WATCHDOG:
+			t = EHCI_IO_JIFFIES;
+			break;
+		case TIMER_ASYNC_OFF:
+			t = EHCI_ASYNC_JIFFIES;
+			break;
+		default:	/* TIMER_ASYNC_SHRINK */
+			t = EHCI_SHRINK_JIFFIES;
+			break;
+		}
+		t += jiffies;
+		// all timings except IAA watchdog can be overridden.
+		// async queue SHRINK often precedes IAA.  while it's ready
+		// to go OFF neither can matter, and afterwards the IO
+		// watchdog stops unless there's still periodic traffic.
+		if (action != TIMER_IAA_WATCHDOG
+				&& time_after (t, ehci->watchdog.expires)
+				&& timer_pending (&ehci->watchdog))
+			return;		/* keep the earlier pending expiry */
+		mod_timer (&ehci->watchdog, t);
+	}
+}
 
 /*-------------------------------------------------------------------------*/
 
