Most likely xtime_lock starvation. I believe other folks were hitting
this earlier. What do folks think about the attached (and completely
untested, probably breaks x86-64) patch? I'm replacing the read_lock with a
vxtime_sequence lock, as implemented in the x86-64 vsyscall code. This
should alleviate the writer starvation, although no doubt it still has a
few gotchas in it. Also, I still need to add vxtime_lock() to all the other
instances of write-locking the xtime_lock, but it's a start.
Comments, flames?
-john
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c	Tue Nov 19 19:30:05 2002
+++ b/arch/i386/kernel/time.c	Tue Nov 19 19:30:05 2002
@@ -85,19 +85,19 @@
  */
 void do_gettimeofday(struct timeval *tv)
 {
-	unsigned long flags;
-	unsigned long usec, sec;
+	unsigned long usec, sec, sequence;
 
-	read_lock_irqsave(&xtime_lock, flags);
-	usec = timer->get_offset();
-	{
-		unsigned long lost = jiffies - wall_jiffies;
-		if (lost)
-			usec += lost * (1000000 / HZ);
-	}
-	sec = xtime.tv_sec;
-	usec += (xtime.tv_nsec / 1000);
-	read_unlock_irqrestore(&xtime_lock, flags);
+	sequence = vxtime_sequence[1];
+	do {
+		usec = timer->get_offset();
+		{
+			unsigned long lost = jiffies - wall_jiffies;
+			if (lost)
+				usec += lost * (1000000 / HZ);
+		}
+		sec = xtime.tv_sec;
+		usec += (xtime.tv_nsec / 1000);
+	} while(sequence != vxtime_sequence[0]);
 
 	while (usec >= 1000000) {
 		usec -= 1000000;
@@ -111,6 +111,7 @@
 void do_settimeofday(struct timeval *tv)
 {
 	write_lock_irq(&xtime_lock);
+	vxtime_lock();
 	/*
 	 * This is revolting. We need to set "xtime" correctly. However, the
 	 * value in this location is the value at the most recent update of
@@ -127,6 +128,8 @@
 
 	xtime.tv_sec = tv->tv_sec;
 	xtime.tv_nsec = (tv->tv_usec * 1000);
+	vxtime_unlock();
+	
 	time_adjust = 0;		/* stop active adjtime() */
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
@@ -278,11 +281,13 @@
 	 * locally disabled. -arca
 	 */
 	write_lock(&xtime_lock);
-
+	vxtime_lock();
+	
 	timer->mark_offset();
  
 	do_timer_interrupt(irq, NULL, regs);
 
+	vxtime_unlock();
 	write_unlock(&xtime_lock);
 
 }
diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c	Tue Nov 19 19:30:05 2002
+++ b/arch/i386/kernel/timers/timer_pit.c	Tue Nov 19 19:30:05 2002
@@ -61,17 +61,17 @@
 static unsigned long get_offset_pit(void)
 {
 	int count;
-
+	unsigned long flags;
 	static int count_p = LATCH;    /* for the first call after boot */
 	static unsigned long jiffies_p = 0;
 
 	/*
-	 * cache volatile jiffies temporarily; we have IRQs turned off. 
+	 * cache volatile jiffies temporarily; 
+	 * IRQs are not turned off, but we'll retry if something changes
 	 */
 	unsigned long jiffies_t;
 
-	/* gets recalled with irq locally disabled */
-	spin_lock(&i8253_lock);
+	spin_lock_irqsave(&i8253_lock,flags);
 	/* timer count may underflow right here */
 	outb_p(0x00, 0x43);	/* latch the count ASAP */
 
@@ -93,7 +93,7 @@
                 count = LATCH - 1;
         }
 	
-	spin_unlock(&i8253_lock);
+	spin_unlock_irqrestore(&i8253_lock,flags);
 
 	/*
 	 * avoiding timer inconsistencies (they are rare, but they happen)...
diff -Nru a/include/linux/time.h b/include/linux/time.h
--- a/include/linux/time.h	Tue Nov 19 19:30:05 2002
+++ b/include/linux/time.h	Tue Nov 19 19:30:05 2002
@@ -122,6 +122,11 @@
 extern struct timespec xtime;
 extern rwlock_t xtime_lock;
 
+/*unstarvable xtime rwlock*/
+extern long vxtime_sequence[2];
+#define vxtime_lock() do { vxtime_sequence[0]++; wmb(); } while(0)
+#define vxtime_unlock() do { wmb(); vxtime_sequence[1]++; } while (0)
+
 static inline unsigned long get_seconds(void)
 { 
 	return xtime.tv_sec;
diff -Nru a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c	Tue Nov 19 19:30:05 2002
+++ b/kernel/timer.c	Tue Nov 19 19:30:05 2002
@@ -761,6 +761,7 @@
  */
 rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
 unsigned long last_time_offset;
+long vxtime_sequence[2]; /*unstarvable xtime rwlock*/
 
 /*
  * This function runs timers and the timer-tq in bottom half context.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/