patch-2.4.0-test9 linux/arch/ppc/kernel/time.c

Next file: linux/arch/ppc/kernel/traps.c
Previous file: linux/arch/ppc/kernel/syscalls.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test8/linux/arch/ppc/kernel/time.c linux/arch/ppc/kernel/time.c
@@ -6,6 +6,27 @@
  * Paul Mackerras' version and mine for PReP and Pmac.
  * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
  *
+ * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
+ * to make clock more stable (2.4.0-test5). The only thing
+ * that this code assumes is that the timebases have been synchronized
+ * by firmware on SMP and are never stopped (never do sleep
+ * on SMP then, nap and doze are OK).
+ *
+ * TODO (not necessarily in this file):
+ * - improve precision and reproducibility of timebase frequency
+ * measurement at boot time.
+ * - get rid of xtime_lock for gettimeofday (generic kernel problem
+ * to be implemented on all architectures for SMP scalability and
+ * eventually implementing gettimeofday without entering the kernel).
+ * - put all time/clock related variables in a single structure
+ * to minimize number of cache lines touched by gettimeofday()
+ * - for astronomical applications: add a new function to get
+ * unambiguous timestamps even around leap seconds. This needs
+ * a new timestamp format and a good name.
+ *
+ *
+ * The following comment is partially obsolete (at least the long wait
+ * is no longer a valid reason):
  * Since the MPC8xx has a programmable interrupt timer, I decided to
  * use that rather than the decrementer.  Two reasons: 1.) the clock
  * frequency is low, causing 2.) a long wait in the timer interrupt
@@ -49,18 +70,32 @@
 void smp_local_timer_interrupt(struct pt_regs *);
 
 /* keep track of when we need to update the rtc */
-time_t last_rtc_update = 0;
+time_t last_rtc_update;
 extern rwlock_t xtime_lock;
 
 /* The decrementer counts down by 128 every 128ns on a 601. */
 #define DECREMENTER_COUNT_601	(1000000000 / HZ)
-#define COUNT_PERIOD_NUM_601	1
-#define COUNT_PERIOD_DEN_601	1000
 
-unsigned decrementer_count;	/* count value for 1e6/HZ microseconds */
-unsigned count_period_num;	/* 1 decrementer count equals */
-unsigned count_period_den;	/* count_period_num / count_period_den us */
-unsigned long last_tb;
+unsigned tb_ticks_per_jiffy;
+unsigned tb_to_us;
+unsigned tb_last_stamp;
+
+extern unsigned long wall_jiffies;
+
+static long time_offset;
+
+/* Timer interrupt helper: ticks elapsed since *jiffy_stamp (the stamp may be rewritten) */
+static inline int tb_delta(unsigned *jiffy_stamp) {
+	int delta;
+	if (__USE_RTC()) {	/* RTC-based counter; time_init() treats this case as the 601 */
+		delta = get_rtcl();
+		if (delta < *jiffy_stamp) *jiffy_stamp -= 1000000000;	/* reading is below the stamp (presumably the counter wrapped at 1e9): rebase the stamp */
+		delta -= *jiffy_stamp;
+	} else {
+		delta = get_tbl() - *jiffy_stamp;	/* unsigned subtraction, so a timebase wrap still yields the elapsed count */
+	}
+	return delta;
+}
 
 /*
  * timer_interrupt - gets called when the decrementer overflows,
@@ -69,88 +104,56 @@
  */
 int timer_interrupt(struct pt_regs * regs)
 {
-	int dval, d;
-#if 0
-	unsigned long flags;
-#endif
+	int next_dec;
 	unsigned long cpu = smp_processor_id();
-	
+	unsigned jiffy_stamp = last_jiffy_stamp(cpu);
+
 	hardirq_enter(cpu);
-#ifdef CONFIG_SMP
-	{
-		unsigned int loops = 100000000;
-		while (test_bit(0, &global_irq_lock)) {
-			if (smp_processor_id() == global_irq_holder) {
-				printk("uh oh, interrupt while we hold global irq lock!\n");
-#ifdef CONFIG_XMON
-				xmon(0);
-#endif
-				break;
-			}
-			if (loops-- == 0) {
-				printk("do_IRQ waiting for irq lock (holder=%d)\n", global_irq_holder);
-#ifdef CONFIG_XMON
-				xmon(0);
-#endif
-			}
-		}
-	}
-#endif /* CONFIG_SMP */			
 	
-	dval = get_dec();
-	/*
-	 * Wait for the decrementer to change, then jump
-	 * in and add decrementer_count to its value
-	 * (quickly, before it changes again!)
-	 */
-	while ((d = get_dec()) == dval)
-		;
-	asm volatile("mftb 	%0" : "=r" (last_tb) );
-	/*
-	 * Don't play catchup between the call to time_init()
-	 * and sti() in init/main.c.
-	 *
-	 * This also means if we're delayed for > HZ
-	 * we lose those ticks.  If we're delayed for > HZ
-	 * then we have something wrong anyway, though.
-	 *
-	 * -- Cort
-	 */
-	if ( d < (-1*decrementer_count) )
-		d = 0;
-	set_dec(d + decrementer_count);
-	if ( !smp_processor_id() )
-	{
+	do { 
+		jiffy_stamp += tb_ticks_per_jiffy;
+	  	if (smp_processor_id()) continue;
+		/* We are in an interrupt, no need to save/restore flags */
+		write_lock(&xtime_lock);
+		tb_last_stamp = jiffy_stamp;
 		do_timer(regs);
-#if 0
-	/* -- BenH -- I'm removing this for now since it can cause various
-	 *            troubles with local-time RTCs. Now that we have a
-	 *            /dev/rtc that uses ppc_md.set_rtc_time() on mac, it
-	 *            should be possible to program the RTC from userland
-	 *            in all cases.
-	 */
+
 		/*
-		 * update the rtc when needed
+		 * update the rtc when needed, this should be performed on the
+		 * right fraction of a second. Half or full second ?
+		 * Full second works on mk48t59 clocks, others need testing.
+		 * Note that this update is basically only used through
+		 * the adjtimex system calls. Setting the HW clock in
+		 * any other way is a /dev/rtc and userland business.
+		 * This is still wrong by -0.5/+1.5 jiffies because of the
+		 * timer interrupt resolution and possible delay, but here we
+		 * hit a quantization limit which can only be solved by higher
+		 * resolution timers and decoupling time management from timer
+		 * interrupts. This is also wrong on the clocks
+		 * which require being written at the half second boundary.
+		 * We should have an rtc call that only sets the minutes and
+		 * seconds like on Intel to avoid problems with non UTC clocks.
 		 */
-		read_lock_irqsave(&xtime_lock, flags);
-		if ( (time_status & STA_UNSYNC) &&
-		     ((xtime.tv_sec > last_rtc_update + 60) ||
-		      (xtime.tv_sec < last_rtc_update)) )
-		{
-			if (ppc_md.set_rtc_time(xtime.tv_sec) == 0)
-				last_rtc_update = xtime.tv_sec;
+		if ( (time_status & STA_UNSYNC) == 0 &&
+		     xtime.tv_sec - last_rtc_update >= 659 &&
+		     abs(xtime.tv_usec - (1000000-1000000/HZ)) < 500000/HZ &&
+		     jiffies - wall_jiffies == 1) {
+		  	if (ppc_md.set_rtc_time(xtime.tv_sec+1 + time_offset) == 0)
+				last_rtc_update = xtime.tv_sec+1;
 			else
-				/* do it again in 60 s */
-				last_rtc_update = xtime.tv_sec;
+				/* Try again one minute later */
+				last_rtc_update += 60;
 		}
-		read_unlock_irqrestore(&xtime_lock, flags);
-#endif			
-	}
+		write_unlock(&xtime_lock);
+	} while((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) < 0);
+	set_dec(next_dec);
+	last_jiffy_stamp(cpu) = jiffy_stamp;
+
 #ifdef CONFIG_SMP
 	smp_local_timer_interrupt(regs);
 #endif		
 	
-	if ( ppc_md.heartbeat && !ppc_md.heartbeat_count--)
+	if (ppc_md.heartbeat && !ppc_md.heartbeat_count--)
 		ppc_md.heartbeat();
 	
 	hardirq_exit(cpu);
@@ -162,106 +165,138 @@
  */
 void do_gettimeofday(struct timeval *tv)
 {
-	unsigned long flags, diff;
+	unsigned long flags;
+	unsigned delta, lost_ticks, usec, sec;
 
-	save_flags(flags);
-	cli();
 	read_lock_irqsave(&xtime_lock, flags);
-	*tv = xtime;
+	sec = xtime.tv_sec;
+	usec = xtime.tv_usec;
+	delta = tb_ticks_since(tb_last_stamp);
+#ifdef CONFIG_SMP
+	/* As long as timebases are not in sync, gettimeofday can only
+	 * have jiffy resolution on SMP.
+	 */
+	if (_machine != _MACH_Pmac)
+		delta = 0;
+#endif /* CONFIG_SMP */
+	lost_ticks = jiffies - wall_jiffies;
 	read_unlock_irqrestore(&xtime_lock, flags);
-	/* XXX we don't seem to have the decrementers synced properly yet */
-#ifndef CONFIG_SMP
-	asm volatile("mftb %0" : "=r" (diff) );
-	diff -= last_tb;
-	tv->tv_usec += diff * count_period_num / count_period_den;
-	tv->tv_sec += tv->tv_usec / 1000000;
-	tv->tv_usec = tv->tv_usec % 1000000;
-#endif
-	
-	restore_flags(flags);
+
+	usec += mulhwu(tb_to_us, tb_ticks_per_jiffy * lost_ticks + delta);	/* ticks since xtime was last updated, scaled to us */
+	while (usec >= 1000000) {	/* carry whole seconds; tv_usec must end up in [0, 999999] */
+	  	sec++;
+		usec -= 1000000;
+	}
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
 }
 
 void do_settimeofday(struct timeval *tv)
 {
 	unsigned long flags;
-	int frac_tick;
-	
-	last_rtc_update = 0; /* so the rtc gets updated soon */
-	
-	frac_tick = tv->tv_usec % (1000000 / HZ);
-	save_flags(flags);
-	cli();
+	int tb_delta, new_usec, new_sec;
+
 	write_lock_irqsave(&xtime_lock, flags);
-	xtime.tv_sec = tv->tv_sec;
-	xtime.tv_usec = tv->tv_usec - frac_tick;
-	write_unlock_irqrestore(&xtime_lock, flags);
-	set_dec(frac_tick * count_period_den / count_period_num);
+	/* Updating the RTC is not the job of this code. If the time is
+	 * stepped under NTP, the RTC will be updated after STA_UNSYNC
+	 * is cleared. Tools like clock/hwclock either copy the RTC
+	 * to the system time, in which case there is no point in writing
+	 * to the RTC again, or write to the RTC but then they don't call
+	 * settimeofday to perform this operation. Note also that
+	 * we don't touch the decrementer since:
+	 * a) it would lose timer interrupt synchronization on SMP
+	 * (if it is working one day)
+	 * b) it could make one jiffy spuriously shorter or longer
+	 * which would introduce another source of uncertainty potentially
+	 * harmful to relatively short timers.
+	 */
+
+	/* This works perfectly on SMP only if the tb are in sync but 
+	 * guarantees an error < 1 jiffy even if they are off by eons,
+	 * still reasonable when gettimeofday resolution is 1 jiffy.
+	 */
+	tb_delta = tb_ticks_since(last_jiffy_stamp(smp_processor_id()));
+	tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy;
+	new_sec = tv->tv_sec;
+	new_usec = tv->tv_usec - mulhwu(tb_to_us, tb_delta);
+	while (new_usec <0) {
+		new_sec--; 
+		new_usec += 1000000;
+	}
+	xtime.tv_usec = new_usec;
+	xtime.tv_sec = new_sec;
+
+	/* In case of a large backwards jump in time with NTP, we want the 
+	 * clock to be updated as soon as the PLL is again in lock.
+	 */
+	last_rtc_update = new_sec - 658;
+
 	time_adjust = 0;                /* stop active adjtime() */
 	time_status |= STA_UNSYNC;
 	time_state = TIME_ERROR;        /* p. 24, (a) */
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	restore_flags(flags);
+	write_unlock_irqrestore(&xtime_lock, flags);
 }
 
 
 void __init time_init(void)
 {
+	time_t sec, old_sec;
+	unsigned old_stamp, stamp, elapsed;
+	/* This function is only called on the boot processor */
 	unsigned long flags;
+
         if (ppc_md.time_init != NULL)
-        {
-                ppc_md.time_init();
-        }
+                time_offset = ppc_md.time_init();
 
-	if ((_get_PVR() >> 16) == 1) {
+	if (__USE_RTC()) {
 		/* 601 processor: dec counts down by 128 every 128ns */
-		decrementer_count = DECREMENTER_COUNT_601;
-		count_period_num = COUNT_PERIOD_NUM_601;
-		count_period_den = COUNT_PERIOD_DEN_601;
-        } else if (!smp_processor_id()) {
+		tb_ticks_per_jiffy = DECREMENTER_COUNT_601;
+		/* mulhwu_scale_factor(1000000000, 1000000) is 0x418937 */
+		tb_to_us = 0x418937;
+        } else {
                 ppc_md.calibrate_decr();
 	}
 
+	/* Now that the decrementer is calibrated, it can be used in case the 
+	 * clock is stuck, but the fact that we have to handle the 601
+	 * makes things more complex. Repeatedly read the RTC until the
+	 * next second boundary to try to achieve some precision...
+	 */
+	stamp = get_native_tbl();
+	sec = ppc_md.get_rtc_time();
+	elapsed = 0;
+	do {
+		old_stamp = stamp; 
+		old_sec = sec;
+		stamp = get_native_tbl();
+		if (__USE_RTC() && stamp < old_stamp) old_stamp -= 1000000000;
+		elapsed += stamp - old_stamp;
+		sec = ppc_md.get_rtc_time();
+	} while ( sec == old_sec && elapsed < 2*HZ*tb_ticks_per_jiffy);
+	if (sec==old_sec) {
+		printk("Warning: real time clock seems stuck!\n");
+	}
 	write_lock_irqsave(&xtime_lock, flags);
-	xtime.tv_sec = ppc_md.get_rtc_time();
+	xtime.tv_sec = sec;
+	last_jiffy_stamp(0) = tb_last_stamp = stamp;
 	xtime.tv_usec = 0;
+	/* No update now, we just read the time from the RTC ! */
+	last_rtc_update = xtime.tv_sec;
 	write_unlock_irqrestore(&xtime_lock, flags);
+	/* Not exact, but the timer interrupt takes care of this */
+	set_dec(tb_ticks_per_jiffy);
 
-	set_dec(decrementer_count);
-	/* allow setting the time right away */
-	last_rtc_update = 0;
-}
-
-/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
- * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
- * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
- *
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
- * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines were long is 32-bit! (However, as time_t is signed, we
- * will already get problems at other places on 2038-01-19 03:14:08)
- */
-unsigned long mktime(unsigned int year, unsigned int mon,
-		     unsigned int day, unsigned int hour,
-		     unsigned int min, unsigned int sec)
-{
-	
-	if (0 >= (int) (mon -= 2)) {	/* 1..12 -> 11,12,1..10 */
-		mon += 12;	/* Puts Feb last since it has leap day */
-		year -= 1;
-	}
-	return (((
-		(unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) +
-		year*365 - 719499
-		)*24 + hour /* now have hours */
-		)*60 + min /* now have minutes */
-		)*60 + sec; /* finally seconds */
+	/* If the platform provided a timezone (pmac), we correct the time
+	 * using do_sys_settimeofday() which in turn calls warp_clock()
+	 */
+        if (time_offset) {
+        	struct timezone tz;
+        	tz.tz_minuteswest = -time_offset / 60;
+        	tz.tz_dsttime = 0;
+        	do_sys_settimeofday(NULL, &tz);
+        }
 }
 
 #define TICK_SIZE tick
@@ -354,3 +389,31 @@
 	 */
 	GregorianDay(tm);
 }
+
+/* Auxiliary function to compute scaling factors: returns a multiplier mlt
+ * for which mulhwu(inscale, mlt) approximates outscale.  A timebase running
+ * at 1/4 of the bus frequency, giving a resolution of a few tens of
+ * nanoseconds, is quite nice: it makes this computation very precise
+ * (27-28 bits typically), which is optimistic considering the stability of
+ * most processor clock oscillators and the precision with which the
+ * timebase frequency is measured, but does no harm.
+ */
+unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) {
+	unsigned mlt=0, tmp, err;
+	/* No concern for performance, it's done once: build mlt bit by bit from
+	 * the top bit down, keeping a bit while the product stays below outscale.
+	 */
+	for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
+		if (mulhwu(inscale, mlt|tmp) < outscale) mlt|=tmp;
+	}
+	/* We might still be off by 1 for the best approximation, so round up
+	 * when inscale*(mlt+1), truncated to unsigned, is at most inscale/2.
+	 * A side effect is that if outscale is too large the result is zero.
+	 * Many corner cases have been checked and seem to work,
+	 * some might have been forgotten in the test however.
+	 */
+	err = inscale*(mlt+1);
+	if (err <= inscale/2) mlt++;
+	return mlt;
+}
+

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)