diff -urNp a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
--- a/arch/i386/kernel/smpboot.c	Fri Sep 27 23:49:54 2002
+++ b/arch/i386/kernel/smpboot.c	Tue Oct  8 11:37:56 2002
@@ -1194,6 +1194,11 @@ int __devinit __cpu_up(unsigned int cpu)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	zap_low_mappings();
+#ifdef CONFIG_NUMA
+	pooldata_lock();
+	bld_pools();
+	pooldata_unlock();
+#endif
All callers of bld_pools() need the pooldata lock - taking
it inside that function makes the code a little more readable.
Also I'd suggest renaming bld_pools() to build_pools() ;)
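Something like this (just a sketch of the shape, with your existing
lock helpers moved inside):

	void build_pools(void)
	{
		pooldata_lock();
		/* ... current bld_pools() body ... */
		pooldata_unlock();
	}

and smp_cpus_done() is back to a single call.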
-	cache_decay_ticks = 10;	/* XXX base this on PAL info and cache-bandwidth estimate */
+	cache_decay_ticks = 8;	/* XXX base this on PAL info and cache-bandwidth estimate */
Could you explain this change?  And its effect on non-NUMA IA64
machines?
 /**
+ * atomic_inc_return - increment atomic variable and return new value
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns its new value.  Note that
+ * the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_inc_return(atomic_t *v){
+	atomic_inc(v);
+	return v->counter;
+}
How do you guarantee this is atomic?  Please make it fit
Documentation/CodingStyle, btw.
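The atomic_inc() is atomic, but the plain read of v->counter
afterwards can see increments from other cpus.  On i386 the usual way
is one locked XADD, roughly (untested sketch; assumes a 486+ target
since the 386 has no XADD, and the LOCK macro from <asm/atomic.h>):

	static inline int atomic_inc_return(atomic_t *v)
	{
		int i = 1;

		/* xaddl leaves the old value in i and old + 1 in the
		 * counter, all in one locked instruction */
		__asm__ __volatile__(
			LOCK "xaddl %0, %1"
			: "+r" (i), "+m" (v->counter)
			: : "memory");
		return i + 1;
	}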
+int numpools, pool_ptr[MAX_NUMNODES+1], pool_cpus[NR_CPUS], pool_nr_cpus[MAX_NUMNODES];
+unsigned long pool_mask[MAX_NUMNODES];
Hmm, shouldn't those [MAX_NUMNODES] arrays be folded into some
per-node structure to avoid cacheline bouncing?
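E.g. something like (rough sketch, field names made up):

	/* one cacheline per node, so updating node A's pool data
	 * doesn't bounce a line that node B is busy reading */
	struct pool_data {
		int		nr_cpus;
		int		ptr;
		unsigned long	mask;
	} ____cacheline_aligned;

	static struct pool_data pool_data[MAX_NUMNODES];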
+void pooldata_lock(void)
+{
+	int i;
+ retry:
+	while (atomic_read(&pool_lock));
+	if (atomic_inc_return(&pool_lock) > 1) {
+		atomic_dec(&pool_lock);
+		goto retry;
+	}
Why not a simple spin_lock()?
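I.e. just (sketch, with pool_lock changed from an atomic_t to a
spinlock_t):

	static spinlock_t pool_lock = SPIN_LOCK_UNLOCKED;

	void pooldata_lock(void)
	{
		spin_lock(&pool_lock);
	}

spin_lock() already does the wait-then-grab dance for you, without
the retry window your open-coded loop has to handle by hand.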
+	/* 
+	 * Wait a while, any loops using pool data should finish
+	 * in between. This is VERY ugly and should be replaced
+	 * by some real RCU stuff. [EF]
+	 */
+	for (i=0; i<100; i++)
+		udelay(1000);
Urgg.  I'd suggest you switch to RCU now and make your patch apply
on top of it - another reason to apply the RCU core patch.
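The RCU shape would be roughly this (sketch only - synchronize_kernel()
is the primitive from the RCU core patch, and the pools pointer plus
the copy helper are made up for illustration):

	/* readers: no lock at all, just don't sleep in between */
	struct pools *p = pools;
	/* ... walk p ... */

	/* writer: replaces the 100ms udelay() loop */
	new = copy_and_rebuild_pools();	/* hypothetical */
	old = pools;
	wmb();			/* new must be fully built first */
	pools = new;
	synchronize_kernel();	/* wait out pre-existing readers */
	kfree(old);

No magic delays, and readers never stall.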
+void pooldata_unlock(void)
+{
+	atomic_dec(&pool_lock);
+}
Ditto for spin_unlock().
+	/* avoid deadlock by timer interrupt on own cpu */
+	if (atomic_read(&pool_lock)) return;
That's what spin_trylock() is for.
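Sketch (assuming the spinlock_t conversion from above):

	/* avoid deadlock by timer interrupt on own cpu */
	if (!spin_trylock(&pool_lock))
		return;
	...
	spin_unlock(&pool_lock);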
All in all your code doesn't seem to be very cacheline-friendly,
lots of global bouncing.  Do you have any numbers on how your
patch affects normal SMP configurations?