Re: Early boot oops with 2.5.67-bk(current) on a dual Athlon-MP

Andrew Morton (akpm@digeo.com)
Sun, 13 Apr 2003 16:52:59 -0700


Nicholas Wourms <nwourms@myrealbox.com> wrote:
>
> Hi,
>
> Attached is the oops (which results in a panic) when I
> attempt to boot the latest bk current on my machine.

The MCE code is using the workqueue infrastructure waaaaaaay earlier than it
is allowed to. It looks like the timer went off before the workqueue
initialisation had been run.

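This should fix it: instead of calling init_nonfatal_mce_checker() from the
K7 and P4 machine check init functions, register it with module_init() so the
polling timer is only started at initcall time.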

arch/i386/kernel/cpu/mcheck/k7.c | 4 ----
arch/i386/kernel/cpu/mcheck/mce.h | 2 --
arch/i386/kernel/cpu/mcheck/non-fatal.c | 5 ++++-
arch/i386/kernel/cpu/mcheck/p4.c | 3 ---
4 files changed, 4 insertions(+), 10 deletions(-)

diff -puN arch/i386/kernel/cpu/mcheck/k7.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/k7.c
--- 25/arch/i386/kernel/cpu/mcheck/k7.c~mce-workqueue-startup-fix 2003-04-13 16:41:36.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/k7.c 2003-04-13 16:42:24.000000000 -0700
@@ -92,8 +92,4 @@ void __init amd_mcheck_init(struct cpuin
set_in_cr4 (X86_CR4_MCE);
printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
-
-#ifdef CONFIG_X86_MCE_NONFATAL
- init_nonfatal_mce_checker();
-#endif
}
diff -puN arch/i386/kernel/cpu/mcheck/mce.h~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/mce.h
--- 25/arch/i386/kernel/cpu/mcheck/mce.h~mce-workqueue-startup-fix 2003-04-13 16:41:43.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/mce.h 2003-04-13 16:42:27.000000000 -0700
@@ -6,8 +6,6 @@ void intel_p5_mcheck_init(struct cpuinfo
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);

-void init_nonfatal_mce_checker(void);
-
/* Call the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);

diff -puN arch/i386/kernel/cpu/mcheck/non-fatal.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/non-fatal.c
--- 25/arch/i386/kernel/cpu/mcheck/non-fatal.c~mce-workqueue-startup-fix 2003-04-13 16:41:50.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/non-fatal.c 2003-04-13 16:45:01.000000000 -0700
@@ -11,6 +11,7 @@
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
+#include <linux/module.h>

#include <asm/processor.h>
#include <asm/system.h>
@@ -65,7 +66,7 @@ static void mce_timerfunc (unsigned long
add_timer (&mce_timer);
}

-void init_nonfatal_mce_checker()
+static int init_nonfatal_mce_checker()
{
if (timerset == 0) {
/* Set the timer to check for non-fatal
@@ -78,4 +79,6 @@ void init_nonfatal_mce_checker()
timerset = 1;
printk(KERN_INFO "Machine check exception polling timer started.\n");
}
+ return 0;
}
+module_init(init_nonfatal_mce_checker);
diff -puN arch/i386/kernel/cpu/mcheck/p4.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/p4.c
--- 25/arch/i386/kernel/cpu/mcheck/p4.c~mce-workqueue-startup-fix 2003-04-13 16:42:03.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/p4.c 2003-04-13 16:44:22.000000000 -0700
@@ -255,7 +255,4 @@ void __init intel_p4_mcheck_init(struct
intel_init_thermal(c);
#endif
}
-#ifdef CONFIG_X86_MCE_NONFATAL
- init_nonfatal_mce_checker();
-#endif
}

_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/