Re: NMI handling rework for x86

Corey Minyard (cminyard@mvista.com)
Fri, 15 Nov 2002 08:13:51 -0600


John Levon wrote:

>On Thu, Nov 14, 2002 at 10:30:16PM -0600, Corey Minyard wrote:
>
>>Since a lot of things are hacking into this code (lkcd, kdb, oprofile,
>>nmi watchdog, and now my IPMI watchdog pretimeout), it would be very
>>nice to get their junk out of this code and allow them to bind in
>>nicely, and allow binding from modules.
>
>I've just noticed you haven't fixed the watchdog vs. oprofile case. You
>pass in the handled flag to the NMI watchdog handler, but you ignore the
>value and always do the perfctr reset. You /must/ only do the reset if
>handled == false, or you'll screw up oprofile when it's running.
>
I don't think that's a good idea for two reasons:

* If oprofile is using only a counter that the NMI watchdog is not
using, skipping the reset would silently cause the NMI watchdog to
stop working. I know that's not the case now, but it could be in
the future.
* The oprofile code will always reset the counter anyway, so the NMI
watchdog will never see the timeout; the unconditional reset does no
harm.

The two are in a somewhat unnatural relationship at the moment. IMHO,
it would be better to have a separate handler for the perf counters
that they both use, but that's beyond the scope of this patch.
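
To illustrate the binding itself, here is roughly what a module would
do with the struct nmi_handler and request_nmi()/release_nmi() from
the attached patch. This is only a sketch; the my_* names are made up:

#include <linux/notifier.h>
#include <asm/nmi.h>

static int my_nmi_handler(void *dev_id, struct pt_regs *regs,
			  int cpu, int handled)
{
	/* Claim the NMI only if our (hypothetical) device raised it. */
	if (!my_device_raised_nmi(dev_id))
		return NOTIFY_DONE;
	/* ... service the device ... */
	return NOTIFY_OK;
}

static struct nmi_handler my_nmi = {
	.link     = LIST_HEAD_INIT(my_nmi.link),
	.dev_name = "my_driver",
	.dev_id   = NULL,
	.handler  = my_nmi_handler,
	.priority = 128,	/* mid-level priority */
};

The module's init would then call request_nmi(&my_nmi), which fails
with -EBUSY if the handler is already registered, and the exit path
would call release_nmi(&my_nmi), which blocks until no CPU can still
be executing the handler.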

>
>also, the diff would be much easier to read as a separate "mv nmi.c
>nmi_watchdog.c" then diff against that
>
Yes, but a little harder to apply :-). I'll attach it that way. As
John says, you need to do a "mv nmi.c nmi_watchdog.c" in the
arch/i386/kernel directory before you apply this patch.
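
For example (assuming a -p1 apply from the top of the tree, which is
what the linux.orig/ and linux/ prefixes in the diff suggest):

	cd linux/arch/i386/kernel
	mv nmi.c nmi_watchdog.c
	cd ../../..
	patch -p1 < linux-nmi-v10.diff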

-Corey

[Attachment: linux-nmi-v10.diff]

--- linux.orig/arch/i386/kernel/Makefile Thu Nov 14 21:08:35 2002
+++ linux/arch/i386/kernel/Makefile Thu Nov 14 21:14:27 2002
@@ -8,7 +8,7 @@

obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
- pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o
+ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o nmi.o

obj-y += cpu/
obj-y += timers/
@@ -22,7 +22,7 @@
obj-$(CONFIG_ACPI_SLEEP) += acpi_wakeup.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o trampoline.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi_watchdog.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
--- linux.orig/arch/i386/kernel/i386_ksyms.c Thu Nov 14 21:05:52 2002
+++ linux/arch/i386/kernel/i386_ksyms.c Thu Nov 14 21:07:25 2002
@@ -92,6 +92,9 @@
EXPORT_SYMBOL(cpu_khz);
EXPORT_SYMBOL(apm_info);

+EXPORT_SYMBOL(request_nmi);
+EXPORT_SYMBOL(release_nmi);
+
#ifdef CONFIG_DEBUG_IOVIRT
EXPORT_SYMBOL(__io_virt_debug);
#endif
@@ -185,8 +188,6 @@

EXPORT_SYMBOL_GPL(register_profile_notifier);
EXPORT_SYMBOL_GPL(unregister_profile_notifier);
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-EXPORT_SYMBOL_GPL(unset_nmi_callback);

#undef memcpy
#undef memset
--- linux.orig/arch/i386/kernel/irq.c Thu Nov 14 21:05:52 2002
+++ linux/arch/i386/kernel/irq.c Thu Nov 14 21:07:25 2002
@@ -131,6 +131,8 @@
* Generic, controller-independent functions:
*/

+extern void nmi_append_user_names(struct seq_file *p);
+
int show_interrupts(struct seq_file *p, void *v)
{
int i, j;
@@ -166,6 +168,8 @@
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
p += seq_printf(p, "%10u ", nmi_count(j));
+ seq_printf(p, " ");
+ nmi_append_user_names(p);
seq_putc(p, '\n');
#if CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
--- linux.orig/arch/i386/kernel/nmi.c Fri Nov 15 08:07:13 2002
+++ linux/arch/i386/kernel/nmi.c Thu Nov 14 21:15:33 2002
@@ -0,0 +1,245 @@
+/*
+ * linux/arch/i386/nmi.c
+ *
+ * NMI support.
+ *
+ * Corey Minyard <cminyard@mvista.com>
+ *
+ * Moved some of this over from traps.c.
+ */
+
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+
+#include <asm/io.h>
+#include <asm/nmi.h>
+
+extern void show_registers(struct pt_regs *regs);
+
+/*
+ * A list of handlers for NMIs. The handlers are called in order
+ * when an NMI from an otherwise unidentifiable source comes in.
+ * If one of them handles the NMI, it should return NOTIFY_OK;
+ * otherwise it should return NOTIFY_DONE. NMI handlers cannot
+ * take spinlocks, so freeing a handler has to be done differently.
+ * A spinlock protects the list from multiple writers. When a
+ * handler is removed from the list, its freeing is deferred via
+ * call_rcu() (using a separate link, so the "next" pointer seen
+ * by a running NMI stays valid). By the time the RCU callback
+ * runs, nothing in the NMI code can still be using the entry.
+ */
+static struct list_head nmi_handler_list = LIST_HEAD_INIT(nmi_handler_list);
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * To free the list item, we use RCU. The RCU callback will not
+ * run until all processors have done a context switch, gone idle,
+ * or gone to a user process, so it is guaranteed that, when it
+ * runs, any NMI handler running at release time has completed and
+ * the list item can be safely freed.
+ */
+static void free_nmi_handler(void *arg)
+{
+ struct nmi_handler *handler = arg;
+
+ INIT_LIST_HEAD(&(handler->link));
+ complete(&(handler->complete));
+}
+
+int request_nmi(struct nmi_handler *handler)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h = NULL;
+
+ if (!list_empty(&(handler->link)))
+ return -EBUSY;
+
+ spin_lock(&nmi_handler_lock);
+
+ __list_for_each(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ if (curr_h->priority <= handler->priority)
+ break;
+ }
+
+ /* list_add_rcu takes care of the memory barrier; insert
+ in descending priority order. */
+ if (!curr_h)
+ list_add_rcu(&(handler->link), &nmi_handler_list);
+ else if (curr_h->priority <= handler->priority)
+ list_add_rcu(&(handler->link), curr_h->link.prev);
+ else
+ list_add_rcu(&(handler->link), &(curr_h->link));
+
+ spin_unlock(&nmi_handler_lock);
+ return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+ spin_lock(&nmi_handler_lock);
+ list_del_rcu(&(handler->link));
+ init_completion(&(handler->complete));
+ call_rcu(&(handler->rcu), free_nmi_handler, handler);
+ spin_unlock(&nmi_handler_lock);
+
+ /* Wait for the handler to finish being freed. This cannot
+ be interrupted; we must wait until it is finished. */
+ wait_for_completion(&(handler->complete));
+}
+
+void nmi_append_user_names(struct seq_file *p)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h;
+
+ spin_lock(&nmi_handler_lock);
+ __list_for_each(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ if (curr_h->dev_name)
+ p += seq_printf(p, " %s", curr_h->dev_name);
+ }
+ spin_unlock(&nmi_handler_lock);
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+
+ /* Clear and disable the memory parity error line. */
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ unsigned long i;
+
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+ i = 2000;
+ while (--i) udelay(1000);
+ reason &= ~8;
+ outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_MCA
+ /* Might actually be able to figure out what the guilty party
+ * is. */
+ if( MCA_bus ) {
+ mca_handle_nmi();
+ return;
+ }
+#endif
+ printk("Uhhuh. Received NMI for unknown reason on CPU %d.\n", cpu);
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+/* Check "normal" sources of NMI. */
+static int nmi_std (void * dev_id, struct pt_regs * regs, int cpu, int handled)
+{
+ unsigned char reason;
+
+ reason = inb(0x61);
+ if (reason & 0xc0) {
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct nmi_handler nmi_std_handler =
+{
+ .link = LIST_HEAD_INIT(nmi_std_handler.link),
+ .dev_name = "nmi_std",
+ .dev_id = NULL,
+ .handler = nmi_std,
+ .priority = 128, /* mid-level priority. */
+};
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h;
+ int val;
+ int cpu;
+ int handled = 0;
+
+
+ nmi_enter();
+
+ cpu = smp_processor_id();
+ ++nmi_count(cpu);
+
+ /*
+ * Since NMIs are edge-triggered, two sources can merge into one
+ * NMI, so we call every handler rather than stopping at the first.
+ */
+
+ __list_for_each_rcu(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ val = curr_h->handler(curr_h->dev_id, regs, cpu, handled);
+ switch (val) {
+ case NOTIFY_OK:
+ handled = 1;
+ break;
+
+ case NOTIFY_DONE:
+ default:
+ ;
+ }
+ }
+
+ if (!handled)
+ unknown_nmi_error(regs, cpu);
+ else {
+ /*
+ * Reassert NMI in case it became active meanwhile
+ * as it's edge-triggered. Don't do this if the NMI
+ * wasn't handled to avoid an infinite NMI loop.
+ *
+ * This is necessary in case another external NMI
+ * arrives while we are processing this one. External
+ * NMIs are level-generated at the source, but the
+ * processor's NMI input is edge-triggered, so if a
+ * second source asserts NMI while the first is still
+ * asserted, the level never drops to produce another
+ * edge and no more NMIs will arrive. This does NOT
+ * apply to internally generated NMIs, though, so the
+ * same trick can't be used to call only one handler
+ * at a time; if two internal NMIs came in at the
+ * same time, one might be missed.
+ */
+ outb(0x8f, 0x70);
+ inb(0x71); /* dummy */
+ outb(0x0f, 0x70);
+ inb(0x71); /* dummy */
+ }
+
+ nmi_exit();
+}
+
+void __init init_nmi(void)
+{
+ request_nmi(&nmi_std_handler);
+}
--- linux.orig/arch/i386/kernel/nmi_watchdog.c Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/nmi_watchdog.c Thu Oct 24 20:54:19 2002
@@ -1,5 +1,5 @@
/*
- * linux/arch/i386/nmi.c
+ * linux/arch/i386/nmi_watchdog.c
*
* NMI watchdog support on APIC systems
*
@@ -20,14 +20,29 @@
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
+#include <asm/nmi.h>

unsigned int nmi_watchdog = NMI_NONE;
static unsigned int nmi_hz = HZ;
-unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+
+/* This is for the I/O APIC, until we can figure out how to tell whether
+ an NMI came from it. If the NMI was not handled before now, handle it. */
+static int dummy_watchdog_reset(int handled)
+{
+ return !handled;
+}
+
+/*
+ * Returns 1 if the watchdog counter was a source of the NMI, and
+ * re-arms it to go off again.
+ */
+static int (*watchdog_reset)(int handled) = dummy_watchdog_reset;
+
extern void show_registers(struct pt_regs *regs);

#define K7_EVNTSEL_ENABLE (1 << 22)
@@ -102,6 +117,18 @@
return 0;
}

+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs, int cpu,
+ int handled);
+
+static struct nmi_handler nmi_watchdog_handler =
+{
+ .link = LIST_HEAD_INIT(nmi_watchdog_handler.link),
+ .dev_name = "nmi_watchdog",
+ .dev_id = NULL,
+ .handler = nmi_watchdog_tick,
+ .priority = 255, /* We want to be relatively high priority. */
+};
+
static int __init setup_nmi_watchdog(char *str)
{
int nmi;
@@ -110,6 +137,7 @@

if (nmi >= NMI_INVALID)
return 0;
+
if (nmi == NMI_NONE)
nmi_watchdog = nmi;
/*
@@ -131,6 +159,17 @@
*/
if (nmi == NMI_IO_APIC)
nmi_watchdog = nmi;
+
+ if (nmi_watchdog != NMI_NONE) {
+ if (request_nmi(&nmi_watchdog_handler) != 0) {
+ /* Couldn't add a watchdog handler, give up. */
+ printk(KERN_WARNING
+ "nmi_watchdog: Couldn't request nmi\n");
+ nmi_watchdog = NMI_NONE;
+ return 0;
+ }
+ }
+
return 1;
}

@@ -216,11 +255,23 @@
wrmsr(base+i, 0, 0);
}

+static int k7_watchdog_reset(int handled)
+{
+ unsigned int low, high;
+ int source;
+
+ rdmsr(MSR_K7_PERFCTR0, low, high);
+ source = (low & (1 << 31)) == 0;
+ if (source)
+ wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+ return source;
+}
+
static void __pminit setup_k7_watchdog(void)
{
unsigned int evntsel;

- nmi_perfctr_msr = MSR_K7_PERFCTR0;
+ watchdog_reset = k7_watchdog_reset;

clear_msr_range(MSR_K7_EVNTSEL0, 4);
clear_msr_range(MSR_K7_PERFCTR0, 4);
@@ -238,11 +289,23 @@
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}

+static int p6_watchdog_reset(int handled)
+{
+ unsigned int low, high;
+ int source;
+
+ rdmsr(MSR_P6_PERFCTR0, low, high);
+ source = (low & (1 << 31)) == 0;
+ if (source)
+ wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+ return source;
+}
+
static void __pminit setup_p6_watchdog(void)
{
unsigned int evntsel;

- nmi_perfctr_msr = MSR_P6_PERFCTR0;
+ watchdog_reset = p6_watchdog_reset;

clear_msr_range(MSR_P6_EVNTSEL0, 2);
clear_msr_range(MSR_P6_PERFCTR0, 2);
@@ -260,6 +323,29 @@
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}

+static int p4_watchdog_reset(int handled)
+{
+ unsigned int low, high;
+ int source;
+
+ rdmsr(MSR_P4_IQ_COUNTER0, low, high);
+ source = (low & (1 << 31)) == 0;
+ if (source) {
+ /*
+ * P4 quirks:
+ * - An overflown perfctr will assert its interrupt
+ * until the OVF flag in its CCCR is cleared.
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ }
+ return source;
+}
+
static int __pminit setup_p4_watchdog(void)
{
unsigned int misc_enable, dummy;
@@ -268,7 +354,7 @@
if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
return 0;

- nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+ watchdog_reset = p4_watchdog_reset;

if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
clear_msr_range(0x3F1, 2);
@@ -350,15 +436,18 @@
alert_counter[i] = 0;
}

-void nmi_watchdog_tick (struct pt_regs * regs)
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs, int cpu,
+ int handled)
{
-
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
* smp_processor_id().
*/
- int sum, cpu = smp_processor_id();
+ int sum;
+
+ if (!watchdog_reset(handled))
+ return NOTIFY_DONE; /* We are not an NMI source. */

sum = irq_stat[cpu].apic_timer_irqs;

@@ -387,18 +476,6 @@
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
- if (nmi_perfctr_msr) {
- if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
- wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
- }
+
+ return NOTIFY_OK;
}
--- linux.orig/arch/i386/kernel/traps.c Thu Nov 14 21:08:35 2002
+++ linux/arch/i386/kernel/traps.c Thu Nov 14 21:12:47 2002
@@ -40,7 +40,6 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/nmi.h>

#include <asm/smp.h>
#include <asm/pgalloc.h>
@@ -52,6 +51,7 @@
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
asmlinkage void lcall27(void);
+void init_nmi(void);

struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
{ 0, 0 }, { 0, 0 } };
@@ -443,112 +443,6 @@
}
}

-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips\n");
-
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
-}
-
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
-{
- unsigned long i;
-
- printk("NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- i = 2000;
- while (--i) udelay(1000);
- reason &= ~8;
- outb(reason, 0x61);
-}
-
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{
-#ifdef CONFIG_MCA
- /* Might actually be able to figure out what the guilty party
- * is. */
- if( MCA_bus ) {
- mca_handle_nmi();
- return;
- }
-#endif
- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
-}
-
-static void default_do_nmi(struct pt_regs * regs)
-{
- unsigned char reason = inb(0x61);
-
- if (!(reason & 0xc0)) {
-#if CONFIG_X86_LOCAL_APIC
- /*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
- */
- if (nmi_watchdog) {
- nmi_watchdog_tick(regs);
- return;
- }
-#endif
- unknown_nmi_error(reason, regs);
- return;
- }
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered.
- */
- outb(0x8f, 0x70);
- inb(0x71); /* dummy */
- outb(0x0f, 0x70);
- inb(0x71); /* dummy */
-}
-
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
- return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
-asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
-{
- int cpu;
-
- nmi_enter();
-
- cpu = smp_processor_id();
- ++nmi_count(cpu);
-
- if (!nmi_callback(regs, cpu))
- default_do_nmi(regs);
-
- nmi_exit();
-}
-
-void set_nmi_callback(nmi_callback_t callback)
-{
- nmi_callback = callback;
-}
-
-void unset_nmi_callback(void)
-{
- nmi_callback = dummy_nmi_callback;
-}
-
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -931,4 +825,6 @@
cpu_init();

trap_init_hook();
+
+ init_nmi();
}
--- linux.orig/arch/i386/oprofile/nmi_int.c Thu Nov 14 21:05:52 2002
+++ linux/arch/i386/oprofile/nmi_int.c Thu Nov 14 21:07:25 2002
@@ -54,12 +54,24 @@


// FIXME: kernel_only
-static int nmi_callback(struct pt_regs * regs, int cpu)
+static int nmi_callback(void *dev_id, struct pt_regs *regs, int cpu, int handled)
{
- return (model->check_ctrs(cpu, &cpu_msrs[cpu], regs));
+ if (model->check_ctrs(cpu, &cpu_msrs[cpu], regs))
+ return NOTIFY_OK;
+
+ return NOTIFY_DONE;
}

-
+static struct nmi_handler nmi_handler =
+{
+ .link = LIST_HEAD_INIT(nmi_handler.link),
+ .dev_name = "oprofile",
+ .dev_id = NULL,
+ .handler = nmi_callback,
+ .priority = 1023, /* Very high priority. */
+};
+
+
static void nmi_save_registers(struct op_msrs * msrs)
{
unsigned int const nr_ctrs = model->num_counters;
@@ -96,8 +108,12 @@
}


+static void nmi_cpu_shutdown(void * dummy);
+
static int nmi_setup(void)
{
+ int rv;
+
/* We walk a thin line between law and rape here.
* We need to be careful to install our NMI handler
* without actually triggering any NMIs as this will
@@ -105,7 +121,13 @@
*/
smp_call_function(nmi_cpu_setup, NULL, 0, 1);
nmi_cpu_setup(0);
- set_nmi_callback(nmi_callback);
+ rv = request_nmi(&nmi_handler);
+ if (rv) {
+ smp_call_function(nmi_cpu_shutdown, NULL, 0, 1);
+ nmi_cpu_shutdown(0);
+ return rv;
+ }
+
oprofile_pmdev = set_nmi_pm_callback(oprofile_pm_callback);
return 0;
}
@@ -155,7 +177,7 @@
static void nmi_shutdown(void)
{
unset_nmi_pm_callback(oprofile_pmdev);
- unset_nmi_callback();
+ release_nmi(&nmi_handler);
smp_call_function(nmi_cpu_shutdown, NULL, 0, 1);
nmi_cpu_shutdown(0);
}
--- linux.orig/include/asm-i386/apic.h Mon Oct 21 13:26:04 2002
+++ linux/include/asm-i386/apic.h Tue Oct 22 12:40:16 2002
@@ -79,7 +79,6 @@
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
--- linux.orig/include/asm-i386/nmi.h Mon Oct 21 13:25:52 2002
+++ linux/include/asm-i386/nmi.h Thu Oct 24 20:50:22 2002
@@ -5,26 +5,11 @@
#define ASM_NMI_H

#include <linux/pm.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>

struct pt_regs;

-typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
-
-/**
- * set_nmi_callback
- *
- * Set a handler for an NMI. Only one handler may be
- * set. Return 1 if the NMI was handled.
- */
-void set_nmi_callback(nmi_callback_t callback);
-
-/**
- * unset_nmi_callback
- *
- * Remove the handler previously set.
- */
-void unset_nmi_callback(void);
-
#ifdef CONFIG_PM

/** Replace the PM callback routine for NMI. */
@@ -45,5 +30,34 @@
}

#endif /* CONFIG_PM */
+
+
+/**
+ * Register a handler to be called when an NMI occurs. If the
+ * handler actually handles the NMI, it should return NOTIFY_OK;
+ * if it did not handle the NMI, it should return NOTIFY_DONE.
+ * It may OR NOTIFY_STOP_MASK into the return value if it does
+ * not want the handlers after it to be notified.
+ */
+#define HAVE_NMI_HANDLER 1
+struct nmi_handler
+{
+ struct list_head link; /* You must init this before use. */
+
+ char *dev_name;
+ void *dev_id;
+ int (*handler)(void *dev_id, struct pt_regs *regs, int cpu, int handled);
+ int priority; /* Handlers called in priority order. */
+
+ /* Don't mess with anything below here. */
+
+ struct rcu_head rcu;
+ struct completion complete;
+};
+
+int request_nmi(struct nmi_handler *handler);
+
+/* Release will block until the handler is completely free. */
+void release_nmi(struct nmi_handler *handler);

#endif /* ASM_NMI_H */
--- linux.orig/include/linux/nmi.h Thu Jun 20 17:53:40 2002
+++ linux/include/linux/nmi.h Thu Oct 24 16:28:53 2002
@@ -1,22 +1,11 @@
/*
- * linux/include/linux/nmi.h
+ * linux/include/linux/nmi.h
+ *
+ * (C) 2002 Corey Minyard <cminyard@mvista.com>
+ *
+ * Include file for NMI handling.
*/
-#ifndef LINUX_NMI_H
-#define LINUX_NMI_H
-
-#include <asm/irq.h>
-
-/**
- * touch_nmi_watchdog - restart NMI watchdog timeout.
- *
- * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
- * may be used to reset the timeout - for code which intentionally
- * disables interrupts for a long time. This call is stateless.
- */
-#ifdef ARCH_HAS_NMI_WATCHDOG
-extern void touch_nmi_watchdog(void);
-#else
-# define touch_nmi_watchdog() do { } while(0)
-#endif

+#if defined(__i386__)
+#include <asm/nmi.h>
#endif
--- linux.orig/include/linux/nmi_watchdog.h Thu Oct 24 19:56:54 2002
+++ linux/include/linux/nmi_watchdog.h Thu Oct 24 12:50:30 2002
@@ -0,0 +1,22 @@
+/*
+ * linux/include/linux/nmi_watchdog.h
+ */
+#ifndef LINUX_NMI_WATCHDOG_H
+#define LINUX_NMI_WATCHDOG_H
+
+#include <asm/irq.h>
+
+/**
+ * touch_nmi_watchdog - restart NMI watchdog timeout.
+ *
+ * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
+ * may be used to reset the timeout - for code which intentionally
+ * disables interrupts for a long time. This call is stateless.
+ */
+#ifdef ARCH_HAS_NMI_WATCHDOG
+extern void touch_nmi_watchdog(void);
+#else
+# define touch_nmi_watchdog() do { } while(0)
+#endif
+
+#endif
--- linux.orig/kernel/sched.c Thu Nov 14 21:08:50 2002
+++ linux/kernel/sched.c Thu Nov 14 21:13:12 2002
@@ -17,7 +17,7 @@
*/

#include <linux/mm.h>
-#include <linux/nmi.h>
+#include <linux/nmi_watchdog.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/highmem.h>

