Re: 2.5.31: modules don't work at all

Andrew Morton (akpm@zip.com.au)
Mon, 12 Aug 2002 16:34:23 -0700


Linus Torvalds wrote:
>
> On Mon, 12 Aug 2002, Andrew Morton wrote:
> >
> > Gets tricky with nested lock_kernels.
>
> No, lock-kernel already only increments once, at the first lock_kernel. We
> have a totally separate counter for the BKL depth, see <asm/smplock.h>
>

There are eighteen smplock.h's, all different. At least one (SuperH)
hasn't been converted to preempt. I'd rather like to decouple the kmap
optimisation work from that mini-quagmire. It's very easy to do, with

current->flags |= PF_ATOMIC;
__copy_to_user(...);
current->flags &= ~PF_ATOMIC;

and that all works fine.
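
(The other half of that scheme is a one-line test in the fault handler.
A sketch, assuming PF_ATOMIC becomes a new PF_* flag - illustration
only, not part of the patch below:)

/* do_page_fault(): in an atomic copy, fail the fault rather than sleep */
if ((current->flags & PF_ATOMIC) || !mm)
	goto no_context;

The interrupted __copy_to_user() then returns the number of uncopied
bytes and the caller falls back to a copy which is allowed to sleep.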

However, soldiering on leads us to some difficulties. You're proposing,
effectively, that preempt_count gets shifted left one bit and that bit
zero becomes "has done lock_kernel()".

So a non-zero value anywhere in bits 0-31 of preempt_count means "may
not be preempted", and a non-zero value in bits 1-31 means "must not
explicitly call schedule".
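
Spelled out, the states would be (my reading of the proposal):

preempt_count() == 0: may be preempted, may call schedule()
preempt_count() == 1: holds lock_kernel() only: no preemption, but an
                      explicit schedule() is still legal
preempt_count() >= 2: neither preemption nor an explicit schedule()
                      (irq context, preempt_disable() and, on
                      preemptible kernels, spin_lock())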

Problem is, the semantics of this vary too much between preemptible
and non-preemptible kernels, because non-preemptible kernels do not
increment preempt_count in spin_lock().

Maybe my penny hasn't dropped yet, but I tend to feel that the
semantics of my "may_schedule()" below are too fragile for it to
be part of the API.
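
To illustrate the trap: on a CONFIG_PREEMPT=n build, spin_lock() leaves
preempt_count untouched, so may_schedule() happily says yes while a
spinlock is held. (Illustrative code, not from the patch:)

spin_lock(&some_lock); /* PREEMPT=n: preempt_count unchanged */
if (may_schedule()) /* true - the spinlock left no trace */
	schedule(); /* can deadlock if another task contends for the lock */
spin_unlock(&some_lock);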

(Untested, uncompiled code)

arch/i386/mm/fault.c | 2 -
include/asm-i386/smplock.h | 20 +++++++++++++-----
include/linux/preempt.h | 49 ++++++++++++++++++++++++++++++++-------------
3 files changed, 51 insertions(+), 20 deletions(-)

--- 2.5.31/arch/i386/mm/fault.c~fix-faults Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/arch/i386/mm/fault.c Mon Aug 12 16:14:21 2002
@@ -192,7 +192,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault..
*/
- if (preempt_count() || !mm)
+ if (!may_schedule() || !mm)
goto no_context;

#ifdef CONFIG_X86_REMOTE_DEBUG
--- 2.5.31/include/linux/preempt.h~fix-faults Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/include/linux/preempt.h Mon Aug 12 16:14:21 2002
@@ -5,29 +5,60 @@

#define preempt_count() (current_thread_info()->preempt_count)

+/*
+ * Bit zero of preempt_count means "holds lock_kernel".
+ * So a non-zero value in preempt_count() means "may not be preempted" and a
+ * non-zero value in bits 1-31 means "may not explicitly schedule".
+ */
+
#define inc_preempt_count() \
do { \
- preempt_count()++; \
+ preempt_count() += 2; \
} while (0)

#define dec_preempt_count() \
do { \
- preempt_count()--; \
+ preempt_count() -= 2; \
+} while (0)
+
+#define lock_kernel_enter() \
+do { \
+ preempt_count() |= 1; \
+} while (0)
+
+#define lock_kernel_exit() \
+do { \
+ preempt_count() &= ~1; \
} while (0)

+/*
+ * The semantics of this depend upon CONFIG_PREEMPT.
+ *
+ * With CONFIG_PREEMPT=y, may_schedule() returns false in irq context and
+ * inside spinlocks, and returns true inside lock_kernel().
+ *
+ * With CONFIG_PREEMPT=n, may_schedule() returns false in irq context, returns
+ * true inside spinlocks and returns true inside lock_kernel().
+ *
+ * But may_schedule() will also return false if the task has performed an
+ * explicit inc_preempt_count(), regardless of CONFIG_PREEMPT. Which is really
+ * the only situation in which may_schedule() is useful.
+ */
+#define may_schedule() (!(preempt_count() >> 1))
+
#ifdef CONFIG_PREEMPT

extern void preempt_schedule(void);

#define preempt_disable() \
do { \
- inc_preempt_count(); \
+ preempt_count() += 2; \
barrier(); \
} while (0)

#define preempt_enable_no_resched() \
do { \
- dec_preempt_count(); \
+ preempt_count() -= 2; \
barrier(); \
} while (0)

@@ -44,22 +75,12 @@ do { \
preempt_schedule(); \
} while (0)

-#define inc_preempt_count_non_preempt() do { } while (0)
-#define dec_preempt_count_non_preempt() do { } while (0)
-
#else

#define preempt_disable() do { } while (0)
#define preempt_enable_no_resched() do {} while(0)
#define preempt_enable() do { } while (0)
#define preempt_check_resched() do { } while (0)
-
-/*
- * Sometimes we want to increment the preempt count, but we know that it's
- * already incremented if the kernel is compiled for preemptibility.
- */
-#define inc_preempt_count_non_preempt() inc_preempt_count()
-#define dec_preempt_count_non_preempt() dec_preempt_count()

#endif

--- 2.5.31/include/asm-i386/smplock.h~fix-faults Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/include/asm-i386/smplock.h Mon Aug 12 16:15:20 2002
@@ -25,8 +25,10 @@ extern spinlock_t kernel_flag;
*/
#define release_kernel_lock(task) \
do { \
- if (unlikely(task->lock_depth >= 0)) \
+ if (unlikely(task->lock_depth >= 0)) { \
spin_unlock(&kernel_flag); \
+ lock_kernel_exit(); \
+ } \
} while (0)

/*
@@ -34,8 +36,10 @@ do { \
*/
#define reacquire_kernel_lock(task) \
do { \
- if (unlikely(task->lock_depth >= 0)) \
+ if (unlikely(task->lock_depth >= 0)) { \
+ lock_kernel_enter(); \
spin_lock(&kernel_flag); \
+ } \
} while (0)


@@ -49,13 +53,17 @@ do { \
static __inline__ void lock_kernel(void)
{
#ifdef CONFIG_PREEMPT
- if (current->lock_depth == -1)
+ if (current->lock_depth == -1) {
+ lock_kernel_enter();
spin_lock(&kernel_flag);
+ }
++current->lock_depth;
#else
#if 1
- if (!++current->lock_depth)
+ if (!++current->lock_depth) {
+ lock_kernel_enter();
spin_lock(&kernel_flag);
+ }
#else
__asm__ __volatile__(
"incl %1\n\t"
@@ -73,8 +81,10 @@ static __inline__ void unlock_kernel(voi
if (current->lock_depth < 0)
BUG();
#if 1
- if (--current->lock_depth < 0)
+ if (--current->lock_depth < 0) {
spin_unlock(&kernel_flag);
+ lock_kernel_exit();
+ }
#else
__asm__ __volatile__(
"decl %1\n\t"
