[PATCH] Futex Generalization Patch

Rusty Russell (rusty@rustcorp.com.au)
Thu, 04 Apr 2002 17:52:32 +1000


"This time for sure"

I have a new primitive (thanks Paul Mackerras): sleep if the word at
this address is equal to this value. It also has an optional timeout.
On top of this, I have implemented pthreads locking primitives and futexes
(only marginally slower than the completely-in-kernel version).

Userspace stuff (including nonpthreads.c code):
http://www.kernel.org/pub/linux/kernel/people/rusty/futex-2.0.tar.gz

Please complain now, or I'll send to Linus as is,
Rusty.
PS. Will be on plane in 24 hours, for 40 hours. Incommunicado.

--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.7/include/linux/futex.h working-2.5.7-futex/include/linux/futex.h
--- linux-2.5.7/include/linux/futex.h	Thu Mar 21 14:14:54 2002
+++ working-2.5.7-futex/include/linux/futex.h	Tue Apr 2 10:56:40 2002
@@ -2,7 +2,7 @@
 #define _LINUX_FUTEX_H
 
 /* Second argument to futex syscall */
-#define FUTEX_UP (0)
-#define FUTEX_DOWN (1)
+#define FUTEX_WAIT (0)
+#define FUTEX_WAKE (1)
 
 #endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.7/kernel/futex.c working-2.5.7-futex/kernel/futex.c
--- linux-2.5.7/kernel/futex.c	Thu Mar 21 14:14:56 2002
+++ working-2.5.7-futex/kernel/futex.c	Tue Apr 2 10:56:40 2002
@@ -32,7 +32,8 @@
 #include <linux/fs.h>
 #include <linux/futex.h>
 #include <linux/highmem.h>
-#include <asm/atomic.h>
+#include <linux/time.h>
+#include <asm/uaccess.h>
 
 /* These mutexes are a very simple counter: the winner is the one who
    decrements from 1 to 0. The counter starts at 1 when the lock is
@@ -68,22 +69,27 @@
 	return &futex_queues[hash_long(h, FUTEX_HASHBITS)];
 }
 
-static inline void wake_one_waiter(struct list_head *head,
-				   struct page *page,
-				   unsigned int offset)
+static int futex_wake(struct list_head *head,
+		      struct page *page,
+		      unsigned int offset,
+		      int num)
 {
-	struct list_head *i;
+	struct list_head *i, *next;
+	int num_woken = 0;
 
 	spin_lock(&futex_lock);
-	list_for_each(i, head) {
+	list_for_each_safe(i, next, head) {
 		struct futex_q *this = list_entry(i, struct futex_q, list);
 
 		if (this->page == page && this->offset == offset) {
+			list_del_init(i);
 			wake_up_process(this->task);
-			break;
+			num_woken++;
+			if (num_woken >= num) break;
 		}
 	}
 	spin_unlock(&futex_lock);
+	return num_woken;
 }
 
 /* Add at end to avoid starvation */
@@ -101,11 +107,17 @@
 	spin_unlock(&futex_lock);
 }
 
-static inline void unqueue_me(struct futex_q *q)
+/* Return 1 if we were still queued (ie. 0 means we were woken) */
+static inline int unqueue_me(struct futex_q *q)
 {
+	int ret = 0;
 	spin_lock(&futex_lock);
-	list_del(&q->list);
+	if (!list_empty(&q->list)) {
+		list_del(&q->list);
+		ret = 1;
+	}
 	spin_unlock(&futex_lock);
+	return ret;
 }
 
 /* Get kernel address of the user page and pin it. */
@@ -129,74 +141,62 @@
 	return page;
 }
 
-/* Try to decrement the user count to zero. */
-static int decrement_to_zero(struct page *page, unsigned int offset)
-{
-	atomic_t *count;
-	int ret = 0;
-
-	count = kmap(page) + offset;
-	/* If we take the semaphore from 1 to 0, it's ours. If it's
-	   zero, decrement anyway, to indicate we are waiting. If
-	   it's negative, don't decrement so we don't wrap... */
-	if (atomic_read(count) >= 0 && atomic_dec_and_test(count))
-		ret = 1;
-	kunmap(page);
-	return ret;
-}
-
-/* Simplified from arch/ppc/kernel/semaphore.c: Paul M. is a genius. */
-static int futex_down(struct list_head *head, struct page *page, int offset)
+static int futex_wait(struct list_head *head,
+		      struct page *page,
+		      int offset,
+		      int val,
+		      unsigned long time)
 {
-	int retval = 0;
+	int *count;
 	struct futex_q q;
+	int ret = 0;
 
-	current->state = TASK_INTERRUPTIBLE;
+	set_current_state(TASK_INTERRUPTIBLE);
 	queue_me(head, &q, page, offset);
 
-	while (!decrement_to_zero(page, offset)) {
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		current->state = TASK_INTERRUPTIBLE;
-	}
-	current->state = TASK_RUNNING;
-	unqueue_me(&q);
-	/* If we were signalled, we might have just been woken: we
-	   must wake another one. Otherwise we need to wake someone
-	   else (if they are waiting) so they drop the count below 0,
-	   and when we "up" in userspace, we know there is a
-	   waiter. */
-	wake_one_waiter(head, page, offset);
-	return retval;
-}
-
-static int futex_up(struct list_head *head, struct page *page, int offset)
-{
-	atomic_t *count;
-
 	count = kmap(page) + offset;
-	atomic_set(count, 1);
-	smp_wmb();
+	if (*count != val) {
+		ret = -EWOULDBLOCK;
+		set_current_state(TASK_RUNNING);
+		goto out;
+	}
+	time = schedule_timeout(time);
+	if (time == 0) {
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+	if (signal_pending(current)) {
+		ret = -EINTR;
+		goto out;
+	}
+ out:
 	kunmap(page);
-	wake_one_waiter(head, page, offset);
-	return 0;
+	/* Were we woken up anyway? */
+	if (!unqueue_me(&q))
+		return 0;
+	return ret;
 }
 
-asmlinkage int sys_futex(void *uaddr, int op)
+asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime)
 {
 	int ret;
 	unsigned long pos_in_page;
 	struct list_head *head;
 	struct page *page;
+	unsigned long time = MAX_SCHEDULE_TIMEOUT;
+
+	if (utime) {
+		struct timespec t;
+		if (copy_from_user(&t, utime, sizeof(t)) != 0)
+			return -EFAULT;
+		time = timespec_to_jiffies(&t) + 1;
+	}
 
 	pos_in_page = ((unsigned long)uaddr) % PAGE_SIZE;
 
 	/* Must be "naturally" aligned, and not on page boundary. */
-	if ((pos_in_page % __alignof__(atomic_t)) != 0
-	    || pos_in_page + sizeof(atomic_t) > PAGE_SIZE)
+	if ((pos_in_page % __alignof__(int)) != 0
+	    || pos_in_page + sizeof(int) > PAGE_SIZE)
 		return -EINVAL;
 
 	/* Simpler if it doesn't vanish underneath us. */
@@ -206,13 +206,12 @@
 
 	head = hash_futex(page, pos_in_page);
 	switch (op) {
-	case FUTEX_UP:
-		ret = futex_up(head, page, pos_in_page);
+	case FUTEX_WAIT:
+		ret = futex_wait(head, page, pos_in_page, val, time);
 		break;
-	case FUTEX_DOWN:
-		ret = futex_down(head, page, pos_in_page);
+	case FUTEX_WAKE:
+		ret = futex_wake(head, page, pos_in_page, val);
 		break;
-	/* Add other lock types here... */
 	default:
 		ret = -EINVAL;
 	}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/