[patch] pidhash cleanups, tgid-2.5.38-F3

Ingo Molnar (mingo@elte.hu)
Mon, 23 Sep 2002 20:19:49 +0200 (CEST)


the attached patch, against BK-curr, does the following things:

- removes the ->thread_group list and uses a new PIDTYPE_TGID pid class
to handle thread groups (a short usage sketch follows this list). This
cleans up lots of code in signal.c and elsewhere.

- fixes sys_execve() if a non-leader thread calls it. (2.5.38 crashed in
this case.)

- renames list_for_each_noprefetch to __list_for_each.

- cleans up delayed-leader parent notification.

- introduces link_pid() to optimize PIDTYPE_TGID installation in the
thread-group case.
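
a minimal usage sketch (not part of the patch, and modelled on the
__broadcast_thread_group() hunk below): walking a thread group with the
new PIDTYPE_TGID class looks roughly like this - the helper name and the
printk are only illustration:

	/* sketch only: assumes the usual linux/sched.h + linux/pid.h */
	static void print_thread_group(struct task_struct *p)
	{
		struct task_struct *t;
		struct list_head *l;
		struct pid *pid;

		/* walk every member of p's thread group via the pid hash */
		for_each_task_pid(p->tgid, PIDTYPE_TGID, t, l, pid)
			printk("pid %d, tgid %d\n", t->pid, t->tgid);
	}

(locking is elided here - in the patch these walks run under the
tasklist_lock or the thread group's siglock.)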

i've tested the patch with a number of threaded and non-threaded
workloads, and it works just fine. Compiles & boots on UP and SMP x86.

the session/pgrp bugs reported to lkml are probably still open; they are
next on my todo list - now that we have a clean pidhash architecture they
should be easier to fix.

Ingo

--- linux/fs/exec.c.orig Mon Sep 23 17:53:23 2002
+++ linux/fs/exec.c Mon Sep 23 19:22:07 2002
@@ -537,7 +537,7 @@
if (!newsig)
return -ENOMEM;

- if (list_empty(&current->thread_group))
+ if (thread_group_empty(current))
goto out;
/*
* Kill all other threads in the thread group:
@@ -607,21 +607,17 @@
ptrace = leader->ptrace;
parent = leader->parent;

- ptrace_unlink(leader);
ptrace_unlink(current);
+ ptrace_unlink(leader);
remove_parent(current);
remove_parent(leader);
- /*
- * Split up the last two remaining members of the
- * thread group:
- */
- list_del_init(&leader->thread_group);

- leader->pid = leader->tgid = current->pid;
- current->pid = current->tgid;
+ switch_exec_pids(leader, current);
+
current->parent = current->real_parent = leader->real_parent;
leader->parent = leader->real_parent = child_reaper;
- current->exit_signal = SIGCHLD;
+ current->group_leader = current;
+ leader->group_leader = leader;

add_parent(current, current->parent);
add_parent(leader, leader->parent);
@@ -631,15 +627,17 @@
}

list_add_tail(&current->tasks, &init_task.tasks);
+ current->exit_signal = SIGCHLD;
state = leader->state;
+
write_unlock_irq(&tasklist_lock);

+ put_proc_dentry(proc_dentry1);
+ put_proc_dentry(proc_dentry2);
+
if (state != TASK_ZOMBIE)
BUG();
release_task(leader);
-
- put_proc_dentry(proc_dentry1);
- put_proc_dentry(proc_dentry2);
}

out:
@@ -661,7 +659,7 @@
if (atomic_dec_and_test(&oldsig->count))
kmem_cache_free(sigact_cachep, oldsig);

- if (!list_empty(&current->thread_group))
+ if (!thread_group_empty(current))
BUG();
if (current->tgid != current->pid)
BUG();
--- linux/include/linux/pid.h.orig Fri Sep 20 17:20:20 2002
+++ linux/include/linux/pid.h Mon Sep 23 19:45:48 2002
@@ -4,6 +4,7 @@
enum pid_type
{
PIDTYPE_PID,
+ PIDTYPE_TGID,
PIDTYPE_PGID,
PIDTYPE_SID,
PIDTYPE_MAX
@@ -29,13 +30,12 @@
list_entry(elem, struct task_struct, pids[type].pid_chain)

/*
- * attach_pid() must be called with the tasklist_lock write-held.
- *
- * It might unlock the tasklist_lock for allocation, so this
- * function must be called after installing all other links of
- * a new task.
+ * attach_pid() and link_pid() must be called with the tasklist_lock
+ * write-held.
*/
-extern int FASTCALL(attach_pid(struct task_struct *, enum pid_type, int));
+extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr));
+
+extern void FASTCALL(link_pid(struct task_struct *task, struct pid_link *link, struct pid *pid));

/*
* detach_pid() must be called with the tasklist_lock write-held.
@@ -50,6 +50,7 @@

extern int alloc_pidmap(void);
extern void FASTCALL(free_pidmap(int));
+extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);

#define for_each_task_pid(who, type, task, elem, pid) \
if ((pid = find_pid(type, who))) \
--- linux/include/linux/sched.h.orig Mon Sep 23 17:03:39 2002
+++ linux/include/linux/sched.h Mon Sep 23 17:55:27 2002
@@ -337,7 +337,6 @@
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader;
- struct list_head thread_group;

/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
@@ -806,34 +805,19 @@
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)

-static inline task_t *next_thread(task_t *p)
-{
- if (!p->sig)
- BUG();
-#if CONFIG_SMP
- if (!spin_is_locked(&p->sig->siglock) &&
- !rwlock_is_locked(&tasklist_lock))
- BUG();
-#endif
- return list_entry((p)->thread_group.next, task_t, thread_group);
-}
+extern task_t * FASTCALL(next_thread(task_t *p));
+
+#define thread_group_leader(p) (p->pid == p->tgid)

-static inline task_t *prev_thread(task_t *p)
+static inline int thread_group_empty(task_t *p)
{
- if (!p->sig)
- BUG();
-#if CONFIG_SMP
- if (!spin_is_locked(&p->sig->siglock) &&
- !rwlock_is_locked(&tasklist_lock))
- BUG();
-#endif
- return list_entry((p)->thread_group.prev, task_t, thread_group);
-}
+ struct pid *pid = p->pids[PIDTYPE_TGID].pidptr;

-#define thread_group_leader(p) (p->pid == p->tgid)
+ return pid->task_list.next->next == &pid->task_list;
+}

#define delay_group_leader(p) \
- (p->tgid == p->pid && !list_empty(&p->thread_group))
+ (thread_group_leader(p) && !thread_group_empty(p))

extern void unhash_process(struct task_struct *p);

--- linux/include/linux/list.h.orig Fri Sep 20 17:20:14 2002
+++ linux/include/linux/list.h Mon Sep 23 17:03:52 2002
@@ -196,7 +196,17 @@
for (pos = (head)->next, prefetch(pos->next); pos != (head); \
pos = pos->next, prefetch(pos->next))

-#define list_for_each_noprefetch(pos, head) \
+/**
+ * __list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)

/**
--- linux/include/linux/init_task.h.orig Mon Sep 23 17:55:34 2002
+++ linux/include/linux/init_task.h Mon Sep 23 17:55:38 2002
@@ -76,7 +76,6 @@
.children = LIST_HEAD_INIT(tsk.children), \
.sibling = LIST_HEAD_INIT(tsk.sibling), \
.group_leader = &tsk, \
- .thread_group = LIST_HEAD_INIT(tsk.thread_group), \
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
.real_timer = { \
.function = it_real_fn \
--- linux/kernel/fork.c.orig Fri Sep 20 17:20:16 2002
+++ linux/kernel/fork.c Mon Sep 23 19:45:18 2002
@@ -802,7 +802,6 @@
*/
p->tgid = p->pid;
p->group_leader = p;
- INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);

@@ -830,7 +829,6 @@
}
p->tgid = current->tgid;
p->group_leader = current->group_leader;
- list_add(&p->thread_group, &current->thread_group);
spin_unlock(&current->sig->siglock);
}

@@ -840,9 +838,11 @@

attach_pid(p, PIDTYPE_PID, p->pid);
if (thread_group_leader(p)) {
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
attach_pid(p, PIDTYPE_PGID, p->pgrp);
attach_pid(p, PIDTYPE_SID, p->session);
- }
+ } else
+ link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);

nr_threads++;
write_unlock_irq(&tasklist_lock);
--- linux/kernel/exit.c.orig Fri Sep 20 17:20:22 2002
+++ linux/kernel/exit.c Mon Sep 23 19:01:48 2002
@@ -34,13 +34,13 @@
struct dentry *proc_dentry;
nr_threads--;
detach_pid(p, PIDTYPE_PID);
+ detach_pid(p, PIDTYPE_TGID);
if (thread_group_leader(p)) {
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
}

REMOVE_LINKS(p);
- p->pid = 0;
proc_dentry = p->proc_dentry;
if (unlikely(proc_dentry != NULL)) {
spin_lock(&dcache_lock);
@@ -74,6 +74,15 @@
write_lock_irq(&tasklist_lock);
__exit_sighand(p);
proc_dentry = __unhash_process(p);
+
+ /*
+ * If we are the last non-leader member of the thread
+ * group, and the leader is zombie, then notify the
+ * group leader's parent process.
+ */
+ if (p->group_leader != p && thread_group_empty(p))
+ do_notify_parent(p->group_leader, p->group_leader->exit_signal);
+
p->parent->cutime += p->utime + p->cutime;
p->parent->cstime += p->stime + p->cstime;
p->parent->cmin_flt += p->min_flt + p->cmin_flt;
@@ -670,6 +679,25 @@
do_exit((error_code&0xff)<<8);
}

+task_t *next_thread(task_t *p)
+{
+ struct pid_link *link = p->pids + PIDTYPE_TGID;
+ struct list_head *tmp, *head = &link->pidptr->task_list;
+
+#if CONFIG_SMP
+ if (!p->sig)
+ BUG();
+ if (!spin_is_locked(&p->sig->siglock) &&
+ !rwlock_is_locked(&tasklist_lock))
+ BUG();
+#endif
+ tmp = link->pid_chain.next;
+ if (tmp == head)
+ tmp = head->next;
+
+ return pid_task(tmp, PIDTYPE_TGID);
+}
+
/*
* this kills every thread in the thread group. Note that any externally
* wait4()-ing process will get the correct exit code - even if this
@@ -679,7 +707,7 @@
{
unsigned int exit_code = (error_code & 0xff) << 8;

- if (!list_empty(&current->thread_group)) {
+ if (!thread_group_empty(current)) {
struct signal_struct *sig = current->sig;

spin_lock_irq(&sig->siglock);
--- linux/kernel/signal.c.orig Fri Sep 20 17:20:24 2002
+++ linux/kernel/signal.c Mon Sep 23 17:57:54 2002
@@ -254,7 +254,6 @@
{
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
- list_del_init(&tsk->thread_group);
}

void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig)
@@ -281,15 +280,13 @@
BUG();
spin_lock(&sig->siglock);
spin_lock(&tsk->sigmask_lock);
- tsk->sig = NULL;
if (atomic_dec_and_test(&sig->count)) {
__remove_thread_group(tsk, sig);
+ tsk->sig = NULL;
spin_unlock(&sig->siglock);
flush_sigqueue(&sig->shared_pending);
kmem_cache_free(sigact_cachep, sig);
} else {
- struct task_struct *leader = tsk->group_leader;
-
/*
* If there is any task waiting for the group exit
* then notify it:
@@ -298,24 +295,9 @@
wake_up_process(sig->group_exit_task);
sig->group_exit_task = NULL;
}
- /*
- * If we are the last non-leader member of the thread
- * group, and the leader is zombie, then notify the
- * group leader's parent process.
- *
- * (subtle: here we also rely on the fact that if we are the
- * thread group leader then we are not zombied yet.)
- */
- if (atomic_read(&sig->count) == 1 &&
- leader->state == TASK_ZOMBIE) {
-
- __remove_thread_group(tsk, sig);
- spin_unlock(&sig->siglock);
- do_notify_parent(leader, leader->exit_signal);
- } else {
- __remove_thread_group(tsk, sig);
- spin_unlock(&sig->siglock);
- }
+ __remove_thread_group(tsk, sig);
+ tsk->sig = NULL;
+ spin_unlock(&sig->siglock);
}
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
flush_sigqueue(&tsk->pending);
@@ -853,7 +835,7 @@
p->sig->curr_target = p;

else for (;;) {
- if (list_empty(&p->thread_group))
+ if (thread_group_empty(p))
BUG();
if (!tmp || tmp->tgid != p->tgid)
BUG();
@@ -882,17 +864,13 @@
int __broadcast_thread_group(struct task_struct *p, int sig)
{
struct task_struct *tmp;
- struct list_head *entry;
+ struct list_head *l;
+ struct pid *pid;
int err = 0;

- /* send a signal to the head of the list */
- err = __force_sig_info(sig, p);
-
- /* send a signal to all members of the list */
- list_for_each(entry, &p->thread_group) {
- tmp = list_entry(entry, task_t, thread_group);
+ for_each_task_pid(p->tgid, PIDTYPE_TGID, tmp, l, pid)
err = __force_sig_info(sig, tmp);
- }
+
return err;
}

@@ -909,7 +887,7 @@
spin_lock_irqsave(&p->sig->siglock, flags);

/* not a thread group - normal signal behavior */
- if (list_empty(&p->thread_group) || !sig)
+ if (thread_group_empty(p) || !sig)
goto out_send;

if (sig_user_defined(p, sig)) {
--- linux/kernel/pid.c.orig Fri Sep 20 17:20:30 2002
+++ linux/kernel/pid.c Mon Sep 23 20:12:58 2002
@@ -142,7 +142,7 @@
struct list_head *elem, *bucket = &pid_hash[type][pid_hashfn(nr)];
struct pid *pid;

- list_for_each_noprefetch(elem, bucket) {
+ __list_for_each(elem, bucket) {
pid = list_entry(elem, struct pid, hash_chain);
if (pid->nr == nr)
return pid;
@@ -150,6 +150,13 @@
return NULL;
}

+void link_pid(task_t *task, struct pid_link *link, struct pid *pid)
+{
+ atomic_inc(&pid->count);
+ list_add_tail(&link->pid_chain, &pid->task_list);
+ link->pidptr = pid;
+}
+
int attach_pid(task_t *task, enum pid_type type, int nr)
{
struct pid *pid = find_pid(type, nr);
@@ -165,13 +172,13 @@
get_task_struct(task);
list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]);
}
- list_add(&task->pids[type].pid_chain, &pid->task_list);
+ list_add_tail(&task->pids[type].pid_chain, &pid->task_list);
task->pids[type].pidptr = pid;

return 0;
}

-void detach_pid(task_t *task, enum pid_type type)
+static inline int __detach_pid(task_t *task, enum pid_type type)
{
struct pid_link *link = task->pids + type;
struct pid *pid = link->pidptr;
@@ -179,25 +186,69 @@

list_del(&link->pid_chain);
if (!atomic_dec_and_test(&pid->count))
- return;
+ return 0;

nr = pid->nr;
list_del(&pid->hash_chain);
put_task_struct(pid->task);

+ return nr;
+}
+
+static void _detach_pid(task_t *task, enum pid_type type)
+{
+ __detach_pid(task, type);
+}
+
+void detach_pid(task_t *task, enum pid_type type)
+{
+ int nr = __detach_pid(task, type);
+
+ if (!nr)
+ return;
+
for (type = 0; type < PIDTYPE_MAX; ++type)
if (find_pid(type, nr))
return;
free_pidmap(nr);
}

-extern task_t *find_task_by_pid(int nr)
+task_t *find_task_by_pid(int nr)
{
struct pid *pid = find_pid(PIDTYPE_PID, nr);

if (!pid)
return NULL;
return pid_task(pid->task_list.next, PIDTYPE_PID);
+}
+
+/*
+ * This function switches the PIDs if a non-leader thread calls
+ * sys_execve() - this must be done without releasing the PID.
+ * (which a detach_pid() would eventually do.)
+ */
+void switch_exec_pids(task_t *leader, task_t *thread)
+{
+ _detach_pid(leader, PIDTYPE_PID);
+ _detach_pid(leader, PIDTYPE_TGID);
+ _detach_pid(leader, PIDTYPE_PGID);
+ _detach_pid(leader, PIDTYPE_SID);
+
+ _detach_pid(thread, PIDTYPE_PID);
+ _detach_pid(thread, PIDTYPE_TGID);
+
+ leader->pid = leader->tgid = thread->pid;
+ thread->pid = thread->tgid;
+
+ attach_pid(thread, PIDTYPE_PID, thread->pid);
+ attach_pid(thread, PIDTYPE_TGID, thread->tgid);
+ attach_pid(thread, PIDTYPE_PGID, thread->pgrp);
+ attach_pid(thread, PIDTYPE_SID, thread->session);
+
+ attach_pid(leader, PIDTYPE_PID, leader->pid);
+ attach_pid(leader, PIDTYPE_TGID, leader->tgid);
+ attach_pid(leader, PIDTYPE_PGID, leader->pgrp);
+ attach_pid(leader, PIDTYPE_SID, leader->session);
}

void __init pidhash_init(void)
