[PATCH] 2.5: (better) syscalls for setting task affinity

Robert Love (rml@tech9.net)
27 Feb 2002 18:45:21 -0500


[ This post adds length and pointer checks to the syscalls such that we
can portably change the size of cpus_allowed. ]

The attached patch implements a syscall interface for setting and
retrieving a task's CPU affinity (task->cpus_allowed):

int sched_set_affinity(pid_t pid, unsigned int len,
unsigned long *new_mask_ptr);

int sched_get_affinity(pid_t pid, unsigned int *user_len_ptr,
unsigned long *user_mask_ptr);

sched_set_affinity uses the set_cpus_allowed function in Ingo's new
scheduler to do all the hard work. Additionally, this patch is based
off Ingo's previous syscall affinity patch and my proc-based affinity
patch. Much credit to Ingo for this past work and his current
scheduler.

Security is enforced when setting affinity: the caller's effective uid
must match the target task's uid or euid, or the caller must possess
CAP_SYS_NICE.

We can safely change the size of cpus_allowed. One can use:

sched_get_affinity(0, &len, NULL);

to get the size of cpus_allowed. Use of this interface should thus
remain portable as cpus_allowed grows beyond sizeof(long).

I think exporting cpus_allowed as a bit mask is the proper user
interface for CPU affinity. Certainly more complicated solutions exist,
but this should get the job done. I have also updated my proc-based
solution although Ingo and others have flamed^Wconvinced me a
syscall-based interface is ideal. I agree, mostly on the basis that
/proc may not be mounted but I have posted an updated proc-based
solution anyhow for proper discussion.

Patch is against 2.5.6-pre1. Enjoy,

Robert Love

diff -urN linus/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linus/arch/i386/kernel/entry.S Tue Feb 26 20:17:01 2002
+++ linux/arch/i386/kernel/entry.S Tue Feb 26 20:44:06 2002
@@ -716,6 +716,8 @@
.long SYMBOL_NAME(sys_lremovexattr)
.long SYMBOL_NAME(sys_fremovexattr)
.long SYMBOL_NAME(sys_tkill)
+ .long SYMBOL_NAME(sys_sched_set_affinity)
+ .long SYMBOL_NAME(sys_sched_get_affinity) /* 240 */

.rept NR_syscalls-(.-sys_call_table)/4
.long SYMBOL_NAME(sys_ni_syscall)
diff -urN linus/include/asm-i386/unistd.h linux/include/asm-i386/unistd.h
--- linus/include/asm-i386/unistd.h Tue Feb 26 20:17:14 2002
+++ linux/include/asm-i386/unistd.h Tue Feb 26 23:54:21 2002
@@ -243,6 +243,8 @@
#define __NR_lremovexattr 236
#define __NR_fremovexattr 237
#define __NR_tkill 238
+#define __NR_sched_set_affinity 239
+#define __NR_sched_get_affinity 240

/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */

diff -urN linus/kernel/sched.c linux/kernel/sched.c
--- linus/kernel/sched.c Tue Feb 26 20:17:37 2002
+++ linux/kernel/sched.c Tue Feb 26 23:52:38 2002
@@ -1215,6 +1215,99 @@
return retval;
}

+/**
+ * sys_sched_set_affinity - set the cpu affinity of a process
+ * @pid: pid of the process
+ * @len: length in bytes of the mask at @new_mask_ptr
+ * @new_mask_ptr: user-space pointer to the new cpu mask
+ *
+ * Returns 0 on success, -EINVAL for a short, NULL or offline-only mask,
+ * -EFAULT on a bad pointer, -ESRCH for an unknown pid, -EPERM if denied.
+ */
+asmlinkage int sys_sched_set_affinity(pid_t pid, unsigned int len,
+				      unsigned long *new_mask_ptr)
+{
+	unsigned long new_mask;
+	task_t *p;
+	int retval = -EPERM;
+
+	if (len < sizeof(new_mask) || !new_mask_ptr)
+		return -EINVAL;
+
+	if (copy_from_user(&new_mask, new_mask_ptr, sizeof(new_mask)))
+		return -EFAULT;
+
+	new_mask &= cpu_online_map;	/* ignore CPUs that are not online */
+	if (!new_mask)
+		return -EINVAL;
+
+	read_lock(&tasklist_lock);
+	p = find_process_by_pid(pid);
+	if (!p) {
+		read_unlock(&tasklist_lock);
+		return -ESRCH;
+	}
+	/* set_cpus_allowed() may sleep: pin the task, then drop the lock. */
+	get_task_struct(p);
+	read_unlock(&tasklist_lock);
+
+	if ((current->euid != p->euid) && (current->euid != p->uid) &&
+			!capable(CAP_SYS_NICE))
+		goto out_put;
+
+	retval = 0;
+#ifdef CONFIG_SMP
+	set_cpus_allowed(p, new_mask);
+#endif
+out_put:
+	put_task_struct(p);
+	return retval;
+}
+
+/**
+ * sys_sched_get_affinity - get the cpu affinity of a process
+ * @pid: pid of the process
+ * @user_len_ptr: in: size in bytes of the user buffer; out: size of the mask
+ * @user_mask_ptr: userspace buffer for the mask, or NULL to query the size
+ *
+ * Returns 0 on success, -EFAULT on a bad pointer, -EINVAL if the buffer
+ * is smaller than the mask, -ESRCH if no task matches @pid.
+ */
+asmlinkage int sys_sched_get_affinity(pid_t pid, unsigned int *user_len_ptr,
+				      unsigned long *user_mask_ptr)
+{
+	unsigned long mask = 0;
+	unsigned int len = sizeof(mask), user_len;
+	task_t *p;
+
+	if (copy_from_user(&user_len, user_len_ptr, sizeof(user_len)))
+		return -EFAULT;
+
+	/* Always report the real size so callers can size their buffer. */
+	if (copy_to_user(user_len_ptr, &len, sizeof(len)))
+		return -EFAULT;
+
+	/* Pure size query: nothing to copy, and user_len is irrelevant. */
+	if (!user_mask_ptr)
+		return 0;
+
+	if (user_len < len)
+		return -EINVAL;
+
+	read_lock(&tasklist_lock);
+	p = find_process_by_pid(pid);
+	if (p)
+		mask = p->cpus_allowed & cpu_online_map;
+	read_unlock(&tasklist_lock);
+	if (!p)
+		return -ESRCH;
+
+	/* The mask is copied out only after tasklist_lock is released. */
+	if (copy_to_user(user_mask_ptr, &mask, sizeof(mask)))
+		return -EFAULT;
+	return 0;
+}
+
asmlinkage long sys_sched_yield(void)
{
runqueue_t *rq;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/