That's indeed the case on a Celeron/Coppermine I just tested
with a trivial application.
 
> uh? you just need to fxsave on stack, extract the mask, the struct
> is 512 bytes length, surely during kernel init 512 bytes stack
> allocation is right
Beware of the alignment requirement, in the following patch I just
allocated 512 bytes of initdata for this. Not elegant but safe.
> 
> >not want to touch too many files in my patch, but it seems unavoidable. 
> 
> 
> >Now a last question, are there SMP systems in which one processor
> >supports DAZ and the other does not, just to complicate matters a
> >little more?
> 
> Such system are not symetric. I don't think we must take care
> about this theorical things and I'm pretty sure than mixing old
> P4 and newers in a box can't work. Anyway even if it works
> userspace program using DAZ will be not reliable since they can
> run from time to time on cpu with DAZ then cpu w/o DAZ.
Hmmm, the (misnamed) boot_cpu_data flags are the intersection of 
all the cpu flags. So I did the same for mxcsr_mask.
Anyway, here is the second version of the patch. Unfortunately I could
not test it, only compile, and I'm going to be away from any machine 
I can play with until mid-June or so. It took me some time to
understand early boot, especially how cr4 OSFXSR, first set up through
check_bugs(!), propagated to other processors through the global
(and misnamed) mmu_cr4_features, and I might have missed other things.
I repeat, handle with care, I'm not sure at all that it won't eat
your data and there are things that I don't like about it: mostly
that boot_cpu_data should be reordered so that mxcsr_mask is close
to the capabilities. There are #defines of the offsets in this 
structure in head.S, changing this is far too dangerous for a patch 
that I can't test. 
     Regards,
     Gabriel
===== arch/i386/kernel/i387.c 1.16 vs edited =====
--- 1.16/arch/i386/kernel/i387.c	Wed Apr  9 05:45:37 2003
+++ edited/arch/i386/kernel/i387.c	Sun May 11 20:55:32 2003
@@ -208,7 +208,8 @@
 void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr )
 {
 	if ( cpu_has_xmm ) {
-		tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf);
+		tsk->thread.i387.fxsave.mxcsr = 
+		    mxcsr & boot_cpu_data.mxcsr_mask;
 	}
 }
 
@@ -356,8 +357,7 @@
 	clear_fpu( tsk );
 	err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
 				sizeof(struct i387_fxsave_struct) );
-	/* mxcsr bit 6 and 31-16 must be zero for security reasons */
-	tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+	tsk->thread.i387.fxsave.mxcsr &= boot_cpu_data.mxcsr_mask;
 	return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
 }
 
@@ -455,8 +455,7 @@
 	if ( cpu_has_fxsr ) {
 		__copy_from_user( &tsk->thread.i387.fxsave, buf,
 				  sizeof(struct user_fxsr_struct) );
-		/* mxcsr bit 6 and 31-16 must be zero for security reasons */
-		tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+		tsk->thread.i387.fxsave.mxcsr &= boot_cpu_data.mxcsr_mask;
 		return 0;
 	} else {
 		return -EIO;
===== arch/i386/kernel/cpu/common.c 1.21 vs edited =====
--- 1.21/arch/i386/kernel/cpu/common.c	Sun Mar 23 15:55:48 2003
+++ edited/arch/i386/kernel/cpu/common.c	Sun May 11 22:43:44 2003
@@ -262,6 +262,7 @@
 __setup("serialnumber", x86_serial_nr_setup);
 
 
+static struct i387_fxsave_struct fxsave_area __initdata;
 
 /*
  * This does the hard work of actually picking apart the CPU stuff...
@@ -324,6 +325,24 @@
 		clear_bit(X86_FEATURE_XMM, c->x86_capability);
 	}
 
+	if (test_bit(X86_FEATURE_XMM, c->x86_capability)) {
+		/* Setting OSFXSR may not be necessary */
+		unsigned cr0, cr4;
+		long mask;
+
+		memset(&fxsave_area, 0, sizeof fxsave_area);
+		cr0 = read_cr0();
+		cr4 = read_cr4();
+		write_cr4(cr4|X86_CR4_OSFXSR);
+		clts();
+		__asm__ __volatile__("fxsave %0": "=m" (fxsave_area));
+		write_cr4(cr4);
+		write_cr0(cr0);
+		mask = fxsave_area.mxcsr_mask;
+		mask = mask ? mask : 0xffbf;
+		c->mxcsr_mask = mask;
+	}
+
 	if (disable_pse)
 		clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
@@ -357,6 +376,7 @@
 		/* AND the already accumulated flags with these */
 		for ( i = 0 ; i < NCAPINTS ; i++ )
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+		boot_cpu_data.mxcsr_mask &= c->mxcsr_mask;
 	}
 
 	/* Init Machine Check Exception if available. */
===== include/asm-i386/processor.h 1.50 vs edited =====
--- 1.50/include/asm-i386/processor.h	Fri May  9 21:24:03 2003
+++ edited/include/asm-i386/processor.h	Sun May 11 20:53:22 2003
@@ -63,6 +63,7 @@
 	int	f00f_bug;
 	int	coma_bug;
 	unsigned long loops_per_jiffy;
+	long    mxcsr_mask;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL 0
@@ -320,7 +321,7 @@
 	long	foo;
 	long	fos;
 	long	mxcsr;
-	long	reserved;
+	long	mxcsr_mask;
 	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
 	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
 	long	padding[56];
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/