--=_courier-31798-1053012366-0001-2
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline
On Tue, May 13, 2003 at 05:19:23PM +0200, Trond Myklebust wrote:
>
> Could you please try with a newer kernel. The close-to-open cache
> consistency fixes are a relatively recent addition to the Linux NFS
> client. I dunno if RedHat's 2.4.18 kernel has them.
>
> 2.4.7 certainly does not.
I tried again with the 2.4.20 based kernel that Red Hat released
yesterday (2.4.20-13.7bigmem). The problem that I was seeing occurs
less frequently there, but it still happens.
I have attached a program which can reproduce this. If you run it
under 2.4.7 it fails instantly. If you use 2.4.20 it may take a
minute or so but it will also fail.
Thanks,
Jim
PS: Do you know if there is any way to work around this problem from
within my program?
--
----------------------------------------------------------------------------
Jim Nance Synopsys
(919) 425-7219 Do you have sweet iced tea? jlnance at synopsys.com
No, but there's sugar on the table.
--=_courier-31798-1053012366-0001-2
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="p1.c"
/* This program demonstrates a problem with the close/open consistency
* of NFS file systems under Linux. It fails very rapidy with Red Hats
* 2.4.7-10smp kernel. This kernel was known to have bugs. It also fails
* with Red Hats 2.4.20-13.7bigmem kernel, which was thought to have this
* bug fixed. For my testcase both linux machines were talking to a
* network applicance file server and mounted like this:
*
* na1-rtp:/vol/vol0/home/jlnance /home/jlnance nfs rw,v3,rsize=4096,\
* wsize=4096,hard,intr,udp,lock,addr=na1-rtp 0 0
*
* This program needs to be run on 2 machines, assume hostnames A & B.
* A and B need to share an NFS mounted file system.
*
* On machine A:
* cd /some/nfs/path/common/to/both
* ./p1 s
*
* On machine B:
* cd /some/nfs/path/common/to/both
* ./p1 c A
*
* After a while you may see output similar to:
* cayman> ./p1 s
* Failed to find #0 which client wrote
* Failed on file number 483
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <netdb.h>
#define PORT 12387
#define FLEN 16
void die()
{
perror("");
exit(-1);
}
void Write(int fd, char *buff, size_t len)
{
for(;;) {
int nsent=write(fd, buff, len);
if(nsent==0)
exit(0);
if(nsent==-1) {
if(errno!=EINTR)
die();
} else {
buff += nsent;
len -= nsent;
if(len==0) {
return;
}
}
}
}
void Read(int fd, char *buff, size_t len)
{
for(;;) {
int nread=read(fd, buff, len);
if(nread==0)
exit(0);
if(nread==-1) {
if(errno!=EINTR)
die();
} else {
buff += nread;
len -= nread;
if(len==0) {
return;
}
}
}
}
int server()
{
int sock = socket(AF_INET, SOCK_STREAM, 0);
if(sock==-1) die(); else {
struct sockaddr_in name;
int on = 1;
name.sin_family = AF_INET;
name.sin_addr.s_addr = htonl(INADDR_ANY);
name.sin_port = htons(PORT);
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof on);
if(bind(sock, (struct sockaddr*)&name, sizeof(name))==-1) die(); else {
if(listen(sock, 1)==-1) die(); else {
int tsock = accept(sock, 0, 0);
if(tsock!=-1) {
int cnt;
for(cnt=0; cnt<100000; cnt++) {
int fd;
char dummy;
char number[FLEN];
struct stat sbuf;
/*sprintf(number, "#%d", cnt);*/
sprintf(number, "#%d", 0);
Write(tsock, number, sizeof(number));
Read(tsock, &dummy, 1);
if(stat(number, &sbuf)) {
fprintf(stderr, "Failed to find %s which client wrote\n", number);
fprintf(stderr, "Failed on file number %d\n", cnt);
exit(-2);
}
unlink(number);
}
}
}
}
}
return 0;
}
int client(char *server)
{
struct hostent *info = gethostbyname(server);
if(!info) die(); else {
int rsocket = socket(AF_INET, SOCK_STREAM, 0);
if(rsocket==-1) die(); else {
struct sockaddr_in name;
name.sin_family = AF_INET;
name.sin_port = htons(PORT);
memcpy(&name.sin_addr, info->h_addr_list[0], sizeof(struct in_addr));
if(connect(rsocket, (struct sockaddr*)&name, sizeof(name))==-1)
die();
else {
for(;;) {
int fd;
char fname[FLEN];
char tname[FLEN+8];
Read(rsocket, fname, sizeof(fname));
strcpy(tname, fname);
strcat(tname, ".tmp");
fd = open(tname, O_WRONLY|O_CREAT, 0600);
if(fd==-1) die();
Write(fd, fname, sizeof(fname)); /* Junk data */
close(fd);
rename(tname, fname);
Write(rsocket, fname, 1); /* Tells the server we are done */
}
}
}
}
return 0;
}
void usage(char *prog)
{
fprintf(stderr, "Usage:\n");
fprintf(stderr, " %s s\n", prog);
fprintf(stderr, " %s c servername\n", prog);
fprintf(stderr, " Run 1 of each in the same NFS directory on 2 different "
"machines\n Two processes total\n");
exit(-1);
}
int main(int ac, char **av)
{
if(ac<2) {
usage(av[0]);
} if(av[1][0]=='s') {
return server();
}else if(ac<3) {
usage(av[0]);
} else if(av[1][0]=='c') {
return client(av[2]);
} else {
usage(av[0]);
}
return -1;
}
--=_courier-31798-1053012366-0001-2--