Re: NFS client errors with 2.5.74?

Trond Myklebust (trond.myklebust@fys.uio.no)
Thu, 10 Jul 2003 12:15:24 +0200


>>>>> " " == Jamie Lokier <jamie@shareable.org> writes:
> - Every so often, the client's kernel log gets:
> kernel: nfs: server 192.168.1.1 not responding, timed out

Sigh... I hate soft mounts... Have I said that before? 8-)

> Any idea about this? Is it a known problem?

I can never guarantee you perfect service with soft mounts (a 5 second
network patition/server congestion is all it takes) but I do have a
patch that just went into 2.4.22 that backs out some of the
Van Jacobson exponential backoff changes. This helps stabilize things
a lot.

I haven't yet had time to port that patch to 2.5.x, but the code
should be pretty much identical, so if you want to give it a go, then
here it is...

Cheers,
Trond

diff -u --recursive --new-file linux-2.4.21-20-rdplus/net/sunrpc/clnt.c linux-2.4.21-21-soft/net/sunrpc/clnt.c
--- linux-2.4.21-20-rdplus/net/sunrpc/clnt.c 2003-06-15 14:49:35.000000000 -0700
+++ linux-2.4.21-21-soft/net/sunrpc/clnt.c 2003-06-17 10:38:56.000000000 -0700
@@ -709,14 +709,14 @@

dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
if (clnt->cl_softrtry) {
- if (clnt->cl_chatty && !task->tk_exit)
+ if (clnt->cl_chatty)
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname, clnt->cl_server);
rpc_exit(task, -EIO);
return;
}

- if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN) && rpc_ntimeo(&clnt->cl_rtt) > 7) {
+ if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
clnt->cl_protname, clnt->cl_server);
diff -u --recursive --new-file linux-2.4.21-20-rdplus/net/sunrpc/xprt.c linux-2.4.21-21-soft/net/sunrpc/xprt.c
--- linux-2.4.21-20-rdplus/net/sunrpc/xprt.c 2003-06-15 15:14:14.000000000 -0700
+++ linux-2.4.21-21-soft/net/sunrpc/xprt.c 2003-06-17 09:48:18.000000000 -0700
@@ -1046,21 +1046,6 @@
}

/*
- * Exponential backoff for UDP retries
- */
-static inline int
-xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req)
-{
- int backoff;
-
- req->rq_ntimeo++;
- backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF);
- if (req->rq_ntimeo < (1 << backoff))
- return 1;
- return 0;
-}
-
-/*
* RPC receive timeout handler.
*/
static void
@@ -1073,14 +1058,7 @@
if (req->rq_received)
goto out;

- if (!xprt->nocong) {
- if (xprt_expbackoff(task, req)) {
- rpc_add_timer(task, xprt_timer);
- goto out_unlock;
- }
- rpc_inc_timeo(&task->tk_client->cl_rtt);
- xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
- }
+ xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
req->rq_nresend++;

dprintk("RPC: %4d xprt_timer (%s request)\n",
@@ -1090,7 +1068,6 @@
out:
task->tk_timeout = 0;
rpc_wake_up_task(task);
-out_unlock:
spin_unlock(&xprt->sock_lock);
}

@@ -1228,16 +1205,17 @@
return;
out_receive:
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
+ spin_lock_bh(&xprt->sock_lock);
/* Set the task's receive timeout value */
if (!xprt->nocong) {
task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
rpcproc_timer(clnt, task->tk_msg.rpc_proc));
- req->rq_ntimeo = 0;
+ task->tk_timeout <<= clnt->cl_timeout.to_retries
+ - req->rq_timeout.to_retries;
if (task->tk_timeout > req->rq_timeout.to_maxval)
task->tk_timeout = req->rq_timeout.to_maxval;
} else
task->tk_timeout = req->rq_timeout.to_current;
- spin_lock_bh(&xprt->sock_lock);
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/