[PATCH 2.5.61] jfs breakage in 2.5.60

Dave Kleikamp (shaggy@austin.ibm.com)
Mon, 17 Feb 2003 13:34:34 -0600


This patch fixes a trap in JFS that occurs under heavy load.

It's been submitted to Linus as a bk pull.

Thanks to Dave Jones for reporting the problem.

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1059 -> 1.1060
# fs/jfs/jfs_logmgr.h 1.10 -> 1.11
# fs/jfs/jfs_logmgr.c 1.44 -> 1.45
# fs/jfs/jfs_umount.c 1.9 -> 1.10
# fs/jfs/jfs_txnmgr.c 1.38 -> 1.39
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/02/17 shaggy@shaggy.austin.ibm.com 1.1060
# JFS: Fix jfs_sync_fs
#
# jfs_sync_fs was implemented using the same code as the unmount code to flush
# the journal and wait for the journal to quiesce. Since jfs_sync_fs may be
# called while the volume is under heavy use, we can end up waiting
# indefinitely. Code in jfs_flush_journal meant to detect a hang at unmount
# time may be triggered in this case causing a trap. This patch changes
# jfs_sync_fs to only wait until the most recent transaction has been
# committed to disk, rather than waiting until the commit queue is empty.
# --------------------------------------------
#
diff -Nru a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
--- a/fs/jfs/jfs_logmgr.c Mon Feb 17 13:26:16 2003
+++ b/fs/jfs/jfs_logmgr.c Mon Feb 17 13:26:16 2003
@@ -97,7 +97,7 @@
#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
-#define LOGGC_WAKEUP(tblk) wake_up(&(tblk)->gcwait)
+#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)

/*
* log sync serialization (per log)
@@ -511,7 +511,6 @@
tblk->bp = log->bp;
tblk->pn = log->page;
tblk->eor = log->eor;
- init_waitqueue_head(&tblk->gcwait);

/* enqueue transaction to commit queue */
tblk->cqnext = NULL;
@@ -831,6 +830,12 @@
tblk->flag &= ~tblkGC_QUEUE;
tblk->cqnext = 0;

+ if (tblk == log->flush_tblk) {
+ /* we can stop flushing the log now */
+ clear_bit(log_FLUSH, &log->flag);
+ log->flush_tblk = NULL;
+ }
+
jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
tblk->flag);

@@ -843,10 +848,10 @@
/* state transition: COMMIT -> COMMITTED */
tblk->flag |= tblkGC_COMMITTED;

- if (tblk->flag & tblkGC_READY) {
+ if (tblk->flag & tblkGC_READY)
log->gcrtc--;
- LOGGC_WAKEUP(tblk);
- }
+
+ LOGGC_WAKEUP(tblk);
}

/* was page full before pageout ?
@@ -892,6 +897,7 @@
else {
log->cflag &= ~logGC_PAGEOUT;
clear_bit(log_FLUSH, &log->flag);
+ WARN_ON(log->flush_tblk);
}

//LOGGC_UNLOCK(log);
@@ -1307,7 +1313,8 @@

INIT_LIST_HEAD(&log->synclist);

- log->cqueue.head = log->cqueue.tail = 0;
+ log->cqueue.head = log->cqueue.tail = NULL;
+ log->flush_tblk = NULL;

log->count = 0;

@@ -1395,38 +1402,78 @@
*
* FUNCTION: initiate write of any outstanding transactions to the journal
* and optionally wait until they are all written to disk
+ *
+ * wait == 0 flush until latest txn is committed, don't wait
+ * wait == 1 flush until latest txn is committed, wait
+ * wait > 1 flush until all txn's are complete, wait
*/
void jfs_flush_journal(struct jfs_log *log, int wait)
{
int i;
+ struct tblock *target;

jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

- /*
- * This ensures that we will keep writing to the journal as long
- * as there are unwritten commit records
- */
- set_bit(log_FLUSH, &log->flag);
-
- /*
- * Initiate I/O on outstanding transactions
- */
LOGGC_LOCK(log);
- if (log->cqueue.head && !(log->cflag & logGC_PAGEOUT)) {
- log->cflag |= logGC_PAGEOUT;
- lmGCwrite(log, 0);
+
+ target = log->cqueue.head;
+
+ if (target) {
+ /*
+ * This ensures that we will keep writing to the journal as long
+ * as there are unwritten commit records
+ */
+
+ if (test_bit(log_FLUSH, &log->flag)) {
+ /*
+ * We're already flushing.
+ * if flush_tblk is NULL, we are flushing everything,
+ * so leave it that way. Otherwise, update it to the
+ * latest transaction
+ */
+ if (log->flush_tblk)
+ log->flush_tblk = target;
+ } else {
+ /* Only flush until latest transaction is committed */
+ log->flush_tblk = target;
+ set_bit(log_FLUSH, &log->flag);
+
+ /*
+ * Initiate I/O on outstanding transactions
+ */
+ if (!(log->cflag & logGC_PAGEOUT)) {
+ log->cflag |= logGC_PAGEOUT;
+ lmGCwrite(log, 0);
+ }
+ }
+ }
+ if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
+ /* Flush until all activity complete */
+ set_bit(log_FLUSH, &log->flag);
+ log->flush_tblk = NULL;
+ }
+
+ if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
+ DECLARE_WAITQUEUE(__wait, current);
+
+ add_wait_queue(&target->gcwait, &__wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ LOGGC_UNLOCK(log);
+ schedule();
+ current->state = TASK_RUNNING;
+ LOGGC_LOCK(log);
+ remove_wait_queue(&target->gcwait, &__wait);
}
LOGGC_UNLOCK(log);

- if (!wait)
+ if (wait < 2)
return;

+ /*
+ * If there was recent activity, we may need to wait
+ * for the lazycommit thread to catch up
+ */
if (log->cqueue.head || !list_empty(&log->synclist)) {
- /*
- * If there was very recent activity, we may need to wait
- * for the lazycommit thread to catch up
- */
-
for (i = 0; i < 800; i++) { /* Too much? */
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ / 4);
@@ -1437,7 +1484,6 @@
}
assert(log->cqueue.head == NULL);
assert(list_empty(&log->synclist));
-
clear_bit(log_FLUSH, &log->flag);
}

@@ -1467,7 +1513,7 @@

jfs_info("lmLogShutdown: log:0x%p", log);

- jfs_flush_journal(log, 1);
+ jfs_flush_journal(log, 2);

/*
* We need to make sure all of the "written" metapages
diff -Nru a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
--- a/fs/jfs/jfs_logmgr.h Mon Feb 17 13:26:16 2003
+++ b/fs/jfs/jfs_logmgr.h Mon Feb 17 13:26:16 2003
@@ -403,6 +403,7 @@
struct tblock *head;
struct tblock *tail;
} cqueue;
+ struct tblock *flush_tblk; /* tblk we're waiting on for flush */
int gcrtc; /* 4: GC_READY transaction count */
struct tblock *gclrt; /* 4: latest GC_READY transaction */
spinlock_t gclock; /* 4: group commit lock */
diff -Nru a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
--- a/fs/jfs/jfs_txnmgr.c Mon Feb 17 13:26:16 2003
+++ b/fs/jfs/jfs_txnmgr.c Mon Feb 17 13:26:16 2003
@@ -2741,8 +2741,7 @@
if (tblk->flag & tblkGC_READY)
log->gcrtc--;

- if (tblk->flag & tblkGC_READY)
- wake_up(&tblk->gcwait); // LOGGC_WAKEUP
+ wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP

/*
* Can't release log->gclock until we've tested tblk->flag
diff -Nru a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
--- a/fs/jfs/jfs_umount.c Mon Feb 17 13:26:16 2003
+++ b/fs/jfs/jfs_umount.c Mon Feb 17 13:26:16 2003
@@ -69,7 +69,7 @@
/*
* Wait for outstanding transactions to be written to log:
*/
- jfs_flush_journal(log, 1);
+ jfs_flush_journal(log, 2);

/*
* close fileset inode allocation map (aka fileset inode)
@@ -149,7 +149,7 @@
*
* remove file system from log active file system list.
*/
- jfs_flush_journal(log, 1);
+ jfs_flush_journal(log, 2);

/*
* Make sure all metadata makes it to disk
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/