...and it is probably do_swap_page().
Despite the name, "lookup_swap_cache()" does more than a lookup: it will
also wait for the page that it looked up. And we call it with the
page_table_lock held in do_swap_page(), so we can end up waiting while
still holding that spinlock.
Ho hum. Does the appended patch fix it for you? It looks obvious enough, but
this bug is actually hidden on true SMP, and I'm too lazy to test with
"num_cpus=1" or something.
		Linus
-----
diff -u --recursive --new-file pre6/linux/mm/memory.c linux/mm/memory.c
--- pre6/linux/mm/memory.c	Tue Mar 20 23:13:03 2001
+++ linux/mm/memory.c	Wed Mar 21 22:21:27 2001
@@ -1031,18 +1031,20 @@
 	struct vm_area_struct * vma, unsigned long address,
 	pte_t * page_table, swp_entry_t entry, int write_access)
 {
-	struct page *page = lookup_swap_cache(entry);
+	struct page *page;
 	pte_t pte;
+	spin_unlock(&mm->page_table_lock);
+	page = lookup_swap_cache(entry);
 	if (!page) {
-		spin_unlock(&mm->page_table_lock);
 		lock_kernel();
 		swapin_readahead(entry);
 		page = read_swap_cache(entry);
 		unlock_kernel();
-		spin_lock(&mm->page_table_lock);
-		if (!page)
+		if (!page) {
+			spin_lock(&mm->page_table_lock);
 			return -1;
+		}
 		flush_page_to_ram(page);
 		flush_icache_page(vma, page);
@@ -1053,13 +1055,13 @@
 	 * Must lock page before transferring our swap count to already
 	 * obtained page count.
 	 */
-	spin_unlock(&mm->page_table_lock);
 	lock_page(page);
-	spin_lock(&mm->page_table_lock);
 	/*
-	 * Back out if somebody else faulted in this pte while we slept.
+	 * Back out if somebody else faulted in this pte while we
+	 * released the page table lock.
 	 */
+	spin_lock(&mm->page_table_lock);
 	if (pte_present(*page_table)) {
 		UnlockPage(page);
 		page_cache_release(page);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/