<!-- received="Mon May 31 08:04:28 1999 EET DST" -->
<!-- sent="30 May 1999 12:56:49 -0500" -->
<!-- name="Eric W. Biederman" -->
<!-- email="ebiederm+eric@ccr.net" -->
<!-- subject="[RFC][PATCH] dirty pages in the page cache." -->
<!-- id="" -->
<!-- inreplyto="" -->
<title>Linux-kernel mailing list archive 1999-22,: [RFC][PATCH] dirty pages in the page cache.</title>
<body bgcolor="#FFFFFF"><font face="Arial,Helvetica">
<h1>[RFC][PATCH] dirty pages in the page cache.</h1>
<b>Eric W. Biederman</b> (<a href="mailto:ebiederm%2Beric@ccr.net"><i>ebiederm+eric@ccr.net</i></a>)<br>
<i>30 May 1999 12:56:49 -0500</i>
<p>
<ul>
<li> <b>Messages sorted by:</b> <a href="date.html#273">[ date ]</a><a href="index.html#273">[ thread ]</a><a href="subject.html#273">[ subject ]</a><a href="author.html#273">[ author ]</a>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0274.html">Alan Cox: "Re: 2.2.9 hangs in truncate_inode_pages"</a>
<li> <b>Previous message:</b> <a href="0272.html">Emil Briggs: "Re: Q: PAGE_CACHE_SIZE"</a>
<!-- nextthread="start" -->
<!-- reply="end" -->
</ul>
<hr>
<!-- body="start" -->
The following patch implements dirty pages in the page cache, using its own<br>
version of kflushd, kpgflushd, so the support is not confined to just block<br>
based filesystems, and allows fun things like allocation of space on write.<br>
<p>
The code still has a ways to go before it reaches optimum tuning<br>
but the important part of the internal API looks solid.  <br>
<p>
As far as space consumption in struct page goes, one more set of list pointers<br>
has been added (to keep the dirty page list, and whatever), and 12 bits<br>
of flags have been used, allowing a crude but space efficient timer for<br>
when a page should next be written.<br>
<p>
Some important functions are:<br>
mark_page_dirty, mark_page_clean, unlock_page, write_page.<br>
<p>
Eric<br>
<p>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/fs/buffer.c linux-2.3.3.eb7/fs/buffer.c<br>
--- linux-2.3.3.eb6/fs/buffer.c	Sat May 22 17:16:33 1999<br>
+++ linux-2.3.3.eb7/fs/buffer.c	Sat May 22 18:23:49 1999<br>
@@ -1103,23 +1103,6 @@<br>
 	goto try_again;<br>
 }<br>
 <br>
-/* Run the hooks that have to be done when a page I/O has completed. */<br>
-static inline void after_unlock_page (struct page * page)<br>
-{<br>
-	if (test_and_clear_bit(PG_decr_after, &amp;page-&gt;flags)) {<br>
-		atomic_dec(&amp;nr_async_pages);<br>
-#ifdef DEBUG_SWAP<br>
-		printk ("DebugVM: Finished IO on page %p, nr_async_pages %d\n",<br>
-			(char *) page_address(page), <br>
-			atomic_read(&amp;nr_async_pages));<br>
-#endif<br>
-	}<br>
-	if (test_and_clear_bit(PG_swap_unlock_after, &amp;page-&gt;flags))<br>
-		swap_after_unlock_page(page-&gt;key);<br>
-	if (test_and_clear_bit(PG_free_after, &amp;page-&gt;flags))<br>
-		__free_page(page);<br>
-}<br>
-<br>
 /*<br>
  * Free all temporary buffers belonging to a page.<br>
  * This needs to be called with interrupts disabled.<br>
@@ -1190,9 +1173,7 @@<br>
 	/* OK, the async IO on this page is complete. */<br>
 	free_async_buffers(bh);<br>
 	restore_flags(flags);<br>
-	clear_bit(PG_locked, &amp;page-&gt;flags);<br>
-	wake_up(&amp;page-&gt;wait);<br>
-	after_unlock_page(page);<br>
+	unlock_page(page);<br>
 	return;<br>
 <br>
 still_busy:<br>
@@ -1285,14 +1266,12 @@<br>
 		 * and unlock_buffer(). */<br>
 	} else {<br>
 		unsigned long flags;<br>
-		clear_bit(PG_locked, &amp;page-&gt;flags);<br>
 		set_bit(PG_uptodate, &amp;page-&gt;flags);<br>
-		wake_up(&amp;page-&gt;wait);<br>
 		save_flags(flags);<br>
 		cli();<br>
 		free_async_buffers(bh);<br>
 		restore_flags(flags);<br>
-		after_unlock_page(page);<br>
+		unlock_page(page);<br>
 	}<br>
 	++current-&gt;maj_flt;<br>
 	return 0;<br>
@@ -1593,6 +1572,7 @@<br>
 <br>
 	sync_supers(0);<br>
 	sync_inodes(0);<br>
+	sync_pcache(1, 0);<br>
 <br>
 	ncount = 0;<br>
 #ifdef DEBUG<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/include/linux/mm.h linux-2.3.3.eb7/include/linux/mm.h<br>
--- linux-2.3.3.eb6/include/linux/mm.h	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/include/linux/mm.h	Sat May 22 18:23:49 1999<br>
@@ -141,6 +141,7 @@<br>
 	wait_queue_head_t wait;<br>
 	struct page **pprev_hash;<br>
 	void *generic_pp; /* This is page buffers iff PageBuffer(page) is true. */<br>
+	struct list_head lru;	/* dirty page queue */<br>
 } mem_map_t;<br>
 <br>
 /* Page flag bit values */<br>
@@ -157,8 +158,15 @@<br>
 #define PG_swap_cache		10<br>
 #define PG_skip			11<br>
 #define PG_buffer		12<br>
+#define PG_wcycle_low		13<br>
+#define PG_wcycle_high		24<br>
 #define PG_reserved		31<br>
 <br>
+/* Which cycle of page_flush will write out the page */<br>
+#define PageWCycle(page)	(((page)-&gt;flags &amp; ((2 &lt;&lt; PG_wcycle_high) -1)) &gt;&gt; PG_wcycle_low)<br>
+#define PageSetWCycle(page, cycle) \<br>
+	((page)-&gt;flags |= ((cycle &amp; ((PG_wcycle_high - PG_wcycle_low) -1)) &lt;&lt; PG_wcycle_low))<br>
+<br>
 /* Make it prettier to test the above... */<br>
 #define PageLocked(page)	(test_bit(PG_locked, &amp;(page)-&gt;flags))<br>
 #define PageError(page)		(test_bit(PG_error, &amp;(page)-&gt;flags))<br>
@@ -225,6 +233,12 @@<br>
  * page-&gt;inode is the pointer to the inode, and page-&gt;key is the<br>
  * offset into the file (divided by PAGE_CACHE_SIZE).<br>
  *<br>
+ * If an inode page wants to use the generic dirty page management, <br>
+ * mark_page_dirty() is called, which sets PG_dirty in page-&gt;flags.  Either<br>
+ * mark_page_clean() or write_page() can be called to remove this<br>
+ * condition, though usually this will happen automatically after the<br>
+ * page has aged appropriately.<br>
+ *<br>
  * A page may have buffers allocated to it. In this case,<br>
  * PageBuffer(page) is true and page-&gt;generic_pp is a circular list of<br>
  * these buffer heads. Else, PageBuffer(page) is false.<br>
@@ -332,12 +346,12 @@<br>
 extern int do_munmap(unsigned long, size_t);<br>
 <br>
 /* filemap.c */<br>
-extern unsigned long page_unuse(struct page *);<br>
-extern int shrink_mmap(int, int);<br>
 extern void truncate_inode_pages(struct inode *, loff_t);<br>
 extern void invalidate_inode_pages(struct inode *);<br>
+extern int sync_inode_pages(struct inode *inode, int wait);<br>
 extern void zap_inode_pages(struct inode *);<br>
 extern void update_vm_cache(struct inode *, loff_t, const char *, int);<br>
+extern struct page *get_inode_page(struct inode *, loff_t, unsigned long *);<br>
 extern unsigned long get_cached_page(struct inode *, unsigned long, int);<br>
 extern void put_cached_page(unsigned long);<br>
 <br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/include/linux/pagemap.h linux-2.3.3.eb7/include/linux/pagemap.h<br>
--- linux-2.3.3.eb6/include/linux/pagemap.h	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/include/linux/pagemap.h	Sat May 22 18:23:49 1999<br>
@@ -155,4 +155,25 @@<br>
 	struct vm_store *store, unsigned long key,<br>
 	struct page **hash);<br>
 <br>
+extern int sync_pcache(int old, int max_write);<br>
+extern int sync_pcache_dev(kdev_t dev);<br>
+<br>
+extern atomic_t nr_dirty_pages;<br>
+extern void wakeup_pgflush(int wait);<br>
+/* more thought needs to go into mark_page_dirty,<br>
+ *<br>
+ * It looks like an excellent place to require functionality to be present<br>
+ * if there is a configurable function under it.<br>
+ *<br>
+ * Also I need to figure out how the delay for writing a page needs to be set.<br>
+ */<br>
+extern void mark_page_dirty(struct page *page);<br>
+extern void mark_page_clean(struct page *page);<br>
+extern void unlock_page(struct page *page);<br>
+extern int generic_writepage(<br>
+	struct vm_store *store, struct page *page, unsigned long index, void **p);<br>
+extern int generic_updatepage(struct file *file, struct page *page, <br>
+	const char *buf, unsigned int offset, unsigned int count, int sync);<br>
+extern int write_page(struct page *page); /* do I need to export this one? */<br>
+<br>
 #endif<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/include/linux/vm_store.h linux-2.3.3.eb7/include/linux/vm_store.h<br>
--- linux-2.3.3.eb6/include/linux/vm_store.h	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/include/linux/vm_store.h	Sat May 22 18:23:49 1999<br>
@@ -47,6 +47,8 @@<br>
 extern int shrink_mmap(int priority, int gfp_mask);<br>
 extern void update_vm_store_cache(struct vm_store *store,<br>
 	unsigned long index, unsigned long offset, const char * buf, int count);<br>
+extern int sync_store_pages(struct vm_store *store);<br>
+extern int wait_on_store_pages(struct vm_store *store);<br>
 <br>
 #endif /* KERNEL */<br>
 #endif /* _LINUX_VM_STORE_H */<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/init/main.c linux-2.3.3.eb7/init/main.c<br>
--- linux-2.3.3.eb6/init/main.c	Sat May 22 16:10:13 1999<br>
+++ linux-2.3.3.eb7/init/main.c	Sat May 22 18:23:49 1999<br>
@@ -67,6 +67,8 @@<br>
 <br>
 static int init(void *);<br>
 extern int bdflush(void *);<br>
+extern int pgflush(void *);<br>
+extern void pgflush_init(void);<br>
 extern int kswapd(void *);<br>
 extern int kpiod(void *);<br>
 extern void kswapd_setup(void);<br>
@@ -1299,6 +1301,9 @@<br>
 <br>
 	/* Launch bdflush from here, instead of the old syscall way. */<br>
 	kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);<br>
+	/* Launch pgflush from here, it's a clone of bdflush... */<br>
+	pgflush_init();<br>
+	kernel_thread(pgflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);<br>
 	/* Start the background pageout daemon. */<br>
 	kswapd_setup();<br>
 	kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/ipc/shm.c linux-2.3.3.eb7/ipc/shm.c<br>
--- linux-2.3.3.eb6/ipc/shm.c	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/ipc/shm.c	Sat May 22 18:23:49 1999<br>
@@ -515,6 +515,7 @@<br>
 	shmd-&gt;vm_file = NULL;<br>
 	shmd-&gt;vm_store = NULL;<br>
 	shmd-&gt;vm_index = 0;<br>
+	shmd-&gt;vm_store = NULL;<br>
 	shmd-&gt;vm_ops = &amp;shm_vm_ops;<br>
 <br>
 	shp-&gt;u.shm_nattch++;            /* prevent destruction */<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/kernel/ksyms.c linux-2.3.3.eb7/kernel/ksyms.c<br>
--- linux-2.3.3.eb6/kernel/ksyms.c	Sat May 22 16:10:25 1999<br>
+++ linux-2.3.3.eb7/kernel/ksyms.c	Sat May 22 18:23:49 1999<br>
@@ -105,7 +105,6 @@<br>
 EXPORT_SYMBOL(remap_page_range);<br>
 EXPORT_SYMBOL(max_mapnr);<br>
 EXPORT_SYMBOL(high_memory);<br>
-EXPORT_SYMBOL(update_vm_cache);<br>
 EXPORT_SYMBOL(vmtruncate);<br>
 EXPORT_SYMBOL(find_vma);<br>
 EXPORT_SYMBOL(get_unmapped_area);<br>
@@ -145,8 +144,6 @@<br>
 EXPORT_SYMBOL(check_disk_change);<br>
 EXPORT_SYMBOL(invalidate_buffers);<br>
 EXPORT_SYMBOL(invalidate_inodes);<br>
-EXPORT_SYMBOL(invalidate_inode_pages);<br>
-EXPORT_SYMBOL(truncate_inode_pages);<br>
 EXPORT_SYMBOL(fsync_dev);<br>
 EXPORT_SYMBOL(permission);<br>
 EXPORT_SYMBOL(inode_setattr);<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/Makefile linux-2.3.3.eb7/mm/Makefile<br>
--- linux-2.3.3.eb6/mm/Makefile	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/mm/Makefile	Sat May 22 18:23:49 1999<br>
@@ -11,8 +11,8 @@<br>
 O_OBJS	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \<br>
 	    vmalloc.o slab.o \<br>
 	    swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o \<br>
-	    vm_store.o<br>
+	    vm_store.o page_flush.o<br>
 <br>
-OX_OBJS := swap_syms.o<br>
+OX_OBJS := swap_syms.o page_syms.o<br>
 <br>
 include $(TOPDIR)/Rules.make<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/filemap.c linux-2.3.3.eb7/mm/filemap.c<br>
--- linux-2.3.3.eb6/mm/filemap.c	Sat May 22 18:21:10 1999<br>
+++ linux-2.3.3.eb7/mm/filemap.c	Sat May 22 18:23:49 1999<br>
@@ -58,6 +58,9 @@<br>
  * locked down (those are sure to be up-to-date anyway, so we shouldn't<br>
  * invalidate them).<br>
  */<br>
+/*<br>
+ * Should we invalidate dirty pages? --EWB 19 June 1998<br>
+ */<br>
 void invalidate_inode_pages(struct inode * inode)<br>
 {<br>
  	invalidate_store_pages(inode-&gt;vm_store);<br>
@@ -84,6 +87,11 @@<br>
 	if (keep_bytes) {<br>
 		/* partial truncate, clear end of page */<br>
 		page = find_page(inode-&gt;vm_store, partial_keep);<br>
+		/* Wait in case we are reading the page and truncating it simultaneously */<br>
+		while (PageLocked(page) &amp;&amp; !PageUptodate(page)) {<br>
+			wait_on_page(page);<br>
+			page = find_page(inode-&gt;vm_store, partial_keep);<br>
+		}<br>
 		if (page) {<br>
 			unsigned long address = page_address(page);<br>
 			memset((void *) (keep_bytes + address), 0, PAGE_CACHE_SIZE - keep_bytes);<br>
@@ -118,6 +126,24 @@<br>
 	update_vm_store_cache(inode-&gt;vm_store, index, offset, buf, count);<br>
 }<br>
 <br>
+/* Sync all of the pages associated with an inode */<br>
+int sync_inode_pages(struct inode *inode, int wait)<br>
+{<br>
+	int error = 0;<br>
+	struct vm_store *store, *next;<br>
+	for(store = inode-&gt;vm_store; store != NULL; store = next) {<br>
+		next = store-&gt;st_next;<br>
+		error |= sync_store_pages(store);<br>
+	}<br>
+	if (wait) {<br>
+		for(store = inode-&gt;vm_store; store != NULL; store = next) {<br>
+			next = store-&gt;st_next;<br>
+			error |= wait_on_store_pages(store);<br>
+		}<br>
+	}<br>
+	return error? -EIO : 0;<br>
+}<br>
+<br>
 struct page *get_inode_page(<br>
 	struct inode *inode, loff_t page, unsigned long *page_cache_ptr)<br>
 {<br>
@@ -300,14 +326,14 @@<br>
 	max_ahead = 0;<br>
 <br>
 /*<br>
- * The current page is locked.<br>
+ * The current page is not uptodate.<br>
  * If the current position is inside the previous read IO request, do not<br>
  * try to reread previously read ahead pages.<br>
  * Otherwise decide or not to read ahead some pages synchronously.<br>
  * If we are not going to read ahead, set the read ahead context for this <br>
  * page only.<br>
  */<br>
-	if (PageLocked(page)) {<br>
+	if (!PageUptodate(page)) {<br>
 		if (!filp-&gt;f_ralen || index &gt;= raend || index + filp-&gt;f_ralen &lt; raend) {<br>
 			raend = index;<br>
 			if (((loff_t)raend &lt;&lt; PAGE_CACHE_SHIFT) &lt; inode-&gt;i_size)<br>
@@ -321,7 +347,7 @@<br>
 		}<br>
 	}<br>
 /*<br>
- * The current page is not locked.<br>
+ * The current page is uptodate <br>
  * If we were reading ahead and,<br>
  * if the current max read ahead size is not zero and,<br>
  * if the current position is inside the last read-ahead IO request,<br>
@@ -507,7 +533,9 @@<br>
 		else if (reada_ok &amp;&amp; filp-&gt;f_ramax &gt; (MIN_READAHEAD &gt;&gt; PAGE_CACHE_SHIFT))<br>
 				filp-&gt;f_ramax = (MIN_READAHEAD &gt;&gt; PAGE_CACHE_SHIFT);<br>
 <br>
-		wait_on_page(page);<br>
+		if (!PageUptodate(page) &amp;&amp; PageLocked(page)) {<br>
+			wait_on_page(page);<br>
+		}<br>
 <br>
 		if (!PageUptodate(page))<br>
 			goto page_read_error;<br>
@@ -798,7 +826,7 @@<br>
 			goto failure;<br>
 	}<br>
 <br>
-	if (PageLocked(page))<br>
+	if (PageLocked(page) &amp;&amp; !PageUptodate(page))<br>
 		goto page_locked_wait;<br>
 	if (!PageUptodate(page))<br>
 		goto page_read_error;<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/mmap.c linux-2.3.3.eb7/mm/mmap.c<br>
--- linux-2.3.3.eb6/mm/mmap.c	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/mm/mmap.c	Sat May 22 18:23:49 1999<br>
@@ -71,7 +71,7 @@<br>
 }<br>
 <br>
 /* Remove one vm structure from the inode's i_mmap ring. */<br>
-static inline void remove_shared_vm_struct(struct vm_area_struct *vma)<br>
+static void remove_shared_vm_struct(struct vm_area_struct *vma)<br>
 {<br>
 	struct file * file = vma-&gt;vm_file;<br>
 	struct vm_store *store = vma-&gt;vm_store;<br>
@@ -536,6 +536,7 @@<br>
 		mpnt-&gt;vm_file = area-&gt;vm_file;<br>
 		mpnt-&gt;vm_store = area-&gt;vm_store;<br>
 		mpnt-&gt;vm_pte = area-&gt;vm_pte;<br>
+		mpnt-&gt;vm_store = area-&gt;vm_store;<br>
 		if (mpnt-&gt;vm_file)<br>
 			mpnt-&gt;vm_file-&gt;f_count++;<br>
 		if (mpnt-&gt;vm_ops &amp;&amp; mpnt-&gt;vm_ops-&gt;open)<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/page_alloc.c linux-2.3.3.eb7/mm/page_alloc.c<br>
--- linux-2.3.3.eb6/mm/page_alloc.c	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/mm/page_alloc.c	Sat May 22 18:23:49 1999<br>
@@ -124,6 +124,8 @@<br>
 	if (!PageReserved(page) &amp;&amp; atomic_dec_and_test(&amp;page-&gt;count)) {<br>
 		if (PageSwapCache(page))<br>
 			panic ("Freeing swap cache page");<br>
+		if (PageDirty(page))<br>
+			panic ("Freeing dirty page");<br>
 		page-&gt;flags &amp;= ~(1 &lt;&lt; PG_referenced);<br>
 		free_pages_ok(page - mem_map, 0);<br>
 		return;<br>
@@ -141,6 +143,8 @@<br>
 		if (atomic_dec_and_test(&amp;map-&gt;count)) {<br>
 			if (PageSwapCache(map))<br>
 				panic ("Freeing swap cache pages");<br>
+			if (PageDirty(map))<br>
+				panic ("Freeing dirty page");<br>
 			map-&gt;flags &amp;= ~(1 &lt;&lt; PG_referenced);<br>
 			free_pages_ok(map_nr, order);<br>
 			return;<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/page_flush.c linux-2.3.3.eb7/mm/page_flush.c<br>
--- linux-2.3.3.eb6/mm/page_flush.c	Wed Dec 31 18:00:00 1969<br>
+++ linux-2.3.3.eb7/mm/page_flush.c	Sat May 22 18:23:49 1999<br>
@@ -0,0 +1,566 @@<br>
+/*<br>
+ *  linux/mm/page_flush.c<br>
+ *<br>
+ *  Copyright (C) 1998 Eric Biederman<br>
+ */<br>
+#include &lt;linux/fs.h&gt;<br>
+#include &lt;linux/mm.h&gt;<br>
+#include &lt;linux/pagemap.h&gt;<br>
+#include &lt;linux/vmalloc.h&gt;<br>
+#include &lt;linux/swap.h&gt;<br>
+#include &lt;linux/init.h&gt;<br>
+#include &lt;linux/smp_lock.h&gt;<br>
+#include &lt;asm/bitops.h&gt;<br>
+#include &lt;asm/pgtable.h&gt;<br>
+<br>
+#ifdef DEBUG<br>
+#  define debug_printk(n, args) do { if (n &lt;= DEBUG) printk args ; } while(0)<br>
+#else<br>
+#  define debug_printk(n, args)<br>
+#endif<br>
+<br>
+/* These need to be made dynamically tuneable... */<br>
+/* Percentage of page cache dirty to activate pgflush */				 <br>
+#define PG_DIRTY_FRACT  40<br>
+/* Maximum number of dirty pages to write out per wake-cycle. */<br>
+#define PG_DIRTY_WRITE_COUNT 500<br>
+/* default amount of time to age a dirty page before writing. */<br>
+#define PG_AGE_DIRTY_PAGE (30*HZ)<br>
+/* Amount of time in between cycles */<br>
+#define PG_CYCLE_TIME (1*HZ)<br>
+/* Number of cycles */<br>
+#define PG_CYCLES (1 &lt;&lt; (PG_wcycle_high - PG_wcycle_low))<br>
+<br>
+<br>
+void wakeup_pgflush(int wait);<br>
+<br>
+/* TO PLAY WITH<br>
+ * - mergesort the list before processing to get better locality of reference<br>
+ * - device bandwidth discovery, to keep writes from piling up faster than<br>
+ *   a device can handle<br>
+ */<br>
+<br>
+/* I believe for best performance I need to have a list of dirty pages,<br>
+ * and have a write time for each dirty page.<br>
+ * A fifo list should help since writes tend to be in order for files.<br>
+ * A flushtime should help correctly handle how long pages are cached.<br>
+ *<br>
+ */<br>
+<br>
+/* Use a spinlock so I don't have to worry when the dirty page list is updated.<br>
+ * Well actually I don't think it is safe to add to the dirty list<br>
+ * during an interrupt but removing a page should be safe.<br>
+ */<br>
+atomic_t nr_dirty_pages = ATOMIC_INIT(0);<br>
+static spinlock_t dirty_page_list_lock = SPIN_LOCK_UNLOCKED;<br>
+static LIST_HEAD(dirty_page_list);<br>
+<br>
+/* Note:<br>
+ * If you have a device that for some strange reason<br>
+ * can't handle having its buffer written to by the cpu<br>
+ * while it's writing data out to disk (or wherever)<br>
+ * you need to make a copy yourself as this code assumes it <br>
+ * is perfectly safe to write to locked buffers, as long as the<br>
+ * locked buffers are uptodate.<br>
+ */<br>
+<br>
+/* Last cycle number I used, and last time (in jiffies) I ran */<br>
+static unsigned long last_cycle = 0;<br>
+static unsigned long last_run  = 0;<br>
+<br>
+/* Note: Special care has been taken so jiffie wrap around is an expected case<br>
+ * and is handled properly.  All flushtimes are computed on a sliding scale<br>
+ * where half the numbers are always above jiffies and half the numbers are<br>
+ * always below.<br>
+ */<br>
+static inline void set_page_writetime(struct page *page, unsigned long wtime)<br>
+{<br>
+	unsigned long newtime;<br>
+	int newcycle;<br>
+	if (PageDirty(page)) {<br>
+		/* Move page to dirty list if jiffies is clear */<br>
+		newtime = jiffies + wtime;<br>
+		newcycle = (newtime - last_run)/ PG_CYCLE_TIME;<br>
+		if (newcycle == 0) {<br>
+			newcycle = 1;<br>
+		} else if (newcycle &gt;= PG_CYCLES) {<br>
+			newcycle = PG_CYCLES -1;<br>
+		}<br>
+		if (newcycle &gt; ((PageWCycle(page) - last_run)%PG_CYCLES)) {<br>
+			PageSetWCycle(page, (newcycle + last_run)%PG_CYCLES);<br>
+		}<br>
+	}<br>
+}<br>
+<br>
+static inline void after_add_to_dirty_list(void)<br>
+{<br>
+	int too_many;<br>
+	/* This buffer is dirty, maybe we need to start flushing.<br>
+	 * If too high a percentage of the buffers are dirty...<br>
+	 */<br>
+	too_many = (page_cache_size * PG_DIRTY_FRACT)/100;<br>
+	if (atomic_read(&amp;nr_dirty_pages) &gt; too_many) <br>
+		wakeup_pgflush(0);<br>
+}<br>
+<br>
+<br>
+/*<br>
+ * Before we start the kernel thread, print out the <br>
+ * pgflushd initialization message (otherwise the init message <br>
+ * may be printed in the middle of another driver's init <br>
+ * message).  It looks very bad when that happens.<br>
+ */<br>
+__initfunc(void pgflush_init(void))<br>
+{<br>
+	int i;<br>
+	char *revision="$Revision: 0.5 $", *s, *e;<br>
+	<br>
+	if ((s = strchr(revision, ':')) &amp;&amp;<br>
+	    (e = strchr(s, '$')))<br>
+		s++, i = e - s;<br>
+	else<br>
+		s = revision, i = -1;<br>
+	printk ("Starting pgflushd v%.*s\n", i, s);<br>
+}<br>
+<br>
+#ifdef DEBUG<br>
+static int dirty_page_list_length(void)<br>
+{<br>
+	struct list_head *head, *ptr;<br>
+	int count;<br>
+<br>
+	head = &amp;dirty_page_list;<br>
+	ptr = head-&gt;next;<br>
+	count = 0;<br>
+	while(ptr != head) {<br>
+		count++;<br>
+		ptr = ptr-&gt;next;<br>
+	}<br>
+	return count;<br>
+}<br>
+<br>
+static void verify_dirty_page_length(void)<br>
+{<br>
+	int length = dirty_page_list_length();<br>
+	if (length != atomic_read(&amp;nr_dirty_pages)) {<br>
+		printk("length:%d != count:%d\n",<br>
+			length, atomic_read(&amp;nr_dirty_pages));<br>
+	}<br>
+}<br>
+#else<br>
+#define verify_dirty_page_length()<br>
+#endif<br>
+<br>
+static void add_to_dirty_page_list(struct page *page)<br>
+{<br>
+	unsigned long flags;<br>
+	spin_lock_irqsave(&amp;dirty_page_list_lock, flags);<br>
+<br>
+	list_add(&amp;page-&gt;lru, &amp;dirty_page_list);<br>
+	atomic_inc(&amp;nr_dirty_pages);<br>
+<br>
+	verify_dirty_page_length();<br>
+<br>
+	spin_unlock_irqrestore(&amp;dirty_page_list_lock, flags);<br>
+}<br>
+<br>
+static void remove_from_dirty_page_list(struct page *page)<br>
+{<br>
+	unsigned long flags;<br>
+	spin_lock_irqsave(&amp;dirty_page_list_lock, flags);<br>
+<br>
+	atomic_dec(&amp;nr_dirty_pages);<br>
+	list_del(&amp;page-&gt;lru);<br>
+<br>
+	verify_dirty_page_length();<br>
+<br>
+	spin_unlock_irqrestore(&amp;dirty_page_list_lock, flags);<br>
+}<br>
+<br>
+static struct page *first_dirty_page(void)<br>
+{<br>
+	struct page *result;<br>
+	result =  list_entry(dirty_page_list.prev, struct page, lru);<br>
+	if (result == list_entry(&amp;dirty_page_list, struct page, lru)) {<br>
+#ifdef DEBUG<br>
+		if (atomic_read(&amp;nr_dirty_pages) != 0) {<br>
+			printk(KERN_DEBUG "%d dirty pages, and list empty?\n",<br>
+				atomic_read(&amp;nr_dirty_pages));<br>
+		}<br>
+#endif<br>
+		result = 0;<br>
+	}<br>
+	return result;<br>
+}<br>
+#define next_dirty_page(page) list_entry(page-&gt;lru.prev, struct page, lru)<br>
+<br>
+#define is_dirty_page(page) PageDirty(page)<br>
+<br>
+void mark_page_dirty(struct page *page)<br>
+{<br>
+	if (!page) {<br>
+		printk("mark_page_dirty: attempt to mark nonexistent page dirty!\n");<br>
+		return;<br>
+	}<br>
+	if (!page-&gt;store) {<br>
+		printk("mark_page_dirty: attempt to mark page: %ld without a vm_store!\n",<br>
+			page_address(page));<br>
+		return;<br>
+	}<br>
+	if (!test_and_set_bit(PG_dirty, &amp;page-&gt;flags)) {<br>
+		set_page_writetime(page, PG_AGE_DIRTY_PAGE);<br>
+		add_to_dirty_page_list(page);<br>
+		after_add_to_dirty_list();<br>
+	} else {<br>
+#if 0<br>
+ 		/* This is actually a common case! */<br>
+		printk(KERN_DEBUG "mark_page_dirty: page: %ld is already dirty!\n",<br>
+			page - mem_map);<br>
+#endif<br>
+	}<br>
+}<br>
+<br>
+inline void mark_page_clean(struct page *page)<br>
+{<br>
+	if (test_and_clear_bit(PG_dirty, &amp;page-&gt;flags)) {<br>
+		remove_from_dirty_page_list(page);<br>
+	} else {<br>
+		printk("mark_page_clean: page: %ld is already clean!\n",<br>
+		       page_address(page));<br>
+	}<br>
+}<br>
+<br>
+/*<br>
+ * Locks<br>
+ * ==========================================<br>
+ */ <br>
+<br>
+/* Run the hooks that have to be done when a page I/O has completed. */<br>
+static inline void after_unlock_page(struct page *page)<br>
+{<br>
+	if (test_and_clear_bit(PG_decr_after, &amp;page-&gt;flags)) {<br>
+		atomic_dec(&amp;nr_async_pages);<br>
+#ifdef DEBUG_SWAP<br>
+		printk ("DebugVM: Finished IO on page %p, nr_async_pages %d\n",<br>
+			(char *) page_address(page), <br>
+			atomic_read(&amp;nr_async_pages));<br>
+#endif<br>
+	}<br>
+	if (test_and_clear_bit(PG_swap_unlock_after, &amp;page-&gt;flags)) {<br>
+		swap_after_unlock_page(page-&gt;key);<br>
+	}<br>
+	if (test_and_clear_bit(PG_free_after, &amp;page-&gt;flags)) {<br>
+		__free_page(page);<br>
+	}<br>
+}<br>
+<br>
+void unlock_page(struct page *page)<br>
+{<br>
+	/* Note: There is a possible pathological case here<br>
+	 * Someone may wait for a page and then totally free it<br>
+	 * before after_unlock_page is called.  <br>
+	 * A proper setting of PG_free_after avoids this.<br>
+	 */<br>
+	clear_bit(PG_locked, &amp;page-&gt;flags);<br>
+	wake_up(&amp;page-&gt;wait);<br>
+	after_unlock_page(page);<br>
+}<br>
+<br>
+/*<br>
+ * Ideally all of the resources needed on the backing store have been<br>
+ * allocated by this point, and the only error possible would be a<br>
+ * failure of the backing store.<br>
+ * <br>
+ * Since there is at least one legitimate error condition I have implemented <br>
+ * some support for error handling.  Synchronous errors are returned<br>
+ * and asynchronous catastrophic errors may be reported by setting PG_error<br>
+ *<br>
+ */<br>
+int write_page(struct page *page)<br>
+{<br>
+	struct vm_store *store;<br>
+	int error = 0;<br>
+	/* Preconditions:<br>
+	 * 1) We are in the page cache<br>
+	 * 2) The page is not already doing some i/o.<br>
+	 */<br>
+	if (!page) {<br>
+		printk(KERN_DEBUG "write_page: nonexistent page!\n");<br>
+		return -EIO;<br>
+	}<br>
+	store = page-&gt;store;<br>
+	if (!store) {<br>
+		printk(KERN_DEBUG "write_page: page: %ld without store!\n",<br>
+		       page_address(page));<br>
+		error = -EIO;<br>
+	}<br>
+	<br>
+	if (!error &amp;&amp; PageDirty(page)) {<br>
+		/* A page being written out a second time while<br>
+		 * it is already being written should be a rare case.<br>
+		 * Therefore wait until the situation passes and then<br>
+		 * write out the page.<br>
+		 *<br>
+		 * This also happens to handle the strange case of<br>
+		 * writing a page while it is being read, and prevents it...<br>
+		 */<br>
+		while(test_and_set_bit(PG_locked, &amp;page-&gt;flags)) {<br>
+			wait_on_page(page);<br>
+		}<br>
+		<br>
+		/* Mark the page clean before it is written, but<br>
+		 * after it is certain the page will be written<br>
+		 * so I don't have to worry about fancy locks on the<br>
+		 * dirty page list.   And so I can detect cases where<br>
+		 * a page becomes dirty while it is being written out.<br>
+		 */<br>
+		mark_page_clean(page);<br>
+<br>
+		if (store &amp;&amp; store-&gt;st_ops &amp;&amp; store-&gt;st_ops-&gt;write_page) {<br>
+			error = store-&gt;st_ops-&gt;write_page(<br>
+				store, page, page-&gt;key, &amp;page-&gt;generic_pp);<br>
+		} else {<br>
+			printk(KERN_ERR "No write_page function!\n");<br>
+			error = -EIO;<br>
+		}<br>
+	} <br>
+	if (error) {<br>
+		if (PageLocked(page)) <br>
+			unlock_page(page);<br>
+		if (is_dirty_page(page)) {<br>
+			mark_page_clean(page);<br>
+		} else {<br>
+			printk("Attempt to write clean page %lu",<br>
+				(long)(page - mem_map));<br>
+		}<br>
+		set_bit(PG_error, &amp;page-&gt;flags);<br>
+	}<br>
+	return error;<br>
+}<br>
+<br>
+<br>
+/*<br>
+ * generics<br>
+ * ==========================================<br>
+ */ <br>
+int generic_writepage(struct vm_store *store, struct page *page, unsigned long index, void **ptr)<br>
+{<br>
+	struct inode *inode;<br>
+	unsigned long block;<br>
+	int *p, nr[PAGE_SIZE/512];<br>
+	int i;<br>
+<br>
+	inode = store-&gt;generic_stp;<br>
+	atomic_inc(&amp;page-&gt;count);<br>
+	set_bit(PG_free_after, &amp;page-&gt;flags);<br>
+<br>
+	i = PAGE_SIZE &gt;&gt; inode-&gt;i_sb-&gt;s_blocksize_bits;<br>
+	block = index &lt;&lt; (PAGE_SHIFT - inode-&gt;i_sb-&gt;s_blocksize_bits);<br>
+	p = nr;<br>
+	do {<br>
+		/* FIXME: bmap doesn't allocate blocks for writing */<br>
+		*p = inode-&gt;i_op-&gt;bmap(inode, block);<br>
+		i--;<br>
+		block++;<br>
+		p++;<br>
+	} while(i &gt; 0);<br>
+<br>
+	/* IO start */<br>
+	brw_page(WRITE, page, inode-&gt;i_dev, nr, inode-&gt;i_sb-&gt;s_blocksize, 1);<br>
+	return 0;<br>
+}<br>
+<br>
+/* Do the basic work of updating a page */<br>
+int generic_updatepage(struct file *file, struct page *page, <br>
+	const char *buf, unsigned int offset,<br>
+	unsigned int count, int sync)<br>
+{<br>
+	int result = count;<br>
+<br>
+	mark_page_dirty(page);<br>
+<br>
+	/* Currently I assume that by the time you get here<br>
+	 * all needed resources have been obtained so the<br>
+	 * write should only fail if there is a hardware error.<br>
+	 * Which is an important case to handle, but not to optimize.<br>
+	 */<br>
+	if (sync) {<br>
+		result = write_page(page);<br>
+		wait_on_page(page);<br>
+		if (!result &amp;&amp; PageError(page)) {<br>
+			result = -EIO;<br>
+		}<br>
+	}<br>
+	return result;<br>
+}<br>
+<br>
+/*<br>
+ * ==========================================<br>
+ */ <br>
+<br>
+/* Here we attempt to write back old pages. <br>
+ */<br>
+int sync_pcache(int old, int max_write)<br>
+{<br>
+	struct page * page, *next;<br>
+	int i;<br>
+	int ndirty = 0, nwritten = 0;<br>
+	int min_cycle, cycles, cycle;<br>
+<br>
+	min_cycle = last_cycle +1;<br>
+	cycles = (jiffies - last_run) / PG_CYCLE_TIME;<br>
+	if (cycles &gt; PG_CYCLES) {<br>
+		cycles = PG_CYCLES;<br>
+	}<br>
+	last_run = jiffies;<br>
+	last_cycle = (last_cycle + cycles)% PG_CYCLES;<br>
+<br>
+	if (!max_write) {<br>
+		max_write = ((-1U) &gt;&gt; 1U);<br>
+	}<br>
+	debug_printk(2, (KERN_DEBUG "sync_pcache(%d,%d)\n", old, max_write));<br>
+repeat:<br>
+	page = first_dirty_page();<br>
+	i = page?atomic_read(&amp;nr_dirty_pages):0; <br>
+#ifdef DEBUG<br>
+	if (page) {<br>
+		debug_printk(2, (KERN_DEBUG "sync_pcache: page: %ld i: %d store=%p is_dirty:%d dirty:%d locked:%d\n", <br>
+			(long)(page - mem_map), i,<br>
+			page-&gt;store, is_dirty_page(page),<br>
+			PageDirty(page), PageLocked(page) ));<br>
+	} else {<br>
+		debug_printk(2, (KERN_DEBUG "sync_pcache: page: (none) i: %d\n", i));<br>
+	}<br>
+#endif<br>
+	for(; (i-- &gt; 0) &amp;&amp; (nwritten &lt; max_write); page = next) {<br>
+#ifdef DEBUG<br>
+		if (current-&gt;need_resched) {<br>
+			schedule();<br>
+		}<br>
+#endif		<br>
+		/* We may have stalled while waiting for I/O to complete. */<br>
+		if (!is_dirty_page(page)) <br>
+			goto repeat;<br>
+		next = next_dirty_page(page);<br>
+<br>
+		if (PageLocked(page)) {<br>
+			continue;<br>
+		}<br>
+		ndirty++;<br>
+<br>
+		cycle = (PageWCycle(page) - min_cycle)%PG_CYCLES;<br>
+		/* It is safe to write dirty pages that are shared<br>
+		 * because I clear the dirty indicator first.<br>
+		 */<br>
+		if (old &amp;&amp; (cycle &gt;= cycles)) {<br>
+			continue;<br>
+		}<br>
+		nwritten++;<br>
+		write_page(page);<br>
+	}<br>
+	debug_printk(1, (KERN_DEBUG "Wrote %d/%d buffers\n", nwritten, ndirty));<br>
+	return nwritten;<br>
+}<br>
+<br>
+<br>
+#if 0<br>
+/* Here we attempt to write back old pages. <br>
+ */<br>
+int sync_pcache_dev(kdev_t dev)<br>
+{<br>
+	struct page * page, *next;<br>
+	int i;<br>
+	int error = 0;<br>
+repeat:<br>
+	page = first_dirty_page();<br>
+	i = page?atomic_read(&amp;nr_dirty_pages):0; <br>
+	for(; (i-- &gt; 0) ; page = next) {<br>
+		struct vm_store *store = page-&gt;store;<br>
+		/* We may have stalled while waiting for I/O to complete. */<br>
+		if (!is_dirty_page(page))<br>
+			goto repeat;<br>
+		next = next_dirty_page(page);<br>
+<br>
+		if (PageLocked(page) || !PageDirty(page)<br>
+		    || (dev &amp;&amp; store &amp;&amp; (inode-&gt;i_dev != dev))) {<br>
+			continue;<br>
+		}<br>
+		/* It is safe to write shared dirty pages */<br>
+		error |= write_page(page);<br>
+	}<br>
+	return error? 0 : -EIO;<br>
+}<br>
+#endif<br>
+<br>
+/* ====================== pgflush support =================== */<br>
+<br>
+/* This is a simple kernel daemon, whose job it is to provide a dynamic<br>
+ * response to dirty buffers.  Once this process is activated, we write back<br>
+ * a limited number of buffers to the disks and then go back to sleep again.<br>
+ */<br>
+static DECLARE_WAIT_QUEUE_HEAD(pgflush_wait);<br>
+static DECLARE_WAIT_QUEUE_HEAD(pgflush_done);<br>
+struct task_struct *pgflush_tsk = 0;<br>
+<br>
+void wakeup_pgflush(int wait)<br>
+{<br>
+	if (current == pgflush_tsk)<br>
+		return;<br>
+	wake_up(&amp;pgflush_wait);<br>
+	if (wait) {<br>
+		run_task_queue(&amp;tq_disk);<br>
+		sleep_on(&amp;pgflush_done);<br>
+	}<br>
+}<br>
+<br>
+/* This is the actual pgflush daemon itself. <br>
+ * We launch it ourselves internally with<br>
+ * kernel_thread(...)  directly after the first thread in init/main.c */<br>
+<br>
+int pgflush(void *unsused)<br>
+{<br>
+	/*<br>
+	 *	We have a bare-bones task_struct, and really should fill<br>
+	 *	in a few more things so "top" and /proc/2/{exe,root,cwd}<br>
+	 *	display semi-sane things. Not real crucial though...  <br>
+	 */<br>
+<br>
+	current-&gt;session = 1;<br>
+	current-&gt;pgrp = 1;<br>
+	sprintf(current-&gt;comm, "kpgflushd");<br>
+	pgflush_tsk = current;<br>
+<br>
+	/*<br>
+	 *	As a kernel thread we want to tamper with system buffers<br>
+	 *	and other internals and thus be subject to the SMP locking<br>
+	 *	rules. (On a uniprocessor box this does nothing).<br>
+	 */<br>
+	lock_kernel();<br>
+	for(;;) {<br>
+		int ndirty;<br>
+<br>
+		debug_printk(1, (KERN_DEBUG "pgflush() activaged..."));<br>
+		<br>
+		/* CHECK_EMERGENCY_SYNC */ /* only if I replace bdflush */<br>
+<br>
+		ndirty = sync_pcache(0, PG_DIRTY_WRITE_COUNT);<br>
+<br>
+		debug_printk(1, (KERN_DEBUG "pgflush: sleeping again.\n"));<br>
+<br>
+		run_task_queue(&amp;tq_disk);<br>
+		wake_up(&amp;pgflush_done);<br>
+<br>
+		/* If there are still a lot of dirty pages around, skip the sleep <br>
+		 * and flush some more <br>
+		 */<br>
+<br>
+		if (ndirty == 0 || <br>
+		    atomic_read(&amp;nr_dirty_pages) &lt;= (page_cache_size * 5)/100) {<br>
+			spin_lock_irq(&amp;current-&gt;sigmask_lock);<br>
+			flush_signals(current);<br>
+			spin_unlock_irq(&amp;current-&gt;sigmask_lock);<br>
+<br>
+			interruptible_sleep_on(&amp;pgflush_wait);<br>
+		}<br>
+	}<br>
+}<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/page_syms.c linux-2.3.3.eb7/mm/page_syms.c<br>
--- linux-2.3.3.eb6/mm/page_syms.c	Wed Dec 31 18:00:00 1969<br>
+++ linux-2.3.3.eb7/mm/page_syms.c	Sat May 22 18:23:49 1999<br>
@@ -0,0 +1,38 @@<br>
+#include &lt;linux/config.h&gt;<br>
+#include &lt;linux/module.h&gt;<br>
+#include &lt;linux/fs.h&gt;<br>
+#include &lt;linux/pagemap.h&gt;<br>
+#include &lt;linux/mm.h&gt;<br>
+#include &lt;linux/vm_store.h&gt;<br>
+<br>
+/* store functions */<br>
+EXPORT_SYMBOL(get_store_page);<br>
+EXPORT_SYMBOL(invalidate_store_pages);<br>
+EXPORT_SYMBOL(zap_store_pages);<br>
+EXPORT_SYMBOL(remove_store_page);<br>
+EXPORT_SYMBOL(update_vm_store_cache);<br>
+EXPORT_SYMBOL(sync_store_pages);<br>
+EXPORT_SYMBOL(wait_on_store_pages);<br>
+<br>
+/* filemap functions */<br>
+EXPORT_SYMBOL(invalidate_inode_pages);<br>
+EXPORT_SYMBOL(truncate_inode_pages);<br>
+EXPORT_SYMBOL(sync_inode_pages);<br>
+EXPORT_SYMBOL(zap_inode_pages);<br>
+EXPORT_SYMBOL(update_vm_cache);<br>
+EXPORT_SYMBOL(get_inode_page);<br>
+<br>
+/* pagemap functions */<br>
+<br>
+EXPORT_SYMBOL(add_to_page_cache);<br>
+EXPORT_SYMBOL(page_cache_size);<br>
+EXPORT_SYMBOL(page_hash_table);<br>
+EXPORT_SYMBOL(__wait_on_page);<br>
+<br>
+EXPORT_SYMBOL(mark_page_dirty);<br>
+EXPORT_SYMBOL(mark_page_clean);<br>
+EXPORT_SYMBOL(unlock_page);<br>
+<br>
+EXPORT_SYMBOL(write_page);<br>
+EXPORT_SYMBOL(generic_writepage);<br>
+EXPORT_SYMBOL(generic_updatepage);<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/vm_store.c linux-2.3.3.eb7/mm/vm_store.c<br>
--- linux-2.3.3.eb6/mm/vm_store.c	Sat May 22 18:19:40 1999<br>
+++ linux-2.3.3.eb7/mm/vm_store.c	Sat May 22 18:23:49 1999<br>
@@ -14,7 +14,9 @@<br>
 	struct page **hash)<br>
 {<br>
 	atomic_inc(&amp;page-&gt;count);<br>
-	page-&gt;flags = (page-&gt;flags &amp; ~((1 &lt;&lt; PG_uptodate) | (1 &lt;&lt; PG_error))) | (1 &lt;&lt; PG_referenced);<br>
+	page-&gt;flags = (page-&gt;flags &amp; <br>
+		~((1 &lt;&lt; PG_uptodate) | (1 &lt;&lt; PG_error) | (1 &lt;&lt; PG_dirty))) <br>
+		| (1 &lt;&lt; PG_referenced);<br>
 	page-&gt;key = key;<br>
 	add_page_to_store_queue(store, page);<br>
 	__add_page_to_hash_queue(page, hash);<br>
@@ -67,6 +69,10 @@<br>
 	if (store &amp;&amp; store-&gt;st_ops &amp;&amp; store-&gt;st_ops-&gt;clear_page) {<br>
 		(store-&gt;st_ops-&gt;clear_page)(store, page, page-&gt;key, &amp;page-&gt;generic_pp);<br>
 	}<br>
+	/* If clear_page left us with a dirty page, forget that it is dirty */<br>
+	if (PageDirty(page)) {<br>
+		mark_page_clean(page);<br>
+	}<br>
 	remove_page_from_hash_queue(page);<br>
 	remove_page_from_store_queue(page);<br>
 	page_cache_release(page);<br>
@@ -151,6 +157,49 @@<br>
 	} while (count);<br>
 }<br>
 <br>
+/* <br>
+ * Make 1 pass through the pages and start I/O on all of the pages.<br>
+ * If an I/O error is detected -EIO is returned.<br>
+ * If no errors are detected 0 is returned.<br>
+ */<br>
+int sync_store_pages(struct vm_store *store)<br>
+{<br>
+	struct page *page, *next;<br>
+	int error = 0;<br>
+	page = store-&gt;st_pages;<br>
+	for(; page != NULL; page = next) {<br>
+		/* compute the next element early in case we sleep and the<br>
+		 * page goes away<br>
+		 */<br>
+		next = page-&gt;next;<br>
+		if (PageLocked(page)) {<br>
+			continue;<br>
+		}<br>
+		if (PageDirty(page)) {<br>
+			error |= write_page(page);<br>
+		}<br>
+	}<br>
+	return error? -EIO : 0;<br>
+}<br>
+<br>
+/* Make 1 pass through the store pages,<br>
+ * waiting on each page that is uptodate and locked.<br>
+ * If an I/O error is detected -EIO is returned<br>
+ */<br>
+int wait_on_store_pages(struct vm_store *store) <br>
+{<br>
+	int error = 0;<br>
+	struct page *page;<br>
+	page = store-&gt;st_pages;<br>
+	for(; page != NULL; page = page-&gt;next) {<br>
+		while (PageLocked(page) &amp;&amp; PageUptodate(page)) {<br>
+			atomic_inc(&amp;page-&gt;count);<br>
+			wait_on_page(page);<br>
+		}<br>
+		error |= PageError(page);<br>
+	}<br>
+	return error? -EIO : 0;<br>
+}<br>
 <br>
 /* <br>
  * Wait for IO to complete on a locked page.<br>
@@ -176,6 +225,7 @@<br>
 	remove_wait_queue(&amp;page-&gt;wait, &amp;wait);<br>
 }<br>
 <br>
+/* Find a freeable page and free it */<br>
 int shrink_mmap(int priority, int gfp_mask)<br>
 {<br>
 	static unsigned long clock = 0;<br>
@@ -207,7 +257,7 @@<br>
 		<br>
 		referenced = test_and_clear_bit(PG_referenced, &amp;page-&gt;flags);<br>
 <br>
-		if (PageLocked(page))<br>
+		if (PageLocked(page) || PageDirty(page))<br>
 			continue;<br>
 <br>
 		if ((gfp_mask &amp; __GFP_DMA) &amp;&amp; !PageDMA(page))<br>
diff -uNrX /home/eric/projects/linux/linux-ignore-files linux-2.3.3.eb6/mm/vmscan.c linux-2.3.3.eb7/mm/vmscan.c<br>
--- linux-2.3.3.eb6/mm/vmscan.c	Sat May 22 17:16:37 1999<br>
+++ linux-2.3.3.eb7/mm/vmscan.c	Sat May 22 18:23:49 1999<br>
@@ -385,6 +385,13 @@<br>
 	/* Always trim SLAB caches when memory gets low. */<br>
 	kmem_cache_reap(gfp_mask);<br>
 <br>
+	/* Write out dirty pages when memory gets low.<br>
+	 * Eventually they will unlock and we can free them if needed.<br>
+	 */<br>
+	if (atomic_read(&amp;nr_dirty_pages)) {<br>
+		wakeup_pgflush(0);<br>
+	}<br>
+ <br>
 	priority = 6;<br>
 	do {<br>
 		while (shrink_mmap(priority, gfp_mask)) {<br>
<p>
-<br>
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in<br>
the body of a message to majordomo@vger.rutgers.edu<br>
Please read the FAQ at <a href="http://www.tux.org/lkml/">http://www.tux.org/lkml/</a><br>
<!-- body="end" -->
<hr>
<p>
<ul>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0274.html">Alan Cox: "Re: 2.2.9 hangs in truncate_inode_pages"</a>
<li> <b>Previous message:</b> <a href="0272.html">Emil Briggs: "Re: Q: PAGE_CACHE_SIZE"</a>
<!-- nextthread="start" -->
<!-- reply="end" -->
</ul>
</font></body>
