Re: [RFC][PATCH] Re: Linux 2.4.4-ac10

Mike Galbraith (mikeg@wen-online.de)
Thu, 24 May 2001 10:48:48 +0200 (CEST)


On Sun, 20 May 2001, Rik van Riel wrote:

> Remember that inactive_clean pages are always immediately
> reclaimable by __alloc_pages(), if you measured a performance
> difference by freeing pages in a different way I'm pretty sure
> it's a side effect of something else. What that something
> else is I'm curious to find out, but I'm pretty convinced that
> throwing away data early isn't the way to go.

OK.. let's forget about throughput for a moment and consider
those annoying reports of order-0 allocations failing :)

What do you think of the patch below (ignore the refill_inactive bit)
with respect to allocator reliability under heavy stress? The thing does
kick in and pump up zones even if I set the 'blood donor' level
to pages_min.

-Mike

--- linux-2.4.5-pre3/mm/page_alloc.c.org Mon May 21 10:35:06 2001
+++ linux-2.4.5-pre3/mm/page_alloc.c Thu May 24 08:18:36 2001
@@ -224,10 +224,11 @@
unsigned long order, int limit, int direct_reclaim)
{
zone_t **zone = zonelist->zones;
+ struct page *page = NULL;

for (;;) {
zone_t *z = *(zone++);
- unsigned long water_mark;
+ unsigned long water_mark = 1 << order;

if (!z)
break;
@@ -249,18 +250,44 @@
case PAGES_HIGH:
water_mark = z->pages_high;
}
+ if (z->free_pages + z->inactive_clean_pages < water_mark)
+ continue;

- if (z->free_pages + z->inactive_clean_pages > water_mark) {
- struct page *page = NULL;
- /* If possible, reclaim a page directly. */
- if (direct_reclaim && z->free_pages < z->pages_min + 8)
+ if (direct_reclaim) {
+ int count;
+
+ /* If we're in bad shape.. */
+ if (z->free_pages < z->pages_low && z->inactive_clean_pages) {
+ count = 4 * (1 << page_cluster);
+ /* reclaim a page for ourselves if we can afford to.. */
+ if (z->inactive_clean_pages > count)
+ page = reclaim_page(z);
+ if (z->inactive_clean_pages < 2 * count)
+ count = z->inactive_clean_pages / 2;
+ } else count = 0;
+
+ /*
+ * and make a small donation to the reclaim challenged.
+ *
+ * We don't ever want a zone to reach the state where we
+ * have nothing except reclaimable pages left.. not if
+ * we can possibly do something to help prevent it.
+ */
+ while (count--) {
+ struct page *page;
page = reclaim_page(z);
- /* If that fails, fall back to rmqueue. */
- if (!page)
- page = rmqueue(z, order);
- if (page)
- return page;
+ if (!page)
+ break;
+ __free_page(page);
+ }
}
+ if (!page)
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ if (z->inactive_clean_pages - z->free_pages > z->pages_low
+ && waitqueue_active(&kreclaimd_wait))
+ wake_up_interruptible(&kreclaimd_wait);
}

/* Found nothing. */
@@ -314,29 +341,6 @@
wakeup_bdflush(0);

try_again:
- /*
- * First, see if we have any zones with lots of free memory.
- *
- * We allocate free memory first because it doesn't contain
- * any data ... DUH!
- */
- zone = zonelist->zones;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
- if (!z->size)
- BUG();
-
- if (z->free_pages >= z->pages_low) {
- page = rmqueue(z, order);
- if (page)
- return page;
- } else if (z->free_pages < z->pages_min &&
- waitqueue_active(&kreclaimd_wait)) {
- wake_up_interruptible(&kreclaimd_wait);
- }
- }

/*
* Try to allocate a page from a zone with a HIGH
--- linux-2.4.5-pre3/mm/vmscan.c.org Thu May 17 16:44:23 2001
+++ linux-2.4.5-pre3/mm/vmscan.c Thu May 24 08:05:21 2001
@@ -824,39 +824,17 @@
#define DEF_PRIORITY (6)
static int refill_inactive(unsigned int gfp_mask, int user)
{
- int count, start_count, maxtry;
-
- if (user) {
- count = (1 << page_cluster);
- maxtry = 6;
- } else {
- count = inactive_shortage();
- maxtry = 1 << DEF_PRIORITY;
- }
-
- start_count = count;
- do {
- if (current->need_resched) {
- __set_current_state(TASK_RUNNING);
- schedule();
- if (!inactive_shortage())
- return 1;
- }
-
- count -= refill_inactive_scan(DEF_PRIORITY, count);
- if (count <= 0)
- goto done;
-
- /* If refill_inactive_scan failed, try to page stuff out.. */
- swap_out(DEF_PRIORITY, gfp_mask);
-
- if (--maxtry <= 0)
- return 0;
-
- } while (inactive_shortage());
-
-done:
- return (count < start_count);
+ int shortage = inactive_shortage();
+ int large = freepages.high/2;
+ int scale;
+
+ scale = shortage/large;
+ scale += free_shortage()/large;
+ if (scale > DEF_PRIORITY-1)
+ scale = DEF_PRIORITY-1;
+ if (refill_inactive_scan(DEF_PRIORITY-scale, shortage) < shortage)
+ return swap_out(DEF_PRIORITY, gfp_mask);
+ return 1;
}

static int do_try_to_free_pages(unsigned int gfp_mask, int user)
@@ -976,8 +954,9 @@
* We go to sleep for one second, but if it's needed
* we'll be woken up earlier...
*/
- if (!free_shortage() || !inactive_shortage()) {
- interruptible_sleep_on_timeout(&kswapd_wait, HZ);
+ if (current->need_resched || !free_shortage() ||
+ !inactive_shortage()) {
+ interruptible_sleep_on_timeout(&kswapd_wait, HZ/10);
/*
* If we couldn't free enough memory, we see if it was
* due to the system just not having enough memory.
@@ -1051,10 +1030,13 @@
int i;
for(i = 0; i < MAX_NR_ZONES; i++) {
zone_t *zone = pgdat->node_zones + i;
+ int count;
if (!zone->size)
continue;

- while (zone->free_pages < zone->pages_low) {
+ count = zone->pages_low;
+ while (zone->free_pages < zone->inactive_clean_pages &&
+ count--) {
struct page * page;
page = reclaim_page(zone);
if (!page)
@@ -1064,6 +1046,9 @@
}
pgdat = pgdat->node_next;
} while (pgdat);
+#if 1
+ run_task_queue(&tq_disk);
+#endif
}
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/