[PATCH] 3/5 VM changes: dont-rotate-active-list.patch

Nikita Danilov (Nikita@Namesys.COM)
Wed, 9 Jul 2003 12:47:12 +0400


Currently, if a zone is short on free pages, refill_inactive_zone() starts
moving pages from the active_list to the inactive_list, rotating the
active_list as it goes. That is, pages from the tail of the active_list are
transferred to its head, destroying LRU ordering exactly when we need it
most --- when the system is low on free memory and page replacement has to
be performed.

This patch modifies refill_inactive_zone() so that it scans the active_list
without rotating it. To achieve this, a special dummy page, zone->scan_page,
is maintained for each zone. This page marks the place in the active_list
that scanning has reached.

As an additional bonus, if memory pressure is not high enough to start
swapping mapped pages (reclaim_mapped == 0 in refill_inactive_zone()),
unreferenced mapped pages can be left behind zone->scan_page instead of
being moved to the head of the active_list. When reclaim_mapped mode is
activated, zone->scan_page is reset back to the tail of the active_list so
that these pages can be re-scanned.
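
To make the mechanics concrete, here is a minimal userspace sketch of the
same technique, kept deliberately outside the kernel: a dummy node embedded
in a circular doubly-linked list marks how far the scan has progressed,
entries that must be skipped are re-inserted directly behind the marker, and
the marker wraps back to the tail once it reaches the head. All names here
(struct node, node_add(), scan(), ...) are invented for illustration; this
is not kernel code.

#include <stdio.h>

struct node {
	struct node *prev, *next;
	int id;
	int referenced;		/* stand-in for the page reference bit */
};

/* insert @n right after @pos (list_add() analogue) */
static void node_add(struct node *n, struct node *pos)
{
	n->next = pos->next;
	n->prev = pos;
	pos->next->prev = n;
	pos->next = n;
}

/* unlink @n from its list (list_del() analogue) */
static void node_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* move @n to the tail of the list, i.e. just before @head */
static void node_move_tail(struct node *n, struct node *head)
{
	node_del(n);
	node_add(n, head->prev);
}

/*
 * Scan up to @nr entries, oldest (tail) first, always taking the entry
 * just behind the marker.  Referenced entries are re-inserted directly
 * behind the marker, so this pass never revisits them; unreferenced
 * entries are "reclaimed".  The list itself is never rotated.
 */
static void scan(struct node *head, struct node *marker, int nr)
{
	while (nr--) {
		struct node *victim;

		/* marker reached the head: wrap it back to the tail */
		if (marker->prev == head)
			node_move_tail(marker, head);
		victim = marker->prev;
		if (victim == head)
			break;	/* nothing left but the marker */
		node_del(victim);
		if (victim->referenced) {
			victim->referenced = 0;
			node_add(victim, marker);
		} else {
			printf("reclaim node %d\n", victim->id);
		}
	}
}

int main(void)
{
	struct node head = { &head, &head, -1, 0 };
	struct node marker = { NULL, NULL, -1, 0 };
	struct node n[4];
	int i;

	for (i = 0; i < 4; i++) {
		n[i].id = i;
		n[i].referenced = (i == 1);	/* node 1 gets skipped once */
		node_add(&n[i], &head);		/* newest at head, oldest at tail */
	}
	node_add(&marker, head.prev);		/* marker starts at the tail */
	scan(&head, &marker, 8);
	return 0;
}

This reclaims nodes 0, 2 and 3 in LRU order, skips the referenced node 1 on
the first pass, and only reaches it again after the marker wraps --- which is
what resetting zone->scan_page to the tail achieves for mapped pages in the
patch below.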

diff -puN include/linux/mmzone.h~dont-rotate-active-list include/linux/mmzone.h
--- i386/include/linux/mmzone.h~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/include/linux/mmzone.h Wed Jul 9 12:24:51 2003
@@ -146,6 +146,12 @@ struct zone {
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;

+ /*
+ * dummy page used as a placeholder during scanning of
+ * active_list in refill_inactive_zone()
+ */
+ struct page *scan_page;
+
/*
* rarely used fields:
*/
diff -puN mm/page_alloc.c~dont-rotate-active-list mm/page_alloc.c
--- i386/mm/page_alloc.c~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/mm/page_alloc.c Wed Jul 9 12:24:51 2003
@@ -1201,6 +1201,9 @@ void __init memmap_init_zone(struct page
memmap_init_zone((start), (size), (nid), (zone), (start_pfn))
#endif

+/* dummy pages used to scan active lists */
+static struct page scan_pages[MAX_NR_NODES][MAX_NR_ZONES];
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -1223,6 +1226,7 @@ static void __init free_area_init_core(s
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize;
unsigned long batch;
+ struct page *scan_page;

zone_table[nid * MAX_NR_ZONES + j] = zone;
realsize = size = zones_size[j];
@@ -1275,6 +1279,22 @@ static void __init free_area_init_core(s
atomic_set(&zone->refill_counter, 0);
zone->nr_active = 0;
zone->nr_inactive = 0;
+
+ /* initialize dummy page used for scanning */
+ scan_page = &scan_pages[nid][j];
+ zone->scan_page = scan_page;
+ memset(scan_page, 0, sizeof *scan_page);
+ scan_page->flags =
+ (1 << PG_locked) |
+ (1 << PG_error) |
+ (1 << PG_lru) |
+ (1 << PG_active) |
+ (1 << PG_reserved);
+ set_page_zone(scan_page, j);
+ page_cache_get(scan_page);
+ INIT_LIST_HEAD(&scan_page->list);
+ list_add(&scan_page->lru, &zone->active_list);
+
if (!size)
continue;

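A note on the initialization above: the dummy page is never treated as a
real page. The extra reference taken by page_cache_get() together with
PG_locked, PG_error and PG_reserved makes it look permanently busy, so any
common code that happens upon it should leave it alone. The following
predicate is hypothetical --- it is not part of the patch --- but it states
the invariant the initialization is after:

/*
 * Hypothetical sanity check, not in the patch: the zone's scan marker
 * must always look busy, so that no LRU walker tries to lock, reclaim
 * or free it.
 */
static inline int scan_marker_is_safe(struct zone *zone)
{
	struct page *page = zone->scan_page;

	return PageLocked(page) && PageReserved(page) &&
		PageActive(page) && page_count(page) > 0;
}

The scan loop in refill_inactive_zone() below never selects the marker
itself --- it always takes the page at scan->lru.prev --- so these flags are
a safety net rather than part of the fast path.
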
diff -puN mm/vmscan.c~dont-rotate-active-list mm/vmscan.c
--- i386/mm/vmscan.c~dont-rotate-active-list Wed Jul 9 12:24:51 2003
+++ i386-god/mm/vmscan.c Wed Jul 9 12:24:51 2003
@@ -49,14 +49,15 @@
int vm_swappiness = 60;
static long total_memory;

+#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
+
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
do { \
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
- prev = list_entry(_page->lru.prev, \
- struct page, lru); \
+ prev = lru_to_page(&(_page)->lru); \
prefetch(&prev->_field); \
} \
} while (0)
@@ -70,8 +71,7 @@ static long total_memory;
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
- prev = list_entry(_page->lru.prev, \
- struct page, lru); \
+ prev = lru_to_page(&(_page)->lru); \
prefetchw(&prev->_field); \
} \
} while (0)
@@ -350,7 +350,7 @@ shrink_list(struct list_head *page_list,
struct page *page;
int may_enter_fs;

- page = list_entry(page_list->prev, struct page, lru);
+ page = lru_to_page(page_list);
list_del(&page->lru);

if (TestSetPageLocked(page))
@@ -552,8 +552,7 @@ shrink_cache(const int nr_pages, struct

while (nr_scan++ < nr_to_process &&
!list_empty(&zone->inactive_list)) {
- page = list_entry(zone->inactive_list.prev,
- struct page, lru);
+ page = lru_to_page(&zone->inactive_list);

prefetchw_prev_lru_page(page,
&zone->inactive_list, flags);
@@ -591,7 +590,7 @@ shrink_cache(const int nr_pages, struct
* Put back any unfreeable pages.
*/
while (!list_empty(&page_list)) {
- page = list_entry(page_list.prev, struct page, lru);
+ page = lru_to_page(&page_list);
if (TestSetPageLRU(page))
BUG();
list_del(&page->lru);
@@ -612,6 +611,39 @@ done:
return ret;
}

+
+/* move pages from @page_list to @spot, which should be somewhere on the
+ * @zone->active_list */
+static int
+spill_on_spot(struct zone *zone,
+ struct list_head *page_list, struct list_head *spot,
+ struct pagevec *pvec)
+{
+ struct page *page;
+ int moved;
+
+ moved = 0;
+ while (!list_empty(page_list)) {
+ page = lru_to_page(page_list);
+ prefetchw_prev_lru_page(page, page_list, flags);
+ if (TestSetPageLRU(page))
+ BUG();
+ BUG_ON(!PageActive(page));
+ list_move(&page->lru, spot);
+ moved++;
+ if (!pagevec_add(pvec, page)) {
+ zone->nr_active += moved;
+ moved = 0;
+ spin_unlock_irq(&zone->lru_lock);
+ __pagevec_release(pvec);
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+ return moved;
+}
+
+
+
/*
* This moves pages from the active list to the inactive list.
*
@@ -638,37 +670,17 @@ refill_inactive_zone(struct zone *zone,
int nr_pages = nr_pages_in;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
- LIST_HEAD(l_active); /* Pages to go onto the active_list */
+ LIST_HEAD(l_ignore); /* Pages to be returned to the active_list */
+ LIST_HEAD(l_active); /* Pages to go onto the head of the
+ * active_list */
struct page *page;
+ struct page *scan;
struct pagevec pvec;
int reclaim_mapped = 0;
long mapped_ratio;
long distress;
long swap_tendency;

- lru_add_drain();
- pgmoved = 0;
- spin_lock_irq(&zone->lru_lock);
- while (nr_pages && !list_empty(&zone->active_list)) {
- page = list_entry(zone->active_list.prev, struct page, lru);
- prefetchw_prev_lru_page(page, &zone->active_list, flags);
- if (!TestClearPageLRU(page))
- BUG();
- list_del(&page->lru);
- if (page_count(page) == 0) {
- /* It is currently in pagevec_release() */
- SetPageLRU(page);
- list_add(&page->lru, &zone->active_list);
- } else {
- page_cache_get(page);
- list_add(&page->lru, &l_hold);
- pgmoved++;
- }
- nr_pages--;
- }
- zone->nr_active -= pgmoved;
- spin_unlock_irq(&zone->lru_lock);
-
/*
* `distress' is a measure of how much trouble we're having reclaiming
* pages. 0 -> no problems. 100 -> great trouble.
@@ -700,10 +712,53 @@ refill_inactive_zone(struct zone *zone,
if (swap_tendency >= 100)
reclaim_mapped = 1;

+ scan = zone->scan_page;
+ lru_add_drain();
+ pgmoved = 0;
+ spin_lock_irq(&zone->lru_lock);
+ if (reclaim_mapped) {
+ /*
+ * When scanning the active_list with !reclaim_mapped, unreferenced
+ * mapped pages are left behind zone->scan_page. If the zone is
+ * switched to reclaim_mapped mode, reset zone->scan_page to the
+ * tail of the active_list so that those mapped pages can be
+ * re-scanned.
+ */
+ list_move_tail(&scan->lru, &zone->active_list);
+ }
+ while (nr_pages && zone->active_list.prev != zone->active_list.next) {
+ /*
+ * if the head of the active list is reached, wrap to the tail
+ */
+ if (scan->lru.prev == &zone->active_list)
+ list_move_tail(&scan->lru, &zone->active_list);
+ page = lru_to_page(&scan->lru);
+ prefetchw_prev_lru_page(page, &zone->active_list, flags);
+ if (!TestClearPageLRU(page))
+ BUG();
+ list_del(&page->lru);
+ if (page_count(page) == 0) {
+ /* It is currently in pagevec_release() */
+ SetPageLRU(page);
+ list_add(&page->lru, &zone->active_list);
+ } else {
+ page_cache_get(page);
+ list_add(&page->lru, &l_hold);
+ pgmoved++;
+ }
+ nr_pages--;
+ }
+ zone->nr_active -= pgmoved;
+ spin_unlock_irq(&zone->lru_lock);
+
while (!list_empty(&l_hold)) {
- page = list_entry(l_hold.prev, struct page, lru);
+ page = lru_to_page(&l_hold);
list_del(&page->lru);
if (page_mapped(page)) {
+ /*
+ * it would probably be useful to transfer the dirty bit
+ * from the pte to @page here.
+ */
pte_chain_lock(page);
if (page_mapped(page) && page_referenced(page)) {
pte_chain_unlock(page);
@@ -712,7 +767,7 @@ refill_inactive_zone(struct zone *zone,
}
pte_chain_unlock(page);
if (!reclaim_mapped) {
- list_add(&page->lru, &l_active);
+ list_add(&page->lru, &l_ignore);
continue;
}
}
@@ -732,7 +787,7 @@ refill_inactive_zone(struct zone *zone,
pgmoved = 0;
spin_lock_irq(&zone->lru_lock);
while (!list_empty(&l_inactive)) {
- page = list_entry(l_inactive.prev, struct page, lru);
+ page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
if (TestSetPageLRU(page))
BUG();
@@ -760,23 +815,9 @@ refill_inactive_zone(struct zone *zone,
spin_lock_irq(&zone->lru_lock);
}

- pgmoved = 0;
- while (!list_empty(&l_active)) {
- page = list_entry(l_active.prev, struct page, lru);
- prefetchw_prev_lru_page(page, &l_active, flags);
- if (TestSetPageLRU(page))
- BUG();
- BUG_ON(!PageActive(page));
- list_move(&page->lru, &zone->active_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_active += pgmoved;
- pgmoved = 0;
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
+ pgmoved = spill_on_spot(zone, &l_active, &zone->active_list, &pvec);
+ zone->nr_active += pgmoved;
+ pgmoved = spill_on_spot(zone, &l_ignore, &scan->lru, &pvec);
zone->nr_active += pgmoved;
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);

_