aa-093-vm_tunables

Andrew Morton (akpm@zip.com.au)
Tue, 19 Mar 2002 19:57:29 -0800

Messages sorted by: [ date ][ thread ][ subject ][ author ]
Next message: Andrew Morton: "aa-110-zone_accounting"
Previous message: Andrew Morton: "aa-040-touch_buffer"

Introduces a bunch of knobs for tuning the VM. They're described in
the patch.

They are not actually *used* in this patch. The usage creeps in across
subsequent patches.

It's probable that the default icache shrinkage here is insufficiently
aggressive - Al says we should shrink inode_unused with extreme
prejudice - priority == 1.

It's possible that the default dcache shrinkage is too aggressive.
We're shrinking the dcache by 1/6th for every 32 pages which are added
to the swapcache. Restoring those dcache/icache entries will be much,
much more expensive than swapping in 32 pages. So it's out of whack.
I've left it as-is at present; choosing a suitable default for the
shrink_dcache_memory priority is on my immediate things-to-do list.

=====================================

--- 2.4.19-pre3/include/linux/swap.h~aa-093-vm_tunables Tue Mar 19 19:48:54 2002
+++ 2.4.19-pre3-akpm/include/linux/swap.h Tue Mar 19 19:49:15 2002
@@ -112,6 +112,7 @@ extern void swap_setup(void);
/* linux/mm/vmscan.c */
extern wait_queue_head_t kswapd_wait;
extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int));
+extern int vm_vfs_scan_ratio, vm_cache_scan_ratio, vm_lru_balance_ratio, vm_passes, vm_gfp_debug, vm_mapped_ratio;

/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *);
--- 2.4.19-pre3/include/linux/sysctl.h~aa-093-vm_tunables Tue Mar 19 19:48:54 2002
+++ 2.4.19-pre3-akpm/include/linux/sysctl.h Tue Mar 19 19:48:54 2002
@@ -143,6 +143,12 @@ enum
VM_MAX_MAP_COUNT=11, /* int: Maximum number of active map areas */
VM_MIN_READAHEAD=12, /* Min file readahead */
VM_MAX_READAHEAD=13, /* Max file readahead */
+ VM_VFS_SCAN_RATIO=14, /* part of the inactive vfs lists to scan */
+ VM_LRU_BALANCE_RATIO=15,/* balance active and inactive caches */
+ VM_PASSES=16, /* number of vm passes before failing */
+ VM_GFP_DEBUG=17, /* debug GFP failures */
+ VM_CACHE_SCAN_RATIO=18, /* part of the inactive cache list to scan */
+ VM_MAPPED_RATIO=19, /* amount of unfreeable pages that triggers swapout */
};

--- 2.4.19-pre3/kernel/sysctl.c~aa-093-vm_tunables Tue Mar 19 19:48:54 2002
+++ 2.4.19-pre3-akpm/kernel/sysctl.c Tue Mar 19 19:48:54 2002
@@ -30,6 +30,7 @@
#include <linux/init.h>
#include <linux/sysrq.h>
#include <linux/highuid.h>
+#include <linux/swap.h>

#include <asm/uaccess.h>

@@ -260,6 +261,18 @@ static ctl_table kern_table[] = {
};

static ctl_table vm_table[] = {
+ {VM_GFP_DEBUG, "vm_gfp_debug",
+ &vm_gfp_debug, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_VFS_SCAN_RATIO, "vm_vfs_scan_ratio",
+ &vm_vfs_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_CACHE_SCAN_RATIO, "vm_cache_scan_ratio",
+ &vm_cache_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_MAPPED_RATIO, "vm_mapped_ratio",
+ &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_LRU_BALANCE_RATIO, "vm_lru_balance_ratio",
+ &vm_lru_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_PASSES, "vm_passes",
+ &vm_passes, sizeof(int), 0644, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&bdflush_min, &bdflush_max},
--- 2.4.19-pre3/mm/vmscan.c~aa-093-vm_tunables Tue Mar 19 19:48:54 2002
+++ 2.4.19-pre3-akpm/mm/vmscan.c Tue Mar 19 19:49:16 2002
@@ -25,12 +25,42 @@
#include <asm/pgalloc.h>

/*
- * The "priority" of VM scanning is how much of the queues we
- * will scan in one go. A value of 6 for DEF_PRIORITY implies
- * that we'll scan 1/64th of the queues ("queue_length >> 6")
- * during a normal aging round.
+ * "vm_passes" is the number of vm passes before failing the
+ * memory balancing. Take into account that 3 passes are needed
+ * for a flush/wait/free cycle and that we only scan 1/vm_cache_scan_ratio
+ * of the inactive list at each pass.
*/
-#define DEF_PRIORITY (6)
+int vm_passes = 60;
+
+/*
+ * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan
+ * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll
+ * scan 1/6 of the inactive lists during a normal aging round.
+ */
+int vm_cache_scan_ratio = 6;
+
+/*
+ * "vm_mapped_ratio" controls the pageout rate: the smaller it is, the
+ * earlier we'll start to page out.
+ */
+int vm_mapped_ratio = 100;
+
+/*
+ * "vm_lru_balance_ratio" controls the balance between active and
+ * inactive cache. The bigger vm_lru_balance_ratio is, the easier the
+ * active cache will grow, because we'll rotate the active list
+ * slowly. A value of 2 means we'll go towards a balance of
+ * 1/3 of the cache being inactive.
+ */
+int vm_lru_balance_ratio = 2;
+
+/*
+ * "vm_vfs_scan_ratio" is what proportion of the VFS queues we will scan
+ * in one go. A value of 6 for vm_vfs_scan_ratio implies that 1/6th of
+ * the unused-inode, dentry and dquot caches will be freed during a normal
+ * aging round.
+ */
+int vm_vfs_scan_ratio = 6;

/*
* The swap-out function returns 1 if it successfully
@@ -579,7 +609,7 @@ static int shrink_caches(zone_t * classz
shrink_dcache_memory(priority, gfp_mask);
shrink_icache_memory(priority, gfp_mask);
#ifdef CONFIG_QUOTA
- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
+ shrink_dqcache_memory(priority, gfp_mask);
#endif

return nr_pages;
@@ -587,7 +617,7 @@ static int shrink_caches(zone_t * classz

int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
{
- int priority = DEF_PRIORITY;
+ int priority = 6;
int nr_pages = SWAP_CLUSTER_MAX;

gfp_mask = pf_gfp_mask(gfp_mask);
--- 2.4.19-pre3/mm/page_alloc.c~aa-093-vm_tunables Tue Mar 19 19:48:54 2002
+++ 2.4.19-pre3-akpm/mm/page_alloc.c Tue Mar 19 19:49:15 2002
@@ -39,6 +39,8 @@ static int zone_balance_ratio[MAX_NR_ZON
static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };

+int vm_gfp_debug = 0;
+
/*
* Free_page() adds the page to the free lists. This is optimized for
* fast normal cases (no error jumps taken normally).

-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Andrew Morton: "aa-110-zone_accounting"
Previous message: Andrew Morton: "aa-040-touch_buffer"