per-cpu pte_chain freelists

William Lee Irwin III (wli@holomorphy.com)
Tue, 21 May 2002 16:02:14 -0700


Seems to do a little better than the plain per-zone freelists.

Cheers,
Bill
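
For anyone who'd rather see the shape of the change without reading the
diff, here's a rough standalone sketch of the idea (userspace, for
illustration only; my_cpu() and NCPUS stand in for smp_processor_id()
and NR_CPUS, and the page-sized refill done by alloc_new_pte_chains()
is just a malloc here):

/*
 * Rough userspace sketch of the per-cpu freelist idea: each "cpu" gets
 * its own list head and count, so allocation and freeing on a cpu touch
 * only that cpu's list and the old per-zone pte_chain_freelist_lock
 * goes away.  my_cpu() and NCPUS are stand-ins, not kernel interfaces.
 */
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 4

struct pte_chain {
	struct pte_chain *next;
};

struct pte_chain_freelist {
	unsigned long count;
	struct pte_chain *list;
};

static struct pte_chain_freelist freelists[NCPUS];

/* stand-in for smp_processor_id(); always "cpu 0" in this sketch */
static int my_cpu(void) { return 0; }

static void chain_free(struct pte_chain *pte_chain)
{
	int cpu = my_cpu();

	/* push onto this cpu's list; no lock is taken */
	pte_chain->next = freelists[cpu].list;
	freelists[cpu].list = pte_chain;
	freelists[cpu].count++;
}

static struct pte_chain *chain_alloc(void)
{
	int cpu = my_cpu();
	struct pte_chain *pte_chain = freelists[cpu].list;

	if (!pte_chain)
		return calloc(1, sizeof(*pte_chain));	/* refill path elided */

	/* pop from this cpu's list; again, no lock */
	freelists[cpu].list = pte_chain->next;
	freelists[cpu].count--;
	return pte_chain;
}

int main(void)
{
	struct pte_chain *pte_chain = chain_alloc();

	chain_free(pte_chain);
	printf("cpu 0 freelist count: %lu\n", freelists[0].count);
	return 0;
}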

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.423 -> 1.424
# mm/rmap.c 1.7 -> 1.8
# include/linux/mmzone.h 1.11 -> 1.12
# mm/page_alloc.c 1.47 -> 1.48
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/05/21 wli@tisifone.holomorphy.com 1.424
# per-cpu pte_chain freelists
# --------------------------------------------
#
diff --minimal -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h Tue May 21 15:55:48 2002
+++ b/include/linux/mmzone.h Tue May 21 15:55:48 2002
@@ -29,6 +29,11 @@

#define MAX_CHUNKS_PER_NODE 8

+struct pte_chain_freelist {
+ unsigned long count;
+ struct pte_chain *list;
+};
+
/*
* On machines where it is needed (eg PCs) we divide physical memory
* into multiple physical zones. On a PC we have 3 zones:
@@ -56,8 +61,8 @@
struct list_head inactive_dirty_list;
struct list_head inactive_clean_list;
free_area_t free_area[MAX_ORDER];
- spinlock_t pte_chain_freelist_lock;
- struct pte_chain *pte_chain_freelist;
+
+ struct pte_chain_freelist pte_chain_freelists[NR_CPUS];

/*
* wait_table -- the array holding the hash table
diff --minimal -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c Tue May 21 15:55:48 2002
+++ b/mm/page_alloc.c Tue May 21 15:55:48 2002
@@ -908,11 +908,13 @@
zone->inactive_clean_pages = 0;
zone->inactive_dirty_pages = 0;
zone->need_balance = 0;
- zone->pte_chain_freelist = NULL;
INIT_LIST_HEAD(&zone->active_list);
INIT_LIST_HEAD(&zone->inactive_dirty_list);
INIT_LIST_HEAD(&zone->inactive_clean_list);
- spin_lock_init(&zone->pte_chain_freelist_lock);
+ for (i = 0; i < NR_CPUS; ++i) {
+ zone->pte_chain_freelists[i].count = 0;
+ zone->pte_chain_freelists[i].list = NULL;
+ }

if (!size)
continue;
diff --minimal -Nru a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c Tue May 21 15:55:48 2002
+++ b/mm/rmap.c Tue May 21 15:55:48 2002
@@ -51,7 +51,7 @@
static inline struct pte_chain * pte_chain_alloc(zone_t *);
static inline void pte_chain_free(struct pte_chain *, struct pte_chain *,
struct page *, zone_t *);
-static void alloc_new_pte_chains(zone_t *);
+static void alloc_new_pte_chains(zone_t *, int cpu);

/**
* page_referenced - test if the page was referenced
@@ -335,25 +335,36 @@
** functions.
**/

-static inline void pte_chain_push(zone_t * zone,
- struct pte_chain * pte_chain)
+static inline void pte_chain_push(struct pte_chain ** list, struct pte_chain * pte_chain)
{
pte_chain->ptep = NULL;
- pte_chain->next = zone->pte_chain_freelist;
- zone->pte_chain_freelist = pte_chain;
+ pte_chain->next = *list;
+ *list = pte_chain;
}

-static inline struct pte_chain * pte_chain_pop(zone_t * zone)
+static inline struct pte_chain * pte_chain_pop(struct pte_chain ** list)
{
struct pte_chain *pte_chain;

- pte_chain = zone->pte_chain_freelist;
- zone->pte_chain_freelist = pte_chain->next;
+ pte_chain = *list;
+ *list = pte_chain->next;
pte_chain->next = NULL;

return pte_chain;
}

+static inline void add_to_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu, struct pte_chain * pte_chain)
+{
+ pte_chain_push(&freelists[cpu].list, pte_chain);
+ freelists[cpu].count++;
+}
+
+static inline struct pte_chain * del_from_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu)
+{
+ freelists[cpu].count--;
+ return pte_chain_pop(&freelists[cpu].list);
+}
+
/**
* pte_chain_free - free pte_chain structure
* @pte_chain: pte_chain struct to free
@@ -370,14 +381,17 @@
struct pte_chain * prev_pte_chain, struct page * page,
zone_t * zone)
{
+ int cpu = smp_processor_id();
+ struct pte_chain_freelist *freelists;
+
+ freelists = zone->pte_chain_freelists;
+
if (prev_pte_chain)
prev_pte_chain->next = pte_chain->next;
else if (page)
page->pte_chain = pte_chain->next;

- spin_lock(&zone->pte_chain_freelist_lock);
- pte_chain_push(zone, pte_chain);
- spin_unlock(&zone->pte_chain_freelist_lock);
+ add_to_pte_chain_freelist(freelists, cpu, pte_chain);
}

/**
@@ -391,17 +405,19 @@
static inline struct pte_chain * pte_chain_alloc(zone_t * zone)
{
struct pte_chain * pte_chain;
+ struct pte_chain_freelist *freelists;
+ int cpu = smp_processor_id();
+
+ freelists = zone->pte_chain_freelists;

- spin_lock(&zone->pte_chain_freelist_lock);

/* Allocate new pte_chain structs as needed. */
- if (!zone->pte_chain_freelist)
- alloc_new_pte_chains(zone);
+ if (!freelists[cpu].list)
+ alloc_new_pte_chains(zone, cpu);

/* Grab the first pte_chain from the freelist. */
- pte_chain = pte_chain_pop(zone);
+ pte_chain = del_from_pte_chain_freelist(freelists, cpu);

- spin_unlock(&zone->pte_chain_freelist_lock);

return pte_chain;
}
@@ -409,6 +425,7 @@
/**
* alloc_new_pte_chains - convert a free page to pte_chain structures
* @zone: memory zone to allocate pte_chains for
+ * @cpu: cpu pte_chains are to be allocated for
*
* Grabs a free page and converts it to pte_chain structures. We really
* should pre-allocate these earlier in the pagefault path or come up
@@ -416,18 +433,33 @@
*
* Note that we cannot use the slab cache because the pte_chain structure
* is way smaller than the minimum size of a slab cache allocation.
- * Caller needs to hold the zone->pte_chain_freelist_lock
+ * Caller must be running on @cpu; no lock is taken for the per-cpu freelists.
*/
-static void alloc_new_pte_chains(zone_t *zone)
+
+#define PTE_CHAINS_PER_PAGE (PAGE_SIZE/sizeof(struct pte_chain))
+
+static void alloc_new_pte_chains(zone_t *zone, int cpu)
{
- struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
- int i = PAGE_SIZE / sizeof(struct pte_chain);
+ struct pte_chain * pte_chain;
+ struct pte_chain_freelist *freelists = zone->pte_chain_freelists;
+ int i;

- if (pte_chain) {
- for (; i-- > 0; pte_chain++)
- pte_chain_push(zone, pte_chain);
- } else {
- /* Yeah yeah, I'll fix the pte_chain allocation ... */
+ /*
+ * Atomically allocate a page and hand it back. Either things are
+ * not highly unbalanced or there is good reason to allocate, so
+ * actually get a fresh page.
+ */
+ pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
+
+ /* Yeah yeah, I'll fix the pte_chain allocation ... */
+ if (!pte_chain)
panic("Fix pte_chain allocation, you lazy bastard!\n");
- }
+
+ /*
+ * Be greedy and give ourselves the chains. If some cpu wants
+ * them, it'll eventually end up taking them above.
+ */
+ freelists[cpu].count += PTE_CHAINS_PER_PAGE;
+ for (i = 0; i < PTE_CHAINS_PER_PAGE; ++i)
+ pte_chain_push(&freelists[cpu].list, &pte_chain[i]);
}
-