Re: Maximum swap space?

William Lee Irwin III (wli@holomorphy.com)
Sat, 7 Jun 2003 17:55:43 -0700


On Sat, Jun 07, 2003 at 01:50:46PM -0700, William Lee Irwin III wrote:
> These limits could be slightly relaxed by the kernel with some slightly
> more complex bit twiddlings to recover up to 6 bits of the lower byte
> of a non-present PTE for 64 swapfiles. The limitation on size seems to
> be in userspace. It appears the kernel has 24 bits for offsets in 4KB
> units, for up to something approaching 64GB swapfiles. Andi Kleen tells
> me newer distributions have fixed the mkswap(8) userspace limitation.
> So non-PAE x86 should be able to do 4TB of aggregate swapspace modulo
> vmallocspace and/or ZONE_NORMAL exhaustion from swap maps. Also, PAE
> should be able to do 64TB of aggregate swapspace (modulo vmallocspace)
> since it has an additional 4 bits usage for page offsets. But I didn't
> audit intensively, so some silly limits may be lurking in dark corners.

Santamarta on #kn tested the following patch to allow up to 64
swapfiles.

diff -prauN linux-2.5.70/include/asm-i386/pgtable.h swap-2.5.70/include/asm-i386/pgtable.h
--- linux-2.5.70/include/asm-i386/pgtable.h Thu May 1 19:15:41 2003
+++ swap-2.5.70/include/asm-i386/pgtable.h Sat Jun 7 16:47:04 2003
@@ -106,6 +106,7 @@
#define _PAGE_BIT_PCD 4
#define _PAGE_BIT_ACCESSED 5
#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_FILE 6
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */

@@ -320,12 +321,38 @@
*/
#define update_mmu_cache(vma,address,pte) do { } while (0)

-/* Encode and de-code a swap entry */
-#define __swp_type(x) (((x).val >> 1) & 0x1f)
+/*
+ * Encode and de-code a swap entry
+ * PAE could use more swapspace if swp_entry_t were wider, as there
+ * is an additional word in PTE's with 4 bits available. The benefit
+ * of extending it for such is, however, questionable.
+ */
+
#define __swp_offset(x) ((x).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+/*
+ * Bit 0 is reserved for present/non-present, and _PAGE_BIT_FILE is
+ * reserved for non-present PTE's representing file pages.
+ */
+#define __swp_type(entry) \
+({ \
+ unsigned long __val__ = (entry).val; \
+ (__val__ & (_PAGE_FILE - 1)) >> 1 \
+ | (__val__ & (_PAGE_FILE << 1)) >> 2; \
+})
+
+#define __swp_entry(type, offset) \
+({ \
+ unsigned long __type__ = type; \
+ (swp_entry_t) \
+ { (offset) << 8 \
+ | ((__type__ << 1) & (_PAGE_FILE - 1)) \
+ | ((__type__ << 2) & (_PAGE_FILE << 1)) }; \
+})
+
+#define MAX_SWAPFILES_SHIFT _PAGE_BIT_FILE

#endif /* !__ASSEMBLY__ */

diff -prauN linux-2.5.70/include/linux/swap.h swap-2.5.70/include/linux/swap.h
--- linux-2.5.70/include/linux/swap.h Wed May 7 21:19:58 2003
+++ swap-2.5.70/include/linux/swap.h Sat Jun 7 15:09:28 2003
@@ -10,6 +10,7 @@
#include <linux/sched.h>
#include <asm/atomic.h>
#include <asm/page.h>
+#include <asm/pgtable.h>

#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
@@ -25,10 +26,14 @@
* be swapped to. The swap type and the offset into that swap type are
* encoded into pte's and into pgoff_t's in the swapcache. Using five bits
* for the type means that the maximum number of swapcache pages is 27 bits
- * on 32-bit-pgoff_t architectures. And that assumes that the architecture packs
- * the type/offset into the pte as 5/27 as well.
+ * on 32-bit-pgoff_t architectures. And that assumes that the
+ * architecture packs the type/offset into the pte as 5/27 as well.
+ * Architectures can override this by simply defining MAX_SWAPFILES_SHIFT
+ * in appropriate headers.
*/
+#ifndef MAX_SWAPFILES_SHIFT
#define MAX_SWAPFILES_SHIFT 5
+#endif
#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT)

/*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/