Linus, Andrew,
Attached is the hugetlbpage patch for 2.5.48, containing the following main
changes:
1) Bug fixes (mainly in the handling of unsuccessful hugepage allocation attempts).
2) Removal of the radix tree field from the key structure (as it is not needed).
3) Accept the CAP_IPC_LOCK capability as permission to use hugepages.
4) Increment the key's reference count (key->count) during forks.
thanks,
rohit
--------------080100060507020703050208
Content-Type: text/plain;
 name="patch2548.1121"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch2548.1121"
--- linux-2.5.48/include/linux/hugetlb.h	Sun Nov 17 20:29:45 2002
+++ linux-2.5.48.work//include/linux/hugetlb.h	Thu Nov 21 11:49:57 2002
@@ -4,7 +4,17 @@
 #ifdef CONFIG_HUGETLB_PAGE
 
 struct ctl_table;
-struct hugetlb_key;
+struct hugetlb_key {
+	struct page *root;
+	loff_t size;
+	atomic_t count;
+	spinlock_t lock;
+	int key;
+	int busy;
+	uid_t uid;
+	gid_t gid;
+	umode_t mode;
+};
 
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
--- linux-2.5.48/arch/i386/mm/hugetlbpage.c	Sun Nov 17 20:29:55 2002
+++ linux-2.5.48.work/arch/i386/mm/hugetlbpage.c	Thu Nov 21 12:12:18 2002
@@ -19,6 +19,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sysctl.h>
+
 static long    htlbpagemem;
 int     htlbpage_max;
 static long    htlbzone_pages;
@@ -29,18 +31,6 @@
 
 #define MAX_ID 	32
 
-struct hugetlb_key {
-	struct radix_tree_root tree;
-	atomic_t count;
-	spinlock_t lock;
-	int key;
-	int busy;
-	uid_t uid;
-	gid_t gid;
-	umode_t mode;
-	loff_t size;
-};
-
 static struct hugetlb_key htlbpagek[MAX_ID];
 
 static void mark_key_busy(struct hugetlb_key *hugetlb_key)
@@ -81,7 +71,7 @@
 		spin_lock(&htlbpage_lock);
 		hugetlb_key = find_key(key);
 		if (!hugetlb_key) {
-			if (!capable(CAP_SYS_ADMIN) || !in_group_p(0))
+			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
 				hugetlb_key = ERR_PTR(-EPERM);
 			else if (!(flag & IPC_CREAT))
 				hugetlb_key = ERR_PTR(-ENOENT);
@@ -96,7 +86,7 @@
 					hugetlb_key = &htlbpagek[i];
 					mark_key_busy(hugetlb_key);
 					hugetlb_key->key = key;
-					INIT_RADIX_TREE(&hugetlb_key->tree, GFP_ATOMIC);
+					hugetlb_key->root = NULL;
 					hugetlb_key->uid = current->fsuid;
 					hugetlb_key->gid = current->fsgid;
 					hugetlb_key->mode = prot;
@@ -107,7 +97,6 @@
 			hugetlb_key = ERR_PTR(-EAGAIN);
 			spin_unlock(&htlbpage_lock);
 		} else if (check_size_prot(hugetlb_key, len, prot, flag) < 0) {
-			hugetlb_key->key = 0;
 			hugetlb_key = ERR_PTR(-EINVAL);
 		} 
 	} while (hugetlb_key == ERR_PTR(-EAGAIN));
@@ -120,7 +109,10 @@
 {
 	unsigned long index;
 	unsigned long max_idx;
+	struct page *page, *prev;
 
+	if (key == NULL)
+		return;
 	if (!atomic_dec_and_test(&key->count)) {
 		spin_lock(&htlbpage_lock);
 		clear_key_busy(key);
@@ -129,16 +121,19 @@
 	}
 
 	max_idx = (key->size >> HPAGE_SHIFT);
+	page = key->root;
 	for (index = 0; index < max_idx; ++index) {
-		struct page *page = radix_tree_lookup(&key->tree, index);
 		if (!page)
 			continue;
-		huge_page_release(page);
+		prev = page;
+		page = (struct page *)page->private;
+		prev->private = 0UL;
+		huge_page_release(prev);
 	}
 	spin_lock(&htlbpage_lock);
 	key->key = 0;
 	clear_key_busy(key);
-	INIT_RADIX_TREE(&key->tree, GFP_ATOMIC);
+	key->root = NULL;
 	spin_unlock(&htlbpage_lock);
 }
 
@@ -247,7 +242,7 @@
 		vma->vm_end = end;
 	}
 	spin_unlock(&mm->page_table_lock);
-      out_error1:
+out_error1:
 	return -1;
 }
 
@@ -259,7 +254,10 @@
 	struct page *ptepage;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	struct hugetlb_key *key = vma->vm_private_data;
 
+	if ( key )
+		atomic_inc(&key->count);
 	while (addr < end) {
 		dst_pte = huge_pte_alloc(dst, addr);
 		if (!dst_pte)
@@ -352,6 +350,8 @@
 	spin_unlock(&htlbpage_lock);
 	for (address = start; address < end; address += HPAGE_SIZE) {
 		pte = huge_pte_offset(mm, address);
+		if (pte_none(*pte))
+			continue;
 		page = pte_page(*pte);
 		huge_page_release(page);
 		pte_clear(pte);
@@ -381,25 +381,10 @@
 	return 0;
 }
 
-struct page *key_find_page(struct hugetlb_key *key, unsigned long index)
-{
-	struct page *page = radix_tree_lookup(&key->tree, index);
-	if (page)
-		get_page(page);
-	return page;
-}
-
-int key_add_page(struct page *page, struct hugetlb_key *key, unsigned long index)
-{
-	int error = radix_tree_insert(&key->tree, index, page);
-	if (!error)
-		get_page(page);
-	return error;
-}
-
-static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma)
+static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma, unsigned long *temp)
 {
 	struct mm_struct *mm = current->mm;
+	struct page *page, *prev;
 	unsigned long addr;
 	int ret = 0;
 
@@ -408,21 +393,18 @@
 
 	spin_lock(&mm->page_table_lock);
 	spin_lock(&key->lock);
+	prev = page = key->root;
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
 		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
 
 		if (!pte) {
+			spin_unlock(&key->lock);
 			ret = -ENOMEM;
 			goto out;
 		}
 		if (!pte_none(*pte))
 			continue;
 
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = key_find_page(key, idx);
 		if (!page) {
 			page = alloc_hugetlb_page();
 			if (!page) {
@@ -430,13 +412,20 @@
 				ret = -ENOMEM;
 				goto out;
 			}
-			key_add_page(page, key, idx);
+			if (key->root == NULL)
+				key->root = page;
+			else
+				prev->private = (unsigned long)page;
 		}
+		get_page(page);
 		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+		prev = page;
+		page = (struct page *)page->private;
 	}
 	spin_unlock(&key->lock);
 out:
 	spin_unlock(&mm->page_table_lock);
+	*temp = addr;
 	return ret;
 }
 
@@ -446,6 +435,7 @@
 	struct vm_area_struct *vma;
 	struct hugetlb_key *hugetlb_key;
 	int retval = -ENOMEM;
+	unsigned long temp;
 
 	hugetlb_key = alloc_key(key, len, prot, flag );
 	spin_unlock(&htlbpage_lock);
@@ -455,17 +445,18 @@
 	addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
 			MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0);
 	if (IS_ERR((void *) addr))
-		goto out_release;
+		goto out;
 
 	vma = find_vma(mm, addr);
 	if (!vma) {
 		retval = -EINVAL;
-		goto out_release;
+		goto out;
 	}
 
-	retval = prefault_key(hugetlb_key, vma);
+	retval = prefault_key(hugetlb_key, vma, &temp);
+	addr = temp;
 	if (retval)
-		goto out;
+		goto out_release;
 
 	vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
 	vma->vm_ops = &hugetlb_vm_ops;
@@ -474,7 +465,7 @@
 	clear_key_busy(hugetlb_key);
 	spin_unlock(&htlbpage_lock);
 	return retval;
-out:
+out_release:
 	if (addr > vma->vm_start) {
 		unsigned long raddr;
 		raddr = vma->vm_end;
@@ -482,10 +473,8 @@
 		zap_hugepage_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 		vma->vm_end = raddr;
 	}
-	spin_lock(&mm->page_table_lock);
 	do_munmap(mm, vma->vm_start, len);
-	spin_unlock(&mm->page_table_lock);
-out_release:
+out:
 	hugetlb_release_key(hugetlb_key);
 	return retval;
 }
@@ -533,10 +522,8 @@
 
 static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
 {
-	if (!capable(CAP_SYS_ADMIN)) {
-		if (!in_group_p(0))
-			return -EPERM;
-	}
+	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
+		return -EPERM;
 	addr = do_mmap_pgoff(NULL, addr, len, prot,
 			MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
 	if (IS_ERR((void *) addr))
--- linux-2.5.48/arch/i386/kernel/sys_i386.c	Sun Nov 17 20:29:56 2002
+++ linux-2.5.48.work/arch/i386/kernel/sys_i386.c	Thu Nov 21 12:01:08 2002
@@ -294,17 +294,17 @@
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	struct hugetlb_key *key;
 	int retval;
 
-	vma = find_vma(current->mm, addr);
-	if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr)
-		return -EINVAL;
 	down_write(&mm->mmap_sem);
-	key = (struct hugetlb_key *)vma->vm_private_data;
+	vma = find_vma(current->mm, addr);
+	if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr) {
+		retval =  -EINVAL;
+		goto out;
+	}
 	retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr);
+out:
 	up_write(&mm->mmap_sem);
-	hugetlb_release_key(key);
 	return retval;
 }
 #else
--------------080100060507020703050208--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/