Diffstat (limited to 'trunk/2.6.22/20069_xen-split-pt-lock.patch1')
-rw-r--r--	trunk/2.6.22/20069_xen-split-pt-lock.patch1	220
1 files changed, 220 insertions, 0 deletions
diff --git a/trunk/2.6.22/20069_xen-split-pt-lock.patch1 b/trunk/2.6.22/20069_xen-split-pt-lock.patch1
new file mode 100644
index 0000000..0b45a81
--- /dev/null
+++ b/trunk/2.6.22/20069_xen-split-pt-lock.patch1
@@ -0,0 +1,220 @@
+From: jbeulich@novell.com
+Subject: allow use of split page table locks
+Patch-mainline: obsolete
+
+---
+ arch/i386/mm/pgtable-xen.c | 66 +++++++++++++++++++++++++++++++++++++++---
+ arch/x86_64/mm/pageattr-xen.c | 66 +++++++++++++++++++++++++++++++++++++++---
+ mm/Kconfig | 3 -
+ 3 files changed, 124 insertions(+), 11 deletions(-)
+
+--- a/arch/i386/mm/pgtable-xen.c 2007-08-27 14:01:27.000000000 -0400
++++ b/arch/i386/mm/pgtable-xen.c 2007-08-27 14:01:27.000000000 -0400
+@@ -658,6 +658,64 @@ void make_pages_writable(void *va, unsig
+ 	}
+ }
+
++static void _pin_lock(struct mm_struct *mm, int lock) {
++	if (lock)
++		spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++	/* While mm->page_table_lock protects us against insertions and
++	 * removals of higher level page table pages, it doesn't protect
++	 * against updates of pte-s. Such updates, however, require the
++	 * pte pages to be in a consistent state (unpinned+writable or
++	 * pinned+readonly). Since the pinning and attribute changes
++	 * cannot be done atomically, such updates must be prevented
++	 * from happening concurrently.
++	 * Note that no pte lock is ever acquired elsewhere nested inside
++	 * an already acquired one in the same mm, or with the mm's
++	 * page_table_lock already held, as that would break in the
++	 * non-split case (where all of these resolve to the one
++	 * page_table_lock). Thus acquiring all of them here cannot
++	 * result in deadlocks, and the order of the acquires doesn't
++	 * matter.
++	 */
++	{
++		pgd_t *pgd = mm->pgd;
++		unsigned g;
++
++		for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
++			pud_t *pud;
++			unsigned u;
++
++			if (pgd_none(*pgd))
++				continue;
++			pud = pud_offset(pgd, 0);
++			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++				pmd_t *pmd;
++				unsigned m;
++
++				if (pud_none(*pud))
++					continue;
++				pmd = pmd_offset(pud, 0);
++				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++					spinlock_t *ptl;
++
++					if (pmd_none(*pmd))
++						continue;
++					ptl = pte_lockptr(0, pmd);
++					if (lock)
++						spin_lock(ptl);
++					else
++						spin_unlock(ptl);
++				}
++			}
++		}
++	}
++#endif
++	if (!lock)
++		spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
+ static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
+ {
+ 	unsigned long pfn = page_to_pfn(page);
+@@ -740,18 +798,18 @@ void mm_pin(struct mm_struct *mm)
+ {
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 	__pgd_pin(mm->pgd);
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+
+ void mm_unpin(struct mm_struct *mm)
+ {
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 	__pgd_unpin(mm->pgd);
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+
+ void mm_pin_all(void)
+--- a/arch/x86_64/mm/pageattr-xen.c 2007-08-27 14:01:27.000000000 -0400
++++ b/arch/x86_64/mm/pageattr-xen.c 2007-08-27 14:01:27.000000000 -0400
+@@ -20,6 +20,64 @@
+ LIST_HEAD(mm_unpinned);
+ DEFINE_SPINLOCK(mm_unpinned_lock);
+
++static void _pin_lock(struct mm_struct *mm, int lock) {
++	if (lock)
++		spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++	/* While mm->page_table_lock protects us against insertions and
++	 * removals of higher level page table pages, it doesn't protect
++	 * against updates of pte-s. Such updates, however, require the
++	 * pte pages to be in a consistent state (unpinned+writable or
++	 * pinned+readonly). Since the pinning and attribute changes
++	 * cannot be done atomically, such updates must be prevented
++	 * from happening concurrently.
++	 * Note that no pte lock is ever acquired elsewhere nested inside
++	 * an already acquired one in the same mm, or with the mm's
++	 * page_table_lock already held, as that would break in the
++	 * non-split case (where all of these resolve to the one
++	 * page_table_lock). Thus acquiring all of them here cannot
++	 * result in deadlocks, and the order of the acquires doesn't
++	 * matter.
++	 */
++	{
++		pgd_t *pgd = mm->pgd;
++		unsigned g;
++
++		for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++			pud_t *pud;
++			unsigned u;
++
++			if (pgd_none(*pgd))
++				continue;
++			pud = pud_offset(pgd, 0);
++			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++				pmd_t *pmd;
++				unsigned m;
++
++				if (pud_none(*pud))
++					continue;
++				pmd = pmd_offset(pud, 0);
++				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++					spinlock_t *ptl;
++
++					if (pmd_none(*pmd))
++						continue;
++					ptl = pte_lockptr(0, pmd);
++					if (lock)
++						spin_lock(ptl);
++					else
++						spin_unlock(ptl);
++				}
++			}
++		}
++	}
++#endif
++	if (!lock)
++		spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
+ static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+ {
+ 	struct page *page = virt_to_page(pt);
+@@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm)
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+
+ 	mm_walk(mm, PAGE_KERNEL_RO);
+ 	if (HYPERVISOR_update_va_mapping(
+@@ -97,7 +155,7 @@ void mm_pin(struct mm_struct *mm)
+ 	list_del(&mm->context.unpinned);
+ 	spin_unlock(&mm_unpinned_lock);
+
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+
+ void mm_unpin(struct mm_struct *mm)
+@@ -105,7 +163,7 @@ void mm_unpin(struct mm_struct *mm)
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+
+ 	xen_pgd_unpin(__pa(mm->pgd));
+ 	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
+@@ -125,7 +183,7 @@ void mm_unpin(struct mm_struct *mm)
+ 	list_add(&mm->context.unpinned, &mm_unpinned);
+ 	spin_unlock(&mm_unpinned_lock);
+
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+
+ void mm_pin_all(void)
+--- a/mm/Kconfig 2007-08-27 14:01:25.000000000 -0400
++++ b/mm/Kconfig 2007-08-27 14:01:27.000000000 -0400
+@@ -132,14 +132,11 @@ config MEMORY_HOTPLUG_SPARSE
+ # Default to 4 for wider testing, though 8 might be more appropriate.
+ # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+ # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
+-# pointer to the destructor. This conflicts with pte_lock_deinit().
+ #
+ config SPLIT_PTLOCK_CPUS
+ 	int
+ 	default "4096" if ARM && !CPU_CACHE_VIPT
+ 	default "4096" if PARISC && !PA20
+-	default "4096" if X86_XEN || X86_64_XEN
+ 	default "4"
+
+ #
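
The comment added to _pin_lock() above carries the patch's whole locking argument: pinning or unpinning an mm must exclude every concurrent pte update, and with split pte locks that means taking every pte page's lock on top of mm->page_table_lock. The following standalone userspace sketch is not part of the patch; pin_lock_all, update_pte, NPAGES and the pthread mutexes are illustrative stand-ins for the kernel primitives, used only to model the same discipline (compiles with cc -pthread):

#include <pthread.h>
#include <stdio.h>

#define NPAGES 4	/* stand-in for the number of pte pages in an mm */

static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pte_lock[NPAGES];

/* Mirrors the shape of _pin_lock(mm, 1) / _pin_lock(mm, 0): the mm-wide
 * lock first, then every per-page lock. The order among the per-page
 * locks doesn't matter because no other path ever holds two of them. */
static void pin_lock_all(int lock)
{
	int i;

	if (lock)
		pthread_mutex_lock(&page_table_lock);
	for (i = 0; i < NPAGES; i++) {
		if (lock)
			pthread_mutex_lock(&pte_lock[i]);
		else
			pthread_mutex_unlock(&pte_lock[i]);
	}
	if (!lock)
		pthread_mutex_unlock(&page_table_lock);
}

/* A pte update under split locks takes only its own page's lock, so it
 * is fully excluded while pin_lock_all(1) is in effect. */
static void update_pte(int page)
{
	pthread_mutex_lock(&pte_lock[page]);
	/* ... modify one pte while its page is in a consistent state ... */
	pthread_mutex_unlock(&pte_lock[page]);
}

int main(void)
{
	int i;

	for (i = 0; i < NPAGES; i++)
		pthread_mutex_init(&pte_lock[i], NULL);

	pin_lock_all(1);	/* "pin": pages go pinned+readonly */
	pin_lock_all(0);	/* "unpin": back to unpinned+writable */
	update_pte(2);		/* a concurrent updater would block while pinned */
	printf("done\n");
	return 0;
}

Because an updater only ever holds a single per-page lock and never nests it with the mm-wide lock already held, the pin path can take all of them in any order without risking deadlock, which is exactly the property the patch comment relies on. In the non-split configuration all pte locks resolve to mm->page_table_lock itself, so the per-page walk is compiled out behind the NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS guard and only the mm-wide lock is taken.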