Diffstat (limited to 'trunk/2.6.22/20069_xen-split-pt-lock.patch1')
-rw-r--r-- | trunk/2.6.22/20069_xen-split-pt-lock.patch1 | 220
1 files changed, 220 insertions, 0 deletions
diff --git a/trunk/2.6.22/20069_xen-split-pt-lock.patch1 b/trunk/2.6.22/20069_xen-split-pt-lock.patch1
new file mode 100644
index 0000000..0b45a81
--- /dev/null
+++ b/trunk/2.6.22/20069_xen-split-pt-lock.patch1
@@ -0,0 +1,220 @@
+From: jbeulich@novell.com
+Subject: allow use of split page table locks
+Patch-mainline: obsolete
+
+---
+ arch/i386/mm/pgtable-xen.c    |   66 +++++++++++++++++++++++++++++++++++++++---
+ arch/x86_64/mm/pageattr-xen.c |   66 +++++++++++++++++++++++++++++++++++++++---
+ mm/Kconfig                    |    3 -
+ 3 files changed, 124 insertions(+), 11 deletions(-)
+
+--- a/arch/i386/mm/pgtable-xen.c	2007-08-27 14:01:27.000000000 -0400
++++ b/arch/i386/mm/pgtable-xen.c	2007-08-27 14:01:27.000000000 -0400
+@@ -658,6 +658,64 @@ void make_pages_writable(void *va, unsig
+ 	}
+ }
+ 
++static void _pin_lock(struct mm_struct *mm, int lock) {
++	if (lock)
++		spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++	/* While mm->page_table_lock protects us against insertions and
++	 * removals of higher level page table pages, it doesn't protect
++	 * against updates of pte-s. Such updates, however, require the
++	 * pte pages to be in consistent state (unpinned+writable or
++	 * pinned+readonly). The pinning and attribute changes, however,
++	 * cannot be done atomically, which is why such updates must be
++	 * prevented from happening concurrently.
++	 * Note that no pte lock can ever elsewhere be acquired nesting
++	 * with an already acquired one in the same mm, or with the mm's
++	 * page_table_lock already acquired, as that would break in the
++	 * non-split case (where all these are actually resolving to the
++	 * one page_table_lock). Thus acquiring all of them here is not
++	 * going to result in dead locks, and the order of acquires
++	 * doesn't matter.
++	 */
++	{
++		pgd_t *pgd = mm->pgd;
++		unsigned g;
++
++		for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
++			pud_t *pud;
++			unsigned u;
++
++			if (pgd_none(*pgd))
++				continue;
++			pud = pud_offset(pgd, 0);
++			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++				pmd_t *pmd;
++				unsigned m;
++
++				if (pud_none(*pud))
++					continue;
++				pmd = pmd_offset(pud, 0);
++				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++					spinlock_t *ptl;
++
++					if (pmd_none(*pmd))
++						continue;
++					ptl = pte_lockptr(0, pmd);
++					if (lock)
++						spin_lock(ptl);
++					else
++						spin_unlock(ptl);
++				}
++			}
++		}
++	}
++#endif
++	if (!lock)
++		spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
+ static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
+ {
+ 	unsigned long pfn = page_to_pfn(page);
+@@ -740,18 +798,18 @@ void mm_pin(struct mm_struct *mm)
+ {
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 	__pgd_pin(mm->pgd);
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+ 
+ void mm_unpin(struct mm_struct *mm)
+ {
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 	__pgd_unpin(mm->pgd);
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+ 
+ void mm_pin_all(void)
+--- a/arch/x86_64/mm/pageattr-xen.c	2007-08-27 14:01:27.000000000 -0400
++++ b/arch/x86_64/mm/pageattr-xen.c	2007-08-27 14:01:27.000000000 -0400
+@@ -20,6 +20,64 @@
+ LIST_HEAD(mm_unpinned);
+ DEFINE_SPINLOCK(mm_unpinned_lock);
+ 
++static void _pin_lock(struct mm_struct *mm, int lock) {
++	if (lock)
++		spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++	/* While mm->page_table_lock protects us against insertions and
++	 * removals of higher level page table pages, it doesn't protect
++	 * against updates of pte-s. Such updates, however, require the
++	 * pte pages to be in consistent state (unpinned+writable or
++	 * pinned+readonly). The pinning and attribute changes, however,
++	 * cannot be done atomically, which is why such updates must be
++	 * prevented from happening concurrently.
++	 * Note that no pte lock can ever elsewhere be acquired nesting
++	 * with an already acquired one in the same mm, or with the mm's
++	 * page_table_lock already acquired, as that would break in the
++	 * non-split case (where all these are actually resolving to the
++	 * one page_table_lock). Thus acquiring all of them here is not
++	 * going to result in dead locks, and the order of acquires
++	 * doesn't matter.
++	 */
++	{
++		pgd_t *pgd = mm->pgd;
++		unsigned g;
++
++		for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++			pud_t *pud;
++			unsigned u;
++
++			if (pgd_none(*pgd))
++				continue;
++			pud = pud_offset(pgd, 0);
++			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++				pmd_t *pmd;
++				unsigned m;
++
++				if (pud_none(*pud))
++					continue;
++				pmd = pmd_offset(pud, 0);
++				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++					spinlock_t *ptl;
++
++					if (pmd_none(*pmd))
++						continue;
++					ptl = pte_lockptr(0, pmd);
++					if (lock)
++						spin_lock(ptl);
++					else
++						spin_unlock(ptl);
++				}
++			}
++		}
++	}
++#endif
++	if (!lock)
++		spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
+ static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+ {
+ 	struct page *page = virt_to_page(pt);
+@@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm)
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+ 
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 
+ 	mm_walk(mm, PAGE_KERNEL_RO);
+ 	if (HYPERVISOR_update_va_mapping(
+@@ -97,7 +155,7 @@ void mm_pin(struct mm_struct *mm)
+ 	list_del(&mm->context.unpinned);
+ 	spin_unlock(&mm_unpinned_lock);
+ 
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+ 
+ void mm_unpin(struct mm_struct *mm)
+@@ -105,7 +163,7 @@ void mm_unpin(struct mm_struct *mm)
+ 	if (xen_feature(XENFEAT_writable_page_tables))
+ 		return;
+ 
+-	spin_lock(&mm->page_table_lock);
++	pin_lock(mm);
+ 
+ 	xen_pgd_unpin(__pa(mm->pgd));
+ 	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
+@@ -125,7 +183,7 @@ void mm_unpin(struct mm_struct *mm)
+ 	list_add(&mm->context.unpinned, &mm_unpinned);
+ 	spin_unlock(&mm_unpinned_lock);
+ 
+-	spin_unlock(&mm->page_table_lock);
++	pin_unlock(mm);
+ }
+ 
+ void mm_pin_all(void)
+--- a/mm/Kconfig	2007-08-27 14:01:25.000000000 -0400
++++ b/mm/Kconfig	2007-08-27 14:01:27.000000000 -0400
+@@ -132,14 +132,11 @@ config MEMORY_HOTPLUG_SPARSE
+ # Default to 4 for wider testing, though 8 might be more appropriate.
+ # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+ # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
+-# pointer to the destructor. This conflicts with pte_lock_deinit().
+ #
+ config SPLIT_PTLOCK_CPUS
+ 	int
+ 	default "4096" if ARM && !CPU_CACHE_VIPT
+ 	default "4096" if PARISC && !PA20
+-	default "4096" if X86_XEN || X86_64_XEN
+ 	default "4"
+ 
+ #
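
For context, the full page-table walk in _pin_lock() is only needed because pte_lockptr() resolves differently on either side of the CONFIG_SPLIT_PTLOCK_CPUS threshold. A minimal sketch of the two resolutions, paraphrased from include/linux/mm.h of this kernel generation (illustration only, not part of the patch above):

/* Illustration only -- paraphrased from include/linux/mm.h of the
 * 2.6.2x era, not part of the patch.  With split pte locks, each pte
 * page carries its own spinlock in its struct page, so quiescing pte
 * updates means taking every one of them, as _pin_lock() does.  Below
 * the CPU threshold they all collapse to mm->page_table_lock, which is
 * why pte locks must never nest within one mm: in that configuration
 * nesting would self-deadlock on the single lock.
 */
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#define pte_lockptr(mm, pmd)	({ (void)(mm); &pmd_page(*(pmd))->ptl; })
#else
#define pte_lockptr(mm, pmd)	({ (void)(pmd); &(mm)->page_table_lock; })
#endif

This collapse is also what lets the mm/Kconfig hunk drop the 4096-CPU default for X86_XEN/X86_64_XEN: with pinning now quiescing all pte locks itself, split page table locks no longer need to be disabled on Xen.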