From 74193f4292d9cfc2874866e941d9939d8f33fcef Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 9 Jun 2022 15:28:23 +0200
Subject: [PATCH 25/32] x86: Don't change the cacheability of the directmap

Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings
in response to guest mapping requests") attempted to keep the cacheability
consistent between different mappings of the same page.

The reason wasn't described in the changelog, but it is understood to be in
regard to a concern over machine check exceptions, owing to errata when using
mixed cacheabilities.  It did this primarily by updating Xen's mapping of the
page in the direct map when the guest mapped a page with reduced cacheability.

Unfortunately, the logic didn't actually prevent mixed cacheability from
occurring:
 * A guest could map a page normally, and then map the same page with
   different cacheability; nothing prevented this.
 * The cacheability of the directmap was always latest-takes-precedence in
   terms of guest requests.
 * Grant-mapped frames with lesser cacheability didn't adjust the page's
   cacheattr settings.
 * The map_domain_page() function still unconditionally created WB mappings,
   irrespective of the page's cacheattr settings.

Additionally, update_xen_mappings() had a bug where the alias calculation was
wrong for mfns which were .init content, which should have been treated as
fully guest pages, not Xen pages.

Worse yet, the logic introduced a vulnerability whereby necessary
pagetable/segdesc adjustments made by Xen in the validation logic could become
non-coherent between the cache and main memory.  The CPU could subsequently
operate on the stale value in the cache, rather than the safe value in main
memory.

The directmap contains primarily mappings of RAM.  PAT/MTRR conflict
resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser
cacheability resolves to being coherent.  The special case is WC mappings,
which are non-coherent against MTRR=WB regions (except for fully-coherent
CPUs).

Xen must not have any WC cacheability in the directmap, to prevent Xen's
actions from creating non-coherency.  (Guest actions creating non-coherency
are dealt with in subsequent patches.)  All memory types for MTRR=WB ranges
inter-operate coherently, so leave Xen's directmap mappings as WB.

Only PV guests with access to devices can use reduced-cacheability mappings
to begin with, and they're trusted not to mount DoSs against the system
anyway.

Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them.
Shift the later PGC_* constants up, to gain 3 extra bits in the main
reference count.  Retain the check in get_page_from_l1e() for
is_special_page() because a guest has no business using reduced cacheability
on these pages.

This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0.

This is CVE-2022-26363, part of XSA-402.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
master commit: ae09597da34aee6bc5b76475c5eea6994457e854
master date: 2022-06-09 14:22:08 +0200
---
xen/arch/x86/mm.c | 84 ++++------------------------------------
xen/include/asm-x86/mm.h | 23 +++++------
2 files changed, 17 insertions(+), 90 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index c6429b0f749a..ab32d13a1a0d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn)
return (page_get_owner(page) == dom_io);
}
-static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr)
-{
- int err = 0;
- bool alias = mfn >= PFN_DOWN(xen_phys_start) &&
- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START);
- unsigned long xen_va =
- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT);
-
- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) )
- return 0;
-
- if ( unlikely(alias) && cacheattr )
- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0);
- if ( !err )
- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1,
- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr));
- if ( unlikely(alias) && !cacheattr && !err )
- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR);
-
- return err;
-}
-
#ifndef NDEBUG
struct mmio_emul_range_ctxt {
const struct domain *d;
@@ -1009,47 +987,14 @@ get_page_from_l1e(
goto could_not_pin;
}
- if ( pte_flags_to_cacheattr(l1f) !=
- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) )
+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) )
{
- unsigned long x, nx, y = page->count_info;
- unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
- int err;
-
- if ( is_special_page(page) )
- {
- if ( write )
- put_page_type(page);
- put_page(page);
- gdprintk(XENLOG_WARNING,
- "Attempt to change cache attributes of Xen heap page\n");
- return -EACCES;
- }
-
- do {
- x = y;
- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base);
- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
-
- err = update_xen_mappings(mfn, cacheattr);
- if ( unlikely(err) )
- {
- cacheattr = y & PGC_cacheattr_mask;
- do {
- x = y;
- nx = (x & ~PGC_cacheattr_mask) | cacheattr;
- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
-
- if ( write )
- put_page_type(page);
- put_page(page);
-
- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn
- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n",
- mfn, get_gpfn_from_mfn(mfn),
- l1e_get_intpte(l1e), l1e_owner->domain_id);
- return err;
- }
+ if ( write )
+ put_page_type(page);
+ put_page(page);
+ gdprintk(XENLOG_WARNING,
+ "Attempt to change cache attributes of Xen heap page\n");
+ return -EACCES;
}
return 0;
@@ -2467,24 +2412,9 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
*/
static int cleanup_page_mappings(struct page_info *page)
{
- unsigned int cacheattr =
- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base;
int rc = 0;
unsigned long mfn = mfn_x(page_to_mfn(page));
- /*
- * If we've modified xen mappings as a result of guest cache
- * attributes, restore them to the "normal" state.
- */
- if ( unlikely(cacheattr) )
- {
- page->count_info &= ~PGC_cacheattr_mask;
-
- BUG_ON(is_special_page(page));
-
- rc = update_xen_mappings(mfn, 0);
- }
-
/*
* If this may be in a PV domain's IOMMU, remove it.
*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index cb9052749963..8a9a43bb0a9d 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -69,25 +69,22 @@
/* Set when is using a page as a page table */
#define _PGC_page_table PG_shift(3)
#define PGC_page_table PG_mask(1, 3)
- /* 3-bit PAT/PCD/PWT cache-attribute hint. */
-#define PGC_cacheattr_base PG_shift(6)
-#define PGC_cacheattr_mask PG_mask(7, 6)
/* Page is broken? */
-#define _PGC_broken PG_shift(7)
-#define PGC_broken PG_mask(1, 7)
+#define _PGC_broken PG_shift(4)
+#define PGC_broken PG_mask(1, 4)
/* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */
-#define PGC_state PG_mask(3, 9)
-#define PGC_state_inuse PG_mask(0, 9)
-#define PGC_state_offlining PG_mask(1, 9)
-#define PGC_state_offlined PG_mask(2, 9)
-#define PGC_state_free PG_mask(3, 9)
+#define PGC_state PG_mask(3, 6)
+#define PGC_state_inuse PG_mask(0, 6)
+#define PGC_state_offlining PG_mask(1, 6)
+#define PGC_state_offlined PG_mask(2, 6)
+#define PGC_state_free PG_mask(3, 6)
#define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
/* Page is not reference counted (see below for caveats) */
-#define _PGC_extra PG_shift(10)
-#define PGC_extra PG_mask(1, 10)
+#define _PGC_extra PG_shift(7)
+#define PGC_extra PG_mask(1, 7)
/* Count of references to this frame. */
-#define PGC_count_width PG_shift(10)
+#define PGC_count_width PG_shift(7)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
/*
--
2.35.1
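
Not part of the patch itself: a minimal standalone sketch of the bit
arithmetic behind "gain 3 extra bits in the main reference count" in the
xen/include/asm-x86/mm.h hunk above.  The PG_shift()/PG_mask() definitions
below are simplified stand-ins (assumed to number bit fields down from the
top of a 64-bit count_info word) and may differ in detail from the real
header.

#include <stdio.h>

#define BITS_PER_LONG   64
/* Simplified stand-ins: flag fields live in the top bits of count_info. */
#define PG_shift(idx)   (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) ((unsigned long)(x) << PG_shift(idx))

int main(void)
{
    /* Before: flags occupy the top 10 bits (incl. the 3-bit cacheattr). */
    unsigned long old_width = PG_shift(10);              /* 54 count bits */

    /* After: cacheattr dropped, flags renumbered into the top 7 bits. */
    unsigned long new_width = PG_shift(7);               /* 57 count bits */

    printf("count bits before=%lu after=%lu gained=%lu\n",
           old_width, new_width, new_width - old_width);

    /* The renumbered flags simply move up, e.g. PGC_extra: bit 54 -> 57. */
    printf("PGC_extra mask before=%#lx after=%#lx\n",
           PG_mask(1, 10), PG_mask(1, 7));

    /* The usable reference count field grows by a factor of 8. */
    printf("count mask before=%#lx after=%#lx\n",
           (1UL << old_width) - 1, (1UL << new_width) - 1);
    return 0;
}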