1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
From 943635d8f8486209e4e48966507ad57963e96284 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 14:54:00 +0200
Subject: [PATCH 09/87] x86/p2m: truly free paging pool memory for dying
domains
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Modify {hap,shadow}_free to free the page immediately if the domain is
dying, so that pages don't accumulate in the pool when
{shadow,hap}_final_teardown() get called. This is to limit the amount of
work which needs to be done there (in a non-preemptable manner).
Note the call to shadow_free() in shadow_free_p2m_page() is moved after
increasing total_pages, so that the decrease done in shadow_free() in
case the domain is dying doesn't underflow the counter, even if just for
a short interval.
This is part of CVE-2022-33746 / XSA-410.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
master commit: f50a2c0e1d057c00d6061f40ae24d068226052ad
master date: 2022-10-11 14:23:51 +0200
---
xen/arch/x86/mm/hap/hap.c | 12 ++++++++++++
xen/arch/x86/mm/shadow/common.c | 28 +++++++++++++++++++++++++---
2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 787991233e53..aef2297450e1 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -265,6 +265,18 @@ static void hap_free(struct domain *d, mfn_t mfn)
ASSERT(paging_locked_by_me(d));
+ /*
+ * For dying domains, actually free the memory here. This way less work is
+ * left to hap_final_teardown(), which cannot easily have preemption checks
+ * added.
+ */
+ if ( unlikely(d->is_dying) )
+ {
+ free_domheap_page(pg);
+ d->arch.paging.hap.total_pages--;
+ return;
+ }
+
d->arch.paging.hap.free_pages++;
page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 9807f6ec6c00..9eb33eafc7f7 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1187,6 +1187,7 @@ mfn_t shadow_alloc(struct domain *d,
void shadow_free(struct domain *d, mfn_t smfn)
{
struct page_info *next = NULL, *sp = mfn_to_page(smfn);
+ bool dying = ACCESS_ONCE(d->is_dying);
struct page_list_head *pin_list;
unsigned int pages;
u32 shadow_type;
@@ -1229,11 +1230,32 @@ void shadow_free(struct domain *d, mfn_t smfn)
* just before the allocator hands the page out again. */
page_set_tlbflush_timestamp(sp);
perfc_decr(shadow_alloc_count);
- page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
+
+ /*
+ * For dying domains, actually free the memory here. This way less
+ * work is left to shadow_final_teardown(), which cannot easily have
+ * preemption checks added.
+ */
+ if ( unlikely(dying) )
+ {
+ /*
+ * The backpointer field (sh.back) used by shadow code aliases the
+ * domain owner field, unconditionally clear it here to avoid
+ * free_domheap_page() attempting to parse it.
+ */
+ page_set_owner(sp, NULL);
+ free_domheap_page(sp);
+ }
+ else
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
+
sp = next;
}
- d->arch.paging.shadow.free_pages += pages;
+ if ( unlikely(dying) )
+ d->arch.paging.shadow.total_pages -= pages;
+ else
+ d->arch.paging.shadow.free_pages += pages;
}
/* Divert a page from the pool to be used by the p2m mapping.
@@ -1303,9 +1325,9 @@ shadow_free_p2m_page(struct domain *d, struct page_info *pg)
* paging lock) and the log-dirty code (which always does). */
paging_lock_recursive(d);
- shadow_free(d, page_to_mfn(pg));
d->arch.paging.shadow.p2m_pages--;
d->arch.paging.shadow.total_pages++;
+ shadow_free(d, page_to_mfn(pg));
paging_unlock(d);
}
--
2.37.4
|