From 62e7fb702db4adaa9415ac87d95e0f461e32d9ca Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Wed, 24 Aug 2022 14:16:44 +0100
Subject: [PATCH 43/87] x86/vmx: Revert "VMX: use a single, global APIC access
 page"

The claim "No accesses would ever go to this page." is false.  A consequence
of how Intel's APIC Acceleration works, combined with Xen's choice to have
per-domain P2Ms (rather than per-vCPU P2Ms), is that the APIC page is fully
read-write to any vCPU which is not in xAPIC mode.

This reverts commit 58850b9074d3e7affdf3bc94c84e417ecfa4d165.

This is XSA-412 / CVE-2022-42327.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
(cherry picked from commit 3b5beaf49033cddf4b2cc4e4d391b966f4203471)
---
 xen/arch/x86/hvm/vmx/vmx.c         | 59 ++++++++++++++++++++++--------
 xen/arch/x86/mm/shadow/set.c       |  8 ----
 xen/arch/x86/mm/shadow/types.h     |  7 ----
 xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
 xen/include/asm-x86/mm.h           | 20 +---------
 5 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index d429d76c18c9..3f4276531322 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -66,7 +66,8 @@ boolean_param("force-ept", opt_force_ept);
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
 
-static int alloc_vlapic_mapping(void);
+static int  vmx_alloc_vlapic_mapping(struct domain *d);
+static void vmx_free_vlapic_mapping(struct domain *d);
 static void vmx_install_vlapic_mapping(struct vcpu *v);
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
                                 unsigned int flags);
@@ -77,8 +78,6 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg(struct vcpu *v, unsigned long linear);
 
-static mfn_t __read_mostly apic_access_mfn = INVALID_MFN_INITIALIZER;
-
 /* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
 #define PI_CSW_FROM (1u << 0)
 #define PI_CSW_TO   (1u << 1)
@@ -402,6 +401,7 @@ static int vmx_domain_initialise(struct domain *d)
         .to   = vmx_ctxt_switch_to,
         .tail = vmx_do_resume,
     };
+    int rc;
 
     d->arch.ctxt_switch = &csw;
 
@@ -411,15 +411,24 @@ static int vmx_domain_initialise(struct domain *d)
      */
     d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
 
+    if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
+        return rc;
+
     return 0;
 }
 
+static void vmx_domain_relinquish_resources(struct domain *d)
+{
+    vmx_free_vlapic_mapping(d);
+}
+
 static void domain_creation_finished(struct domain *d)
 {
     gfn_t gfn = gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE);
+    mfn_t apic_access_mfn = d->arch.hvm.vmx.apic_access_mfn;
     bool ipat;
 
-    if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, INVALID_MFN) )
+    if ( mfn_eq(apic_access_mfn, _mfn(0)) )
         return;
 
     ASSERT(epte_get_entry_emt(d, gfn, apic_access_mfn, 0, &ipat,
@@ -2481,6 +2490,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .cpu_up_prepare       = vmx_cpu_up_prepare,
     .cpu_dead             = vmx_cpu_dead,
     .domain_initialise    = vmx_domain_initialise,
+    .domain_relinquish_resources = vmx_domain_relinquish_resources,
     .domain_creation_finished = domain_creation_finished,
     .vcpu_initialise      = vmx_vcpu_initialise,
     .vcpu_destroy         = vmx_vcpu_destroy,
@@ -2731,7 +2741,7 @@ const struct hvm_function_table * __init start_vmx(void)
 {
     set_in_cr4(X86_CR4_VMXE);
 
-    if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
+    if ( vmx_vmcs_init() )
     {
         printk("VMX: failed to initialise.\n");
         return NULL;
@@ -3305,36 +3315,55 @@ gp_fault:
     return X86EMUL_EXCEPTION;
 }
 
-static int __init alloc_vlapic_mapping(void)
+static int vmx_alloc_vlapic_mapping(struct domain *d)
 {
     struct page_info *pg;
     mfn_t mfn;
 
-    if ( !cpu_has_vmx_virtualize_apic_accesses )
+    if ( !has_vlapic(d) || !cpu_has_vmx_virtualize_apic_accesses )
         return 0;
 
-    pg = alloc_domheap_page(NULL, 0);
+    pg = alloc_domheap_page(d, MEMF_no_refcount);
     if ( !pg )
         return -ENOMEM;
 
-    /*
-     * Signal to shadow code that this page cannot be refcounted. This also
-     * makes epte_get_entry_emt() recognize this page as "special".
-     */
-    page_suppress_refcounting(pg);
+    if ( !get_page_and_type(pg, d, PGT_writable_page) )
+    {
+        /*
+         * The domain can't possibly know about this page yet, so failure
+         * here is a clear indication of something fishy going on.
+         */
+        domain_crash(d);
+        return -ENODATA;
+    }
 
     mfn = page_to_mfn(pg);
     clear_domain_page(mfn);
-    apic_access_mfn = mfn;
+    d->arch.hvm.vmx.apic_access_mfn = mfn;
 
     return 0;
 }
 
+static void vmx_free_vlapic_mapping(struct domain *d)
+{
+    mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
+
+    d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
+    if ( !mfn_eq(mfn, _mfn(0)) )
+    {
+        struct page_info *pg = mfn_to_page(mfn);
+
+        put_page_alloc_ref(pg);
+        put_page_and_type(pg);
+    }
+}
+
 static void vmx_install_vlapic_mapping(struct vcpu *v)
 {
+    mfn_t apic_access_mfn = v->domain->arch.hvm.vmx.apic_access_mfn;
     paddr_t virt_page_ma, apic_page_ma;
 
-    if ( !has_vlapic(v->domain) || mfn_eq(apic_access_mfn, INVALID_MFN) )
+    if ( mfn_eq(apic_access_mfn, _mfn(0)) )
         return;
 
     ASSERT(cpu_has_vmx_virtualize_apic_accesses);
diff --git a/xen/arch/x86/mm/shadow/set.c b/xen/arch/x86/mm/shadow/set.c
index 87e9c6eeb219..bd6c68b547c9 100644
--- a/xen/arch/x86/mm/shadow/set.c
+++ b/xen/arch/x86/mm/shadow/set.c
@@ -101,14 +101,6 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
         owner = page_get_owner(pg);
     }
 
-    /*
-     * Check whether refcounting is suppressed on this page. For example,
-     * VMX'es APIC access MFN is just a surrogate page.  It doesn't actually
-     * get accessed, and hence there's no need to refcount it.
-     */
-    if ( pg && page_refcounting_suppressed(pg) )
-        return 0;
-
     if ( owner == dom_io )
         owner = NULL;
 
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 6970e7d6ea4a..814a4018535a 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -276,16 +276,9 @@ int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
 static void inline
 shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
 {
-    mfn_t mfn = shadow_l1e_get_mfn(sl1e);
-
     if ( !shadow_mode_refcounts(d) )
         return;
 
-    if ( mfn_valid(mfn) &&
-         /* See the respective comment in shadow_get_page_from_l1e(). */
-         page_refcounting_suppressed(mfn_to_page(mfn)) )
-        return;
-
     put_page_from_l1e(sl1e, d);
 }
 
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 03c9ccf627ab..8073af323b96 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -58,6 +58,7 @@ struct ept_data {
 #define _VMX_DOMAIN_PML_ENABLED    0
 #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
 struct vmx_domain {
+    mfn_t apic_access_mfn;
     /* VMX_DOMAIN_* */
     unsigned int status;
 
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7bdf9c2290d8..e1bcea57a8f5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -83,7 +83,7 @@
 #define PGC_state_offlined  PG_mask(2, 6)
 #define PGC_state_free      PG_mask(3, 6)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
-/* Page is not reference counted (see below for caveats) */
+/* Page is not reference counted */
 #define _PGC_extra        PG_shift(7)
 #define PGC_extra         PG_mask(1, 7)
 
@@ -375,24 +375,6 @@ void zap_ro_mpt(mfn_t mfn);
 
 bool is_iomem_page(mfn_t mfn);
 
-/*
- * Pages with no owner which may get passed to functions wanting to
- * refcount them can be marked PGC_extra to bypass this refcounting (which
- * would fail due to the lack of an owner).
- *
- * (For pages with owner PGC_extra has different meaning.)
- */
-static inline void page_suppress_refcounting(struct page_info *pg)
-{
-   ASSERT(!page_get_owner(pg));
-   pg->count_info |= PGC_extra;
-}
-
-static inline bool page_refcounting_suppressed(const struct page_info *pg)
-{
-    return !page_get_owner(pg) && (pg->count_info & PGC_extra);
-}
-
 struct platform_bad_page {
     unsigned long mfn;
     unsigned int order;
-- 
2.37.4