diff options
author | Florian Schmaus <flow@gentoo.org> | 2022-07-14 10:15:10 +0200 |
---|---|---|
committer | Florian Schmaus <flow@gentoo.org> | 2022-07-14 10:15:10 +0200 |
commit | cccde0377d523ea4e4ca57539a4141eef673f0e1 (patch) | |
tree | f6d6a0981bba9068f380c9b5ec59f76055698eea | |
parent | Correctly obtain the array length (diff) | |
download | xen-upstream-patches-cccde0377d523ea4e4ca57539a4141eef673f0e1.tar.gz xen-upstream-patches-cccde0377d523ea4e4ca57539a4141eef673f0e1.tar.bz2 xen-upstream-patches-cccde0377d523ea4e4ca57539a4141eef673f0e1.zip |
Xen 4.15.4-pre-patchset-0.1 (4.15.4-pre-patchset-0.1)
Signed-off-by: Florian Schmaus <flow@gentoo.org>
52 files changed, 1 insertions, 5320 deletions
diff --git a/0001-update-Xen-version-to-4.16.2-pre.patch b/0001-update-Xen-version-to-4.16.2-pre.patch deleted file mode 100644 index 2e62c21..0000000 --- a/0001-update-Xen-version-to-4.16.2-pre.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 5be9edb482ab20cf3e7acb05b511465294d1e19b Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 13:55:17 +0200 -Subject: [PATCH 01/51] update Xen version to 4.16.2-pre - ---- - xen/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 8abc71cf73aa..90a29782dbf4 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -2,7 +2,7 @@ - # All other places this is stored (eg. compile.h) should be autogenerated. - export XEN_VERSION = 4 - export XEN_SUBVERSION = 16 --export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) -+export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION) - export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) - -include xen-version - --- -2.35.1 - diff --git a/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch b/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch deleted file mode 100644 index 0ba090e..0000000 --- a/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b58fb6e81bd55b6bd946abc3070770f7994c9ef9 Mon Sep 17 00:00:00 2001 -From: Jason Andryuk <jandryuk@gmail.com> -Date: Tue, 7 Jun 2022 13:55:39 +0200 -Subject: [PATCH 02/51] x86/irq: skip unmap_domain_pirq XSM during destruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -xsm_unmap_domain_irq was seen denying unmap_domain_pirq when called from -complete_domain_destroy as an RCU callback. The source context was an -unexpected, random domain. Since this is a xen-internal operation, -going through the XSM hook is inapproriate. 
- -Check d->is_dying and skip the XSM hook when set since this is a cleanup -operation for a domain being destroyed. - -Suggested-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jason Andryuk <jandryuk@gmail.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 2e6f95a942d1927a53f077c301db0b799c54c05a -master date: 2022-04-08 14:51:52 +0200 ---- - xen/arch/x86/irq.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c -index 67cbf6b979dc..47b86af5dce9 100644 ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2342,8 +2342,14 @@ int unmap_domain_pirq(struct domain *d, int pirq) - nr = msi_desc->msi.nvec; - } - -- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -- msi_desc ? msi_desc->dev : NULL); -+ /* -+ * When called by complete_domain_destroy via RCU, current is a random -+ * domain. Skip the XSM check since this is a Xen-initiated action. -+ */ -+ if ( !d->is_dying ) -+ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -+ msi_desc ? 
msi_desc->dev : NULL); -+ - if ( ret ) - goto done; - --- -2.35.1 - diff --git a/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch b/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch deleted file mode 100644 index fa1443c..0000000 --- a/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 6c6bbfdff9374ef41f84c4ebed7b8a7a40767ef6 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 13:56:54 +0200 -Subject: [PATCH 03/51] xen: fix XEN_DOMCTL_gdbsx_guestmemio crash - -A hypervisor built without CONFIG_GDBSX will crash in case the -XEN_DOMCTL_gdbsx_guestmemio domctl is being called, as the call will -end up in iommu_do_domctl() with d == NULL: - - (XEN) CPU: 6 - (XEN) RIP: e008:[<ffff82d040269984>] iommu_do_domctl+0x4/0x30 - (XEN) RFLAGS: 0000000000010202 CONTEXT: hypervisor (d0v0) - (XEN) rax: 00000000000003e8 rbx: ffff830856277ef8 rcx: ffff830856277fff - ... - (XEN) Xen call trace: - (XEN) [<ffff82d040269984>] R iommu_do_domctl+0x4/0x30 - (XEN) [<ffff82d04035cd5f>] S arch_do_domctl+0x7f/0x2330 - (XEN) [<ffff82d040239e46>] S do_domctl+0xe56/0x1930 - (XEN) [<ffff82d040238ff0>] S do_domctl+0/0x1930 - (XEN) [<ffff82d0402f8c59>] S pv_hypercall+0x99/0x110 - (XEN) [<ffff82d0402f5161>] S arch/x86/pv/domain.c#_toggle_guest_pt+0x11/0x90 - (XEN) [<ffff82d040366288>] S lstar_enter+0x128/0x130 - (XEN) - (XEN) Pagetable walk from 0000000000000144: - (XEN) L4[0x000] = 0000000000000000 ffffffffffffffff - (XEN) - (XEN) **************************************** - (XEN) Panic on CPU 6: - (XEN) FATAL PAGE FAULT - (XEN) [error_code=0000] - (XEN) Faulting linear address: 0000000000000144 - (XEN) **************************************** - -It used to be permitted to pass DOMID_IDLE to dbg_rw_mem(), which is why the -special case skipping the domid checks exists. Now that it is only permitted -to pass proper domids, remove the special case, making 'd' always valid. 
- -Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com> -Fixes: e726a82ca0dc ("xen: make gdbsx support configurable") -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: f00daf1fb3213a9b0335d9dcd90fe9cb5c02b7a9 -master date: 2022-04-19 17:07:08 +0100 ---- - xen/common/domctl.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/xen/common/domctl.c b/xen/common/domctl.c -index 271862ae587f..419e4070f59d 100644 ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -304,7 +304,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) - if ( op->domain == DOMID_INVALID ) - { - case XEN_DOMCTL_createdomain: -- case XEN_DOMCTL_gdbsx_guestmemio: - d = NULL; - break; - } --- -2.35.1 - diff --git a/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch b/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch deleted file mode 100644 index a4d229a..0000000 --- a/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 13:58:16 +0200 -Subject: [PATCH 04/51] VT-d: refuse to use IOMMU with reserved CAP.ND value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The field taking the value 7 (resulting in 18-bit DIDs when using the -calculation in cap_ndoms(), when the DID fields are only 16 bits wide) -is reserved. Instead of misbehaving in case we would encounter such an -IOMMU, refuse to use it. 
- -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Kevin Tian <kevin.tian@intel.com> -master commit: a1545fbf45c689aff39ce76a6eaa609d32ef72a7 -master date: 2022-04-20 10:54:26 +0200 ---- - xen/drivers/passthrough/vtd/iommu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 93dd8aa643aa..8975c1de61bc 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -1279,8 +1279,11 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - - quirk_iommu_caps(iommu); - -+ nr_dom = cap_ndoms(iommu->cap); -+ - if ( cap_fault_reg_offset(iommu->cap) + - cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || -+ ((nr_dom - 1) >> 16) /* I.e. cap.nd > 6 */ || - ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) - { - printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n"); -@@ -1305,7 +1308,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - vtd_ops.sync_cache = sync_cache; - - /* allocate domain id bitmap */ -- nr_dom = cap_ndoms(iommu->cap); - iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); - if ( !iommu->domid_bitmap ) - return -ENOMEM; --- -2.35.1 - diff --git a/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch b/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch deleted file mode 100644 index 45a1825..0000000 --- a/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 7c003ab4a398ff4ddd54d15d4158cffb463134cc Mon Sep 17 00:00:00 2001 -From: David Vrabel <dvrabel@amazon.co.uk> -Date: Tue, 7 Jun 2022 13:59:31 +0200 -Subject: [PATCH 05/51] x86/mm: avoid inadvertently degrading a TLB flush to - local only -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If the direct map is incorrectly modified with interrupts 
disabled, -the required TLB flushes are degraded to flushing the local CPU only. - -This could lead to very hard to diagnose problems as different CPUs will -end up with different views of memory. Although, no such issues have yet -been identified. - -Change the check in the flush_area() macro to look at system_state -instead. This defers the switch from local to all later in the boot -(see xen/arch/x86/setup.c:__start_xen()). This is fine because -additional PCPUs are not brought up until after the system state is -SYS_STATE_smp_boot. - -Signed-off-by: David Vrabel <dvrabel@amazon.co.uk> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -x86/flushtlb: remove flush_area check on system state - -Booting with Shadow Stacks leads to the following assert on a debug -hypervisor: - -Assertion 'local_irq_is_enabled()' failed at arch/x86/smp.c:265 -----[ Xen-4.17.0-10.24-d x86_64 debug=y Not tainted ]---- -CPU: 0 -RIP: e008:[<ffff82d040345300>] flush_area_mask+0x40/0x13e -[...] -Xen call trace: - [<ffff82d040345300>] R flush_area_mask+0x40/0x13e - [<ffff82d040338a40>] F modify_xen_mappings+0xc5/0x958 - [<ffff82d0404474f9>] F arch/x86/alternative.c#_alternative_instructions+0xb7/0xb9 - [<ffff82d0404476cc>] F alternative_branches+0xf/0x12 - [<ffff82d04044e37d>] F __start_xen+0x1ef4/0x2776 - [<ffff82d040203344>] F __high_start+0x94/0xa0 - -This is due to SYS_STATE_smp_boot being set before calling -alternative_branches(), and the flush in modify_xen_mappings() then -using flush_area_all() with interrupts disabled. Note that -alternative_branches() is called before APs are started, so the flush -must be a local one (and indeed the cpumask passed to -flush_area_mask() just contains one CPU). - -Take the opportunity to simplify a bit the logic and make flush_area() -an alias of flush_area_all() in mm.c, taking into account that -cpu_online_map just contains the BSP before APs are started. 
This -requires widening the assert in flush_area_mask() to allow being -called with interrupts disabled as long as it's strictly a local only -flush. - -The overall result is that a conditional can be removed from -flush_area(). - -While there also introduce an ASSERT to check that a vCPU state flush -is not issued for the local CPU only. - -Fixes: 78e072bc37 ('x86/mm: avoid inadvertently degrading a TLB flush to local only') -Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 78e072bc375043e81691a59454e09f0b38241ddd -master date: 2022-04-20 10:55:01 +0200 -master commit: 9f735ee4903f1b9f1966bb4ba5b5616b03ae08b5 -master date: 2022-05-25 11:09:46 +0200 ---- - xen/arch/x86/mm.c | 10 ++-------- - xen/arch/x86/smp.c | 5 ++++- - 2 files changed, 6 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 4d799032dc82..e222d9aa98ee 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5051,14 +5051,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - #define l1f_to_lNf(f) (((f) & _PAGE_PRESENT) ? ((f) | _PAGE_PSE) : (f)) - #define lNf_to_l1f(f) (((f) & _PAGE_PRESENT) ? ((f) & ~_PAGE_PSE) : (f)) - --/* -- * map_pages_to_xen() can be called with interrupts disabled during -- * early bootstrap. In this case it is safe to use flush_area_local() -- * and avoid locking because only the local CPU is online. -- */ --#define flush_area(v,f) (!local_irq_is_enabled() ? \ -- flush_area_local((const void *)v, f) : \ -- flush_area_all((const void *)v, f)) -+/* flush_area_all() can be used prior to any other CPU being online. 
*/ -+#define flush_area(v, f) flush_area_all((const void *)(v), f) - - #define L3T_INIT(page) (page) = ZERO_BLOCK_PTR - -diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c -index eef0f9c6cbf4..3556ec116608 100644 ---- a/xen/arch/x86/smp.c -+++ b/xen/arch/x86/smp.c -@@ -262,7 +262,10 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) - { - unsigned int cpu = smp_processor_id(); - -- ASSERT(local_irq_is_enabled()); -+ /* Local flushes can be performed with interrupts disabled. */ -+ ASSERT(local_irq_is_enabled() || cpumask_subset(mask, cpumask_of(cpu))); -+ /* Exclude use of FLUSH_VCPU_STATE for the local CPU. */ -+ ASSERT(!cpumask_test_cpu(cpu, mask) || !(flags & FLUSH_VCPU_STATE)); - - if ( (flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK)) && - cpumask_test_cpu(cpu, mask) ) --- -2.35.1 - diff --git a/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch b/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch deleted file mode 100644 index 7eb13cd..0000000 --- a/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 4bb8c34ba4241c2bf7845cd8b80c17530dbfb085 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 7 Jun 2022 14:00:09 +0200 -Subject: [PATCH 06/51] xen/build: Fix dependency for the MAP rule - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: e1e72198213b80b7a82bdc90f96ed05ae4f53e20 -master date: 2022-04-20 19:10:59 +0100 ---- - xen/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 90a29782dbf4..ce4eca3ee4d7 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -507,7 +507,7 @@ cscope: - cscope -k -b -q - - .PHONY: _MAP --_MAP: -+_MAP: $(TARGET) - $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map - - %.o %.i %.s: %.c FORCE --- -2.35.1 - diff --git 
a/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch b/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch deleted file mode 100644 index ed98922..0000000 --- a/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 13a29f3756bc4cab96c59f46c3875b483553fb8f Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:00:31 +0200 -Subject: [PATCH 07/51] tools/libs/evtchn: don't set errno to negative values - -Setting errno to a negative value makes no sense. - -Fixes: 6b6500b3cbaa ("tools/libs/evtchn: Add support for restricting a handle") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 60245b71c1cd001686fa7b7a26869cbcb80d074c -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/evtchn/freebsd.c | 2 +- - tools/libs/evtchn/minios.c | 2 +- - tools/libs/evtchn/netbsd.c | 2 +- - tools/libs/evtchn/solaris.c | 2 +- - 4 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tools/libs/evtchn/freebsd.c b/tools/libs/evtchn/freebsd.c -index 7427ab240860..fa17a0f8dbb5 100644 ---- a/tools/libs/evtchn/freebsd.c -+++ b/tools/libs/evtchn/freebsd.c -@@ -58,7 +58,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/minios.c b/tools/libs/evtchn/minios.c -index e5dfdc5ef52e..c0bd5429eea2 100644 ---- a/tools/libs/evtchn/minios.c -+++ b/tools/libs/evtchn/minios.c -@@ -97,7 +97,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/netbsd.c b/tools/libs/evtchn/netbsd.c -index 1cebc21ffce0..56409513bc23 100644 ---- a/tools/libs/evtchn/netbsd.c -+++ b/tools/libs/evtchn/netbsd.c -@@ 
-53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/solaris.c b/tools/libs/evtchn/solaris.c -index df9579df1778..beaa7721425f 100644 ---- a/tools/libs/evtchn/solaris.c -+++ b/tools/libs/evtchn/solaris.c -@@ -53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - return -1; - } - --- -2.35.1 - diff --git a/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch b/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index 166f0ff..0000000 --- a/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,36 +0,0 @@ -From ba62afdbc31a8cfe897191efd25ed4449d9acd94 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:03 +0200 -Subject: [PATCH 08/51] tools/libs/ctrl: don't set errno to a negative value - -The claimed reason for setting errno to -1 is wrong. On x86 -xc_domain_pod_target() will set errno to a sane value in the error -case. 
- -Fixes: ff1745d5882b ("tools: libxl: do not set the PoD target on ARM") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: a0fb7e0e73483ed042d5ca34861a891a51ad337b -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/ctrl/xc_domain.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c -index b155d6afd2ef..9d675c8f21e1 100644 ---- a/tools/libs/ctrl/xc_domain.c -+++ b/tools/libs/ctrl/xc_domain.c -@@ -1297,9 +1297,7 @@ int xc_domain_get_pod_target(xc_interface *xch, - uint64_t *pod_cache_pages, - uint64_t *pod_entries) - { -- /* On x86 (above) xc_domain_pod_target will incorrectly return -1 -- * with errno==-1 on error. Do the same for least surprise. */ -- errno = -1; -+ errno = EOPNOTSUPP; - return -1; - } - #endif --- -2.35.1 - diff --git a/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch b/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index 5d035f6..0000000 --- a/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,32 +0,0 @@ -From a2cf30eec08db5df974a9e8bb7366fee8fc7fcd9 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:27 +0200 -Subject: [PATCH 09/51] tools/libs/guest: don't set errno to a negative value - -Setting errno to a negative error value makes no sense. 
- -Fixes: cb99a64029c9 ("libxc: arm: allow passing a device tree blob to the guest") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 438e96ab479495a932391a22e219ee62fa8c4f47 -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/guest/xg_dom_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c -index 2e4c1330ea6b..65975a75da37 100644 ---- a/tools/libs/guest/xg_dom_core.c -+++ b/tools/libs/guest/xg_dom_core.c -@@ -856,7 +856,7 @@ int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename) - return -1; - return 0; - #else -- errno = -EINVAL; -+ errno = EINVAL; - return -1; - #endif - } --- -2.35.1 - diff --git a/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch b/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index ac900ae..0000000 --- a/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 15391de8e2bb6153eadd483154c53044ab53d98d Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:44 +0200 -Subject: [PATCH 10/51] tools/libs/light: don't set errno to a negative value - -Setting errno to a negative value makes no sense. 
- -Fixes: e78e8b9bb649 ("libxl: Add interface for querying hypervisor about PCI topology") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 2419a159fb943c24a6f2439604b9fdb1478fcd08 -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/light/libxl_linux.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libs/light/libxl_linux.c b/tools/libs/light/libxl_linux.c -index 8d62dfd255cb..27f2bce71837 100644 ---- a/tools/libs/light/libxl_linux.c -+++ b/tools/libs/light/libxl_linux.c -@@ -288,7 +288,7 @@ int libxl__pci_topology_init(libxl__gc *gc, - if (i == num_devs) { - LOG(ERROR, "Too many devices"); - err = ERROR_FAIL; -- errno = -ENOSPC; -+ errno = ENOSPC; - goto out; - } - --- -2.35.1 - diff --git a/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch b/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch deleted file mode 100644 index 3c60de4..0000000 --- a/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch +++ /dev/null @@ -1,112 +0,0 @@ -From a6c32abd144ec6443c6a433b5a2ac00e2615aa86 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:02:08 +0200 -Subject: [PATCH 11/51] xen/iommu: cleanup iommu related domctl handling - -Today iommu_do_domctl() is being called from arch_do_domctl() in the -"default:" case of a switch statement. This has led already to crashes -due to unvalidated parameters. - -Fix that by moving the call of iommu_do_domctl() to the main switch -statement of do_domctl(). 
- -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> # Arm -master commit: 9cd7e31b3f584e97a138a770cfb031a91a867936 -master date: 2022-04-26 10:23:58 +0200 ---- - xen/arch/arm/domctl.c | 11 +---------- - xen/arch/x86/domctl.c | 2 +- - xen/common/domctl.c | 7 +++++++ - xen/include/xen/iommu.h | 12 +++++++++--- - 4 files changed, 18 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c -index 6245af6d0bab..1baf25c3d98b 100644 ---- a/xen/arch/arm/domctl.c -+++ b/xen/arch/arm/domctl.c -@@ -176,16 +176,7 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, - return rc; - } - default: -- { -- int rc; -- -- rc = subarch_do_domctl(domctl, d, u_domctl); -- -- if ( rc == -ENOSYS ) -- rc = iommu_do_domctl(domctl, d, u_domctl); -- -- return rc; -- } -+ return subarch_do_domctl(domctl, d, u_domctl); - } - } - -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 7d102e0647ec..0fa51f2ebd10 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -1380,7 +1380,7 @@ long arch_do_domctl( - break; - - default: -- ret = iommu_do_domctl(domctl, d, u_domctl); -+ ret = -ENOSYS; - break; - } - -diff --git a/xen/common/domctl.c b/xen/common/domctl.c -index 419e4070f59d..65d2a4588b71 100644 ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -870,6 +870,13 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) - copyback = 1; - break; - -+ case XEN_DOMCTL_assign_device: -+ case XEN_DOMCTL_test_assign_device: -+ case XEN_DOMCTL_deassign_device: -+ case XEN_DOMCTL_get_device_group: -+ ret = iommu_do_domctl(op, d, u_domctl); -+ break; -+ - default: - ret = arch_do_domctl(op, d, u_domctl); - break; -diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h -index 92b2d23f0ba2..861579562e8a 100644 ---- a/xen/include/xen/iommu.h -+++ b/xen/include/xen/iommu.h -@@ -342,8 +342,17 @@ struct 
domain_iommu { - /* Does the IOMMU pagetable need to be kept synchronized with the P2M */ - #ifdef CONFIG_HAS_PASSTHROUGH - #define need_iommu_pt_sync(d) (dom_iommu(d)->need_sync) -+ -+int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, -+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl); - #else - #define need_iommu_pt_sync(d) ({ (void)(d); false; }) -+ -+static inline int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, -+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) -+{ -+ return -ENOSYS; -+} - #endif - - int __must_check iommu_suspend(void); -@@ -357,9 +366,6 @@ int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, - XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); - #endif - --int iommu_do_domctl(struct xen_domctl *, struct domain *d, -- XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); -- - void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev); - - /* --- -2.35.1 - diff --git a/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch b/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch deleted file mode 100644 index 37b9005..0000000 --- a/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 4cf9a7c7bdb9d544fbac81105bbc1059ba3dd932 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:02:30 +0200 -Subject: [PATCH 12/51] IOMMU: make domctl handler tolerate NULL domain - -Besides the reporter's issue of hitting a NULL deref when !CONFIG_GDBSX, -XEN_DOMCTL_test_assign_device can legitimately end up having NULL passed -here, when the domctl was passed DOMID_INVALID. 
- -Fixes: 71e617a6b8f6 ("use is_iommu_enabled() where appropriate...") -Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Paul Durrant <paul@xen.org> -Reviewed-by: Juergen Gross <jgross@suse.com> -master commit: fa4d84e6dd3c3bfd23a525b75a5483d4ce15adbb -master date: 2022-04-26 10:25:54 +0200 ---- - xen/drivers/passthrough/iommu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c -index caaba62c8865..287f63fc736f 100644 ---- a/xen/drivers/passthrough/iommu.c -+++ b/xen/drivers/passthrough/iommu.c -@@ -535,7 +535,7 @@ int iommu_do_domctl( - { - int ret = -ENODEV; - -- if ( !is_iommu_enabled(d) ) -+ if ( !(d ? is_iommu_enabled(d) : iommu_enabled) ) - return -EOPNOTSUPP; - - #ifdef CONFIG_HAS_PCI --- -2.35.1 - diff --git a/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch b/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch deleted file mode 100644 index 8416c96..0000000 --- a/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch +++ /dev/null @@ -1,229 +0,0 @@ -From 838f6c211f7f05f107e1acdfb0977ab61ec0bf2e Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:03:20 +0200 -Subject: [PATCH 13/51] IOMMU/x86: disallow device assignment to PoD guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -While it is okay for IOMMU page tables to be set up for guests starting -in PoD mode, actual device assignment may only occur once all PoD -entries have been removed from the P2M. So far this was enforced only -for boot-time assignment, and only in the tool stack. - -Also use the new function to replace p2m_pod_entry_count(): Its unlocked -access to p2m->pod.entry_count wasn't really okay (irrespective of the -result being stale by the time the caller gets to see it). 
Nor was the -use of that function in line with the immediately preceding comment: A -PoD guest isn't just one with a non-zero entry count, but also one with -a non-empty cache (e.g. prior to actually launching the guest). - -To allow the tool stack to see a consistent snapshot of PoD state, move -the tail of XENMEM_{get,set}_pod_target handling into a function, adding -proper locking there. - -In libxl take the liberty to use the new local variable r also for a -pre-existing call into libxc. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: ad4312d764e8b40a1e45b64aac6d840a60c59f13 -master date: 2022-05-02 08:48:02 +0200 ---- - xen/arch/x86/mm.c | 6 +--- - xen/arch/x86/mm/p2m-pod.c | 43 ++++++++++++++++++++++++++++- - xen/common/vm_event.c | 2 +- - xen/drivers/passthrough/x86/iommu.c | 3 +- - xen/include/asm-x86/p2m.h | 21 +++++++------- - 5 files changed, 57 insertions(+), 18 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index e222d9aa98ee..4ee2de11051d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4777,7 +4777,6 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - { - xen_pod_target_t target; - struct domain *d; -- struct p2m_domain *p2m; - - if ( copy_from_guest(&target, arg, 1) ) - return -EFAULT; -@@ -4812,10 +4811,7 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - } - else if ( rc >= 0 ) - { -- p2m = p2m_get_hostp2m(d); -- target.tot_pages = domain_tot_pages(d); -- target.pod_cache_pages = p2m->pod.count; -- target.pod_entries = p2m->pod.entry_count; -+ p2m_pod_get_mem_target(d, &target); - - if ( __copy_to_guest(arg, &target, 1) ) - { -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index d8d1a0ce7ed7..a3c9d8a97423 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -20,6 +20,7 @@ - */ - - #include <xen/event.h> -+#include <xen/iocap.h> - #include 
<xen/ioreq.h> - #include <xen/mm.h> - #include <xen/sched.h> -@@ -362,7 +363,10 @@ p2m_pod_set_mem_target(struct domain *d, unsigned long target) - - ASSERT( pod_target >= p2m->pod.count ); - -- ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); -+ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) -+ ret = -ENOTEMPTY; -+ else -+ ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); - - out: - pod_unlock(p2m); -@@ -370,6 +374,23 @@ out: - return ret; - } - -+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target) -+{ -+ struct p2m_domain *p2m = p2m_get_hostp2m(d); -+ -+ ASSERT(is_hvm_domain(d)); -+ -+ pod_lock(p2m); -+ lock_page_alloc(p2m); -+ -+ target->tot_pages = domain_tot_pages(d); -+ target->pod_cache_pages = p2m->pod.count; -+ target->pod_entries = p2m->pod.entry_count; -+ -+ unlock_page_alloc(p2m); -+ pod_unlock(p2m); -+} -+ - int p2m_pod_empty_cache(struct domain *d) - { - struct p2m_domain *p2m = p2m_get_hostp2m(d); -@@ -1387,6 +1408,9 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn, - if ( !paging_mode_translate(d) ) - return -EINVAL; - -+ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) -+ return -ENOTEMPTY; -+ - do { - rc = mark_populate_on_demand(d, gfn, chunk_order); - -@@ -1408,3 +1432,20 @@ void p2m_pod_init(struct p2m_domain *p2m) - for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) - p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN); - } -+ -+bool p2m_pod_active(const struct domain *d) -+{ -+ struct p2m_domain *p2m; -+ bool res; -+ -+ if ( !is_hvm_domain(d) ) -+ return false; -+ -+ p2m = p2m_get_hostp2m(d); -+ -+ pod_lock(p2m); -+ res = p2m->pod.entry_count | p2m->pod.count; -+ pod_unlock(p2m); -+ -+ return res; -+} -diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c -index 70ab3ba406ff..21d2f0edf727 100644 ---- a/xen/common/vm_event.c -+++ b/xen/common/vm_event.c -@@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec) - 
- rc = -EXDEV; - /* Disallow paging in a PoD guest */ -- if ( p2m_pod_entry_count(p2m_get_hostp2m(d)) ) -+ if ( p2m_pod_active(d) ) - break; - - /* domain_pause() not required here, see XSA-99 */ -diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c -index a36a6bd4b249..dc9936e16930 100644 ---- a/xen/drivers/passthrough/x86/iommu.c -+++ b/xen/drivers/passthrough/x86/iommu.c -@@ -502,11 +502,12 @@ bool arch_iommu_use_permitted(const struct domain *d) - { - /* - * Prevent device assign if mem paging, mem sharing or log-dirty -- * have been enabled for this domain. -+ * have been enabled for this domain, or if PoD is still in active use. - */ - return d == dom_io || - (likely(!mem_sharing_enabled(d)) && - likely(!mem_paging_enabled(d)) && -+ likely(!p2m_pod_active(d)) && - likely(!p2m_get_hostp2m(d)->global_logdirty)); - } - -diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h -index 357a8087481e..f2af7a746ced 100644 ---- a/xen/include/asm-x86/p2m.h -+++ b/xen/include/asm-x86/p2m.h -@@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d); - * domain matches target */ - int p2m_pod_set_mem_target(struct domain *d, unsigned long target); - -+/* Obtain a consistent snapshot of PoD related domain state. */ -+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target); -+ -+/* Check whether PoD is (still) active in a domain. 
*/ -+bool p2m_pod_active(const struct domain *d); -+ - /* Scan pod cache when offline/broken page triggered */ - int - p2m_pod_offline_or_broken_hit(struct page_info *p); -@@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct page_info *p); - void - p2m_pod_offline_or_broken_replace(struct page_info *p); - --static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) --{ -- return p2m->pod.entry_count; --} -- - void p2m_pod_init(struct p2m_domain *p2m); - - #else -@@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(struct domain *d) - return 0; - } - -+static inline bool p2m_pod_active(const struct domain *d) -+{ -+ return false; -+} -+ - static inline int p2m_pod_offline_or_broken_hit(struct page_info *p) - { - return 0; -@@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_broken_replace(struct page_info *p) - ASSERT_UNREACHABLE(); - } - --static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) --{ -- return 0; --} -- - static inline void p2m_pod_init(struct p2m_domain *p2m) {} - - #endif --- -2.35.1 - diff --git a/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch b/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch deleted file mode 100644 index 69049f1..0000000 --- a/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 9ebe2ba83644ec6cd33a93c68dab5f551adcbea0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 7 Jun 2022 14:04:16 +0200 -Subject: [PATCH 14/51] x86/msr: handle reads to MSR_P5_MC_{ADDR,TYPE} -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Windows Server 2019 Essentials will unconditionally attempt to read -P5_MC_ADDR MSR at boot and throw a BSOD if injected a #GP. 
- -Fix this by mapping MSR_P5_MC_{ADDR,TYPE} to -MSR_IA32_MCi_{ADDR,STATUS}, as reported also done by hardware in Intel -SDM "Mapping of the Pentium Processor Machine-Check Errors to the -Machine-Check Architecture" section. - -Reported-by: Steffen Einsle <einsle@phptrix.de> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ce59e472b581e4923f6892172dde62b88c8aa8b7 -master date: 2022-05-02 08:49:12 +0200 ---- - xen/arch/x86/cpu/mcheck/mce.h | 6 ++++++ - xen/arch/x86/cpu/mcheck/mce_intel.c | 19 +++++++++++++++++++ - xen/arch/x86/cpu/mcheck/vmce.c | 2 ++ - xen/arch/x86/msr.c | 2 ++ - xen/include/asm-x86/msr-index.h | 3 +++ - 5 files changed, 32 insertions(+) - -diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h -index 195362691904..192315ecfa3d 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.h -+++ b/xen/arch/x86/cpu/mcheck/mce.h -@@ -169,6 +169,12 @@ static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) - if (msr >= MSR_IA32_MC0_CTL2 && - msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) ) - return 1; -+ fallthrough; -+ -+ case X86_VENDOR_CENTAUR: -+ case X86_VENDOR_SHANGHAI: -+ if (msr == MSR_P5_MC_ADDR || msr == MSR_P5_MC_TYPE) -+ return 1; - break; - - case X86_VENDOR_AMD: -diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c -index bb9f3a3ff795..d364e9bf5ad1 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_intel.c -+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c -@@ -1001,8 +1001,27 @@ int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - - int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - { -+ const struct cpuid_policy *cp = v->domain->arch.cpuid; - unsigned int bank = msr - MSR_IA32_MC0_CTL2; - -+ switch ( msr ) -+ { -+ case MSR_P5_MC_ADDR: -+ /* -+ * Bank 0 is used for the 'bank 0 quirk' on older processors. -+ * See vcpu_fill_mc_msrs() for reference. 
-+ */ -+ *val = v->arch.vmce.bank[1].mci_addr; -+ return 1; -+ -+ case MSR_P5_MC_TYPE: -+ *val = v->arch.vmce.bank[1].mci_status; -+ return 1; -+ } -+ -+ if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ) -+ return 0; -+ - if ( bank < GUEST_MC_BANK_NUM ) - { - *val = v->arch.vmce.bank[bank].mci_ctl2; -diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c -index eb6434a3ba20..0899df58bcbf 100644 ---- a/xen/arch/x86/cpu/mcheck/vmce.c -+++ b/xen/arch/x86/cpu/mcheck/vmce.c -@@ -150,6 +150,8 @@ static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - default: - switch ( boot_cpu_data.x86_vendor ) - { -+ case X86_VENDOR_CENTAUR: -+ case X86_VENDOR_SHANGHAI: - case X86_VENDOR_INTEL: - ret = vmce_intel_rdmsr(v, msr, val); - break; -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index aaedb2c31287..da305c7aa4c9 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -282,6 +282,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - *val = msrs->misc_features_enables.raw; - break; - -+ case MSR_P5_MC_ADDR: -+ case MSR_P5_MC_TYPE: - case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL: /* 0x179 -> 0x17b */ - case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */ - case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */ -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 3e038db618ff..31964b88af7a 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -15,6 +15,9 @@ - * abbreviated name. Exceptions will be considered on a case-by-case basis. 
- */ - -+#define MSR_P5_MC_ADDR 0 -+#define MSR_P5_MC_TYPE 0x00000001 -+ - #define MSR_APIC_BASE 0x0000001b - #define APIC_BASE_BSP (_AC(1, ULL) << 8) - #define APIC_BASE_EXTD (_AC(1, ULL) << 10) --- -2.35.1 - diff --git a/0015-kconfig-detect-LD-implementation.patch b/0015-kconfig-detect-LD-implementation.patch deleted file mode 100644 index 4507bc7..0000000 --- a/0015-kconfig-detect-LD-implementation.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 3754bd128d1a6b3d5864d1a3ee5d27b67d35387a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 7 Jun 2022 14:05:06 +0200 -Subject: [PATCH 15/51] kconfig: detect LD implementation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Detect GNU and LLVM ld implementations. This is required for further -patches that will introduce diverging behaviour depending on the -linker implementation in use. - -Note that LLVM ld returns "compatible with GNU linkers" as part of the -version string, so be on the safe side and use '^' to only match at -the start of the line in case LLVM ever decides to change the text to -use "compatible with GNU ld" instead. 
- -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Michal Orzel <michal.orzel@arm.com> -Acked-by: Julien Grall <jgrall@amazon.com> -master commit: c70c4b624f85f7d4e28c70a804a0a3f20d73092b -master date: 2022-05-02 08:50:39 +0200 ---- - xen/Kconfig | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/Kconfig b/xen/Kconfig -index bcbd2758e5d3..0c89afd50fcf 100644 ---- a/xen/Kconfig -+++ b/xen/Kconfig -@@ -23,6 +23,12 @@ config CLANG_VERSION - int - default $(shell,$(BASEDIR)/scripts/clang-version.sh $(CC)) - -+config LD_IS_GNU -+ def_bool $(success,$(LD) --version | head -n 1 | grep -q "^GNU ld") -+ -+config LD_IS_LLVM -+ def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD") -+ - # -fvisibility=hidden reduces -fpic cost, if it's available - config CC_HAS_VISIBILITY_ATTRIBUTE - def_bool $(cc-option,-fvisibility=hidden) --- -2.35.1 - diff --git a/0016-linker-lld-do-not-generate-quoted-section-names.patch b/0016-linker-lld-do-not-generate-quoted-section-names.patch deleted file mode 100644 index 5b3a8cd..0000000 --- a/0016-linker-lld-do-not-generate-quoted-section-names.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 88b653f73928117461dc250acd1e830a47a14c2b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 7 Jun 2022 14:05:24 +0200 -Subject: [PATCH 16/51] linker/lld: do not generate quoted section names -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -LLVM LD doesn't strip the quotes from the section names, and so the -resulting binary ends up with section names like: - - [ 1] ".text" PROGBITS ffff82d040200000 00008000 - 000000000018cbc1 0000000000000000 AX 0 0 4096 - -This confuses some tools (like gdb) and prevents proper parsing of the -binary. - -The issue has already been reported and is being fixed in LLD. 
In -order to work around this issue and keep the GNU ld support define -different DECL_SECTION macros depending on the used ld -implementation. - -Drop the quotes from the definitions of the debug sections in -DECL_DEBUG{2}, as those quotes are not required for GNU ld either. - -Fixes: 6254920587c3 ('x86: quote section names when defining them in linker script') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 702c9a800eb3ecd4b8595998d37a769d470c5bb0 -master date: 2022-05-02 08:51:45 +0200 ---- - xen/arch/x86/xen.lds.S | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S -index 4c58f3209c3d..bc9b9651b192 100644 ---- a/xen/arch/x86/xen.lds.S -+++ b/xen/arch/x86/xen.lds.S -@@ -18,7 +18,11 @@ ENTRY(efi_start) - #else /* !EFI */ - - #define FORMAT "elf64-x86-64" --#define DECL_SECTION(x) #x : AT(ADDR(#x) - __XEN_VIRT_START) -+#ifdef CONFIG_LD_IS_GNU -+# define DECL_SECTION(x) x : AT(ADDR(#x) - __XEN_VIRT_START) -+#else -+# define DECL_SECTION(x) x : AT(ADDR(x) - __XEN_VIRT_START) -+#endif - - ENTRY(start_pa) - --- -2.35.1 - diff --git a/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch b/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch deleted file mode 100644 index bc48a84..0000000 --- a/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 982a314bd3000a16c3128afadb36a8ff41029adc Mon Sep 17 00:00:00 2001 -From: Julien Grall <jgrall@amazon.com> -Date: Tue, 7 Jun 2022 14:06:11 +0200 -Subject: [PATCH 17/51] xen: io: Fix race between sending an I/O and domain - shutdown - -Xen provides hypercalls to shutdown (SCHEDOP_shutdown{,_code}) and -resume a domain (XEN_DOMCTL_resumedomain). They can be used for checkpoint -where the expectation is the domain should continue as nothing happened -afterwards.
- -hvmemul_do_io() and handle_pio() will act differently if the return -code of hvm_send_ioreq() (resp. hvmemul_do_pio_buffer()) is X86EMUL_RETRY. - -In this case, the I/O state will be reset to STATE_IOREQ_NONE (i.e. -no I/O is pending) and/or the PC will not be advanced. - -If the shutdown request happens right after the I/O was sent to the -IOREQ, then emulation code will end up to re-execute the instruction -and therefore forward again the same I/O (at least when reading IO port). - -This would be a problem if the access has a side-effect. A dumb example -is a device implementing a counter which is incremented by one for every -access. When running shutdown/resume in a loop, the value read by the -OS may not be the old value + 1. - -Add an extra boolean in the structure hvm_vcpu_io to indicate whether -the I/O was suspended. This is then used in place of checking the domain -is shutting down in hvmemul_do_io() and handle_pio() as they should -act on suspend (i.e. vcpu_start_shutdown_deferral() returns false) rather -than shutdown.
- -Signed-off-by: Julien Grall <jgrall@amazon.com> -Reviewed-by: Paul Durrant <paul@xen.org> -master commit: b7e0d8978810b534725e94a321736496928f00a5 -master date: 2022-05-06 17:16:22 +0100 ---- - xen/arch/arm/ioreq.c | 3 ++- - xen/arch/x86/hvm/emulate.c | 3 ++- - xen/arch/x86/hvm/io.c | 7 ++++--- - xen/common/ioreq.c | 4 ++++ - xen/include/xen/sched.h | 5 +++++ - 5 files changed, 17 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c -index 308650b40051..fbccef212bf1 100644 ---- a/xen/arch/arm/ioreq.c -+++ b/xen/arch/arm/ioreq.c -@@ -80,9 +80,10 @@ enum io_state try_fwd_ioserv(struct cpu_user_regs *regs, - return IO_ABORT; - - vio->req = p; -+ vio->suspended = false; - - rc = ioreq_send(s, &p, 0); -- if ( rc != IO_RETRY || v->domain->is_shutting_down ) -+ if ( rc != IO_RETRY || vio->suspended ) - vio->req.state = STATE_IOREQ_NONE; - else if ( !ioreq_needs_completion(&vio->req) ) - rc = IO_HANDLED; -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index 76a2ccfafe23..7da348b5d486 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -239,6 +239,7 @@ static int hvmemul_do_io( - ASSERT(p.count); - - vio->req = p; -+ vio->suspended = false; - - rc = hvm_io_intercept(&p); - -@@ -334,7 +335,7 @@ static int hvmemul_do_io( - else - { - rc = ioreq_send(s, &p, 0); -- if ( rc != X86EMUL_RETRY || currd->is_shutting_down ) -+ if ( rc != X86EMUL_RETRY || vio->suspended ) - vio->req.state = STATE_IOREQ_NONE; - else if ( !ioreq_needs_completion(&vio->req) ) - rc = X86EMUL_OKAY; -diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c -index 93f1d1503fa6..80915f27e488 100644 ---- a/xen/arch/x86/hvm/io.c -+++ b/xen/arch/x86/hvm/io.c -@@ -138,10 +138,11 @@ bool handle_pio(uint16_t port, unsigned int size, int dir) - - case X86EMUL_RETRY: - /* -- * We should not advance RIP/EIP if the domain is shutting down or -- * if X86EMUL_RETRY has been returned by an internal handler. 
-+ * We should not advance RIP/EIP if the vio was suspended (e.g. -+ * because the domain is shutting down) or if X86EMUL_RETRY has -+ * been returned by an internal handler. - */ -- if ( curr->domain->is_shutting_down || !vcpu_ioreq_pending(curr) ) -+ if ( vio->suspended || !vcpu_ioreq_pending(curr) ) - return false; - break; - -diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c -index d732dc045df9..42414b750bef 100644 ---- a/xen/common/ioreq.c -+++ b/xen/common/ioreq.c -@@ -1256,6 +1256,7 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, - struct vcpu *curr = current; - struct domain *d = curr->domain; - struct ioreq_vcpu *sv; -+ struct vcpu_io *vio = &curr->io; - - ASSERT(s); - -@@ -1263,7 +1264,10 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, - return ioreq_send_buffered(s, proto_p); - - if ( unlikely(!vcpu_start_shutdown_deferral(curr)) ) -+ { -+ vio->suspended = true; - return IOREQ_STATUS_RETRY; -+ } - - list_for_each_entry ( sv, - &s->ioreq_vcpu_list, -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 28146ee404e6..9671062360ac 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -159,6 +159,11 @@ enum vio_completion { - struct vcpu_io { - /* I/O request in flight to device model. */ - enum vio_completion completion; -+ /* -+ * Indicate whether the I/O was not handled because the domain -+ * is about to be paused. 
-+ */ -+ bool suspended; - ioreq_t req; - }; - --- -2.35.1 - diff --git a/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch b/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch deleted file mode 100644 index b20a99a..0000000 --- a/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 4890031d224262a6cf43d3bef1af4a16c13db306 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:06:51 +0200 -Subject: [PATCH 18/51] build: suppress GNU ld warning about RWX load segments - -We cannot really avoid such and we're also not really at risk because of -them, as we control page table permissions ourselves rather than relying -on a loader of some sort. Present GNU ld master started warning about -such, and hence 2.39 is anticipated to have this warning. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Julien Grall <jgrall@amazon.com> -master commit: 68f5aac012b9ae36ce9b65d9ca9cc9f232191ad3 -master date: 2022-05-18 11:17:19 +0200 ---- - xen/Makefile | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/Makefile b/xen/Makefile -index ce4eca3ee4d7..4d9abe704628 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -260,6 +260,8 @@ endif - - AFLAGS += -D__ASSEMBLY__ - -+LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments -+ - CFLAGS += $(CFLAGS-y) - # allow extra CFLAGS externally via EXTRA_CFLAGS_XEN_CORE - CFLAGS += $(EXTRA_CFLAGS_XEN_CORE) --- -2.35.1 - diff --git a/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch b/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch deleted file mode 100644 index e4d739b..0000000 --- a/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 1bc669a568a9f4bdab9e9ddb95823ba370dc0baf Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:07:11 
+0200 -Subject: [PATCH 19/51] build: silence GNU ld warning about executable stacks - -While for C files the compiler is supposed to arrange for emitting -respective information, for assembly sources we're responsible ourselves. -Present GNU ld master started warning about such, and hence 2.39 is -anticipated to have this warning. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Julien Grall <jgrall@amazon.com> -master commit: 62d22296a95d259c934ca2f39ac511d729cfbb68 -master date: 2022-05-18 11:18:45 +0200 ---- - xen/Makefile | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/Makefile b/xen/Makefile -index 4d9abe704628..971028eda240 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -260,6 +260,8 @@ endif - - AFLAGS += -D__ASSEMBLY__ - -+$(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack) -+ - LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments - - CFLAGS += $(CFLAGS-y) --- -2.35.1 - diff --git a/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch b/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch deleted file mode 100644 index baa1e15..0000000 --- a/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch +++ /dev/null @@ -1,50 +0,0 @@ -From f1be0b62a03b90a40a03e21f965e4cbb89809bb1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 7 Jun 2022 14:07:34 +0200 -Subject: [PATCH 20/51] ns16550: use poll mode if INTERRUPT_LINE is 0xff -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Intel LPSS has INTERRUPT_LINE set to 0xff by default, that is declared -by the PCI Local Bus Specification Revision 3.0 (from 2004) as -"unknown"/"no connection". Fallback to poll mode in this case. -The 0xff handling is x86-specific, the surrounding code is guarded with -CONFIG_X86 anyway. 
- -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 6a2ea1a2370a0c8a0210accac0ae62e68c185134 -master date: 2022-05-20 12:19:45 +0200 ---- - xen/drivers/char/ns16550.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c -index 30596d60d4ed..2d2bd2a02469 100644 ---- a/xen/drivers/char/ns16550.c -+++ b/xen/drivers/char/ns16550.c -@@ -1221,6 +1221,19 @@ pci_uart_config(struct ns16550 *uart, bool_t skip_amt, unsigned int idx) - pci_conf_read8(PCI_SBDF(0, b, d, f), - PCI_INTERRUPT_LINE) : 0; - -+#ifdef CONFIG_X86 -+ /* -+ * PCI Local Bus Specification Revision 3.0 defines 0xff value -+ * as special only for X86. -+ */ -+ if ( uart->irq == 0xff ) -+ uart->irq = 0; -+#endif -+ if ( !uart->irq ) -+ printk(XENLOG_INFO -+ "ns16550: %pp: no legacy IRQ, using poll mode\n", -+ &PCI_SBDF(0, b, d, f)); -+ - return 0; - } - } --- -2.35.1 - diff --git a/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch b/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch deleted file mode 100644 index 1312bda..0000000 --- a/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 8e11ec8fbf6f933f8854f4bc54226653316903f2 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:08:06 +0200 -Subject: [PATCH 21/51] PCI: don't allow "pci-phantom=" to mark real devices as - phantom functions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -IOMMU code mapping / unmapping devices and interrupts will misbehave if -a wrong command line option declared a function "phantom" when there's a -real device at that position. Warn about this and adjust the specified -stride (in the worst case ignoring the option altogether). 
- -Requested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 444b555dc9e09fa3ce90f066e0c88dec9b47f422 -master date: 2022-05-20 12:20:35 +0200 ---- - xen/drivers/passthrough/pci.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 395958698e6a..e0491c908f10 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -382,7 +382,24 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) - phantom_devs[i].slot == PCI_SLOT(devfn) && - phantom_devs[i].stride > PCI_FUNC(devfn) ) - { -- pdev->phantom_stride = phantom_devs[i].stride; -+ pci_sbdf_t sbdf = pdev->sbdf; -+ unsigned int stride = phantom_devs[i].stride; -+ -+ while ( (sbdf.fn += stride) > PCI_FUNC(devfn) ) -+ { -+ if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) == 0xffff && -+ pci_conf_read16(sbdf, PCI_DEVICE_ID) == 0xffff ) -+ continue; -+ stride <<= 1; -+ printk(XENLOG_WARNING -+ "%pp looks to be a real device; bumping %04x:%02x:%02x stride to %u\n", -+ &sbdf, phantom_devs[i].seg, -+ phantom_devs[i].bus, phantom_devs[i].slot, -+ stride); -+ sbdf = pdev->sbdf; -+ } -+ if ( PCI_FUNC(stride) ) -+ pdev->phantom_stride = stride; - break; - } - } --- -2.35.1 - diff --git a/0022-x86-pv-Clean-up-_get_page_type.patch b/0022-x86-pv-Clean-up-_get_page_type.patch deleted file mode 100644 index 0270beb..0000000 --- a/0022-x86-pv-Clean-up-_get_page_type.patch +++ /dev/null @@ -1,180 +0,0 @@ -From b152dfbc3ad71a788996440b18174d995c3bffc9 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:19 +0200 -Subject: [PATCH 22/51] x86/pv: Clean up _get_page_type() - -Various fixes for clarity, ahead of making complicated changes. 
- - * Split the overflow check out of the if/else chain for type handling, as - it's somewhat unrelated. - * Comment the main if/else chain to explain what is going on. Adjust one - ASSERT() and state the bit layout for validate-locked and partial states. - * Correct the comment about TLB flushing, as it's backwards. The problem - case is when writeable mappings are retained to a page becoming read-only, - as it allows the guest to bypass Xen's safety checks for updates. - * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not - valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat - all reads as explicitly volatile. The only thing preventing the validated - wait-loop being infinite is the compiler barrier hidden in cpu_relax(). - * Replace one page_get_owner(page) with the already-calculated 'd' already in - scope. - -No functional change. - -This is part of XSA-401 / CVE-2022-26362. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: 9186e96b199e4f7e52e033b238f9fe869afb69c7 -master date: 2022-06-09 14:20:36 +0200 ---- - xen/arch/x86/mm.c | 72 +++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 61 insertions(+), 11 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 4ee2de11051d..79ad7fdd2b82 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2906,16 +2906,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags, - static int _get_page_type(struct page_info *page, unsigned long type, - bool preemptible) - { -- unsigned long nx, x, y = page->u.inuse.type_info; -+ unsigned long nx, x; - int rc = 0; - - ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); - ASSERT(!in_irq()); - -- for ( ; ; ) -+ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; ) - { - x = y; - nx = x + 
1; -+ - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - gdprintk(XENLOG_WARNING, -@@ -2923,8 +2924,15 @@ static int _get_page_type(struct page_info *page, unsigned long type, - mfn_x(page_to_mfn(page))); - return -EINVAL; - } -- else if ( unlikely((x & PGT_count_mask) == 0) ) -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) - { -+ /* -+ * Typeref 0 -> 1. -+ * -+ * Type changes are permitted when the typeref is 0. If the type -+ * actually changes, the page needs re-validating. -+ */ - struct domain *d = page_get_owner(page); - - if ( d && shadow_mode_enabled(d) ) -@@ -2935,8 +2943,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, - { - /* - * On type change we check to flush stale TLB entries. It is -- * vital that no other CPUs are left with mappings of a frame -- * which is about to become writeable to the guest. -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. - */ - cpumask_t *mask = this_cpu(scratch_cpumask); - -@@ -2948,7 +2956,7 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!cpumask_empty(mask)) && - /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(page_get_owner(page)) || -+ (!shadow_mode_enabled(d) || - ((nx & PGT_type_mask) == PGT_writable_page)) ) - { - perfc_incr(need_flush_tlb_flush); -@@ -2979,7 +2987,14 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) - { -- /* Don't log failure if it could be a recursive-mapping attempt. */ -+ /* -+ * else, we're trying to take a new reference, of the wrong type. -+ * -+ * This (being able to prohibit use of the wrong type) is what the -+ * typeref system exists for, but skip printing the failure if it -+ * looks like a recursive mapping, as subsequent logic might -+ * ultimately permit the attempt. 
-+ */ - if ( ((x & PGT_type_mask) == PGT_l2_page_table) && - (type == PGT_l1_page_table) ) - return -EINVAL; -@@ -2998,18 +3013,46 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely(!(x & PGT_validated)) ) - { -+ /* -+ * else, the count is non-zero, and we're grabbing the right type; -+ * but the page hasn't been validated yet. -+ * -+ * The page is in one of two states (depending on PGT_partial), -+ * and should have exactly one reference. -+ */ -+ ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1)); -+ - if ( !(x & PGT_partial) ) - { -- /* Someone else is updating validation of this page. Wait... */ -+ /* -+ * The page has been left in the "validate locked" state -+ * (i.e. PGT_[type] | 1) which means that a concurrent caller -+ * of _get_page_type() is in the middle of validation. -+ * -+ * Spin waiting for the concurrent user to complete (partial -+ * or fully validated), then restart our attempt to acquire a -+ * type reference. -+ */ - do { - if ( preemptible && hypercall_preempt_check() ) - return -EINTR; - cpu_relax(); -- } while ( (y = page->u.inuse.type_info) == x ); -+ } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x ); - continue; - } -- /* Type ref count was left at 1 when PGT_partial got set. */ -- ASSERT((x & PGT_count_mask) == 1); -+ -+ /* -+ * The page has been left in the "partial" state -+ * (i.e., PGT_[type] | PGT_partial | 1). -+ * -+ * Rather than bumping the type count, we need to try to grab the -+ * validation lock; if we succeed, we need to validate the page, -+ * then drop the general ref associated with the PGT_partial bit. -+ * -+ * We grab the validation lock by setting nx to (PGT_[type] | 1) -+ * (i.e., non-zero type count, neither PGT_validated nor -+ * PGT_partial set). 
-+ */ - nx = x & ~PGT_partial; - } - -@@ -3058,6 +3101,13 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - - out: -+ /* -+ * Did we drop the PGT_partial bit when acquiring the typeref? If so, -+ * drop the general reference that went along with it. -+ * -+ * N.B. validate_page() may have re-set PGT_partial, not reflected in -+ * nx, but will have taken an extra ref when doing so. -+ */ - if ( (x & PGT_partial) && !(nx & PGT_partial) ) - put_page(page); - --- -2.35.1 - diff --git a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch deleted file mode 100644 index 1e3febd..0000000 --- a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch +++ /dev/null @@ -1,201 +0,0 @@ -From 8dab3f79b122e69cbcdebca72cdc14f004ee2193 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:37 +0200 -Subject: [PATCH 23/51] x86/pv: Fix ABAC cmpxchg() race in _get_page_type() - -_get_page_type() suffers from a race condition where it incorrectly assumes -that because 'x' was read and a subsequent cmpxchg() succeeds, the type -cannot have changed in-between. Consider: - -CPU A: - 1. Creates an L2e referencing pg - `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page - 2. Issues flush_tlb_mask() -CPU B: - 3. Creates a writeable mapping of pg - `-> _get_page_type(pg, PGT_writable_page), count increases to 1 - 4. Writes into new mapping, creating a TLB entry for pg - 5. Removes the writeable mapping of pg - `-> _put_page_type(pg), count goes back down to 0 -CPU A: - 7. Issues cmpxchg(), setting count 1, type PGT_l1_page_table - -CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and -suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred -until after the guest is prohibited from creating new writeable mappings, -which is after step 7.
- -Defer all safety actions until after the cmpxchg() has successfully taken the -intended typeref, because that is what prevents concurrent users from using -the old type. - -Also remove the early validation for writeable and shared pages. This removes -race conditions where one half of a parallel mapping attempt can return -successfully before: - * The IOMMU pagetables are in sync with the new page type - * Writeable mappings to shared pages have been torn down - -This is part of XSA-401 / CVE-2022-26362. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: 8cc5036bc385112a82f1faff27a0970e6440dfed -master date: 2022-06-09 14:21:04 +0200 ---- - xen/arch/x86/mm.c | 116 ++++++++++++++++++++++++++-------------------- - 1 file changed, 67 insertions(+), 49 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 79ad7fdd2b82..c6429b0f749a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2933,56 +2933,12 @@ static int _get_page_type(struct page_info *page, unsigned long type, - * Type changes are permitted when the typeref is 0. If the type - * actually changes, the page needs re-validating. - */ -- struct domain *d = page_get_owner(page); -- -- if ( d && shadow_mode_enabled(d) ) -- shadow_prepare_page_type_change(d, page, type); - - ASSERT(!(x & PGT_pae_xen_l2)); - if ( (x & PGT_type_mask) != type ) - { -- /* -- * On type change we check to flush stale TLB entries. It is -- * vital that no other CPUs are left with writeable mappings -- * to a frame which is intending to become pgtable/segdesc. 
-- */ -- cpumask_t *mask = this_cpu(scratch_cpumask); -- -- BUG_ON(in_irq()); -- cpumask_copy(mask, d->dirty_cpumask); -- -- /* Don't flush if the timestamp is old enough */ -- tlbflush_filter(mask, page->tlbflush_timestamp); -- -- if ( unlikely(!cpumask_empty(mask)) && -- /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(d) || -- ((nx & PGT_type_mask) == PGT_writable_page)) ) -- { -- perfc_incr(need_flush_tlb_flush); -- /* -- * If page was a page table make sure the flush is -- * performed using an IPI in order to avoid changing the -- * type of a page table page under the feet of -- * spurious_page_fault(). -- */ -- flush_mask(mask, -- (x & PGT_type_mask) && -- (x & PGT_type_mask) <= PGT_root_page_table -- ? FLUSH_TLB | FLUSH_FORCE_IPI -- : FLUSH_TLB); -- } -- -- /* We lose existing type and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); - nx |= type; -- -- /* -- * No special validation needed for writable pages. -- * Page tables and GDT/LDT need to be scanned for validity. -- */ -- if ( type == PGT_writable_page || type == PGT_shared_page ) -- nx |= PGT_validated; - } - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) -@@ -3063,6 +3019,56 @@ static int _get_page_type(struct page_info *page, unsigned long type, - return -EINTR; - } - -+ /* -+ * One typeref has been taken and is now globally visible. -+ * -+ * The page is either in the "validate locked" state (PGT_[type] | 1) or -+ * fully validated (PGT_[type] | PGT_validated | >0). -+ */ -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) -+ { -+ struct domain *d = page_get_owner(page); -+ -+ if ( d && shadow_mode_enabled(d) ) -+ shadow_prepare_page_type_change(d, page, type); -+ -+ if ( (x & PGT_type_mask) != type ) -+ { -+ /* -+ * On type change we check to flush stale TLB entries. It is -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. 
-+ */ -+ cpumask_t *mask = this_cpu(scratch_cpumask); -+ -+ BUG_ON(in_irq()); -+ cpumask_copy(mask, d->dirty_cpumask); -+ -+ /* Don't flush if the timestamp is old enough */ -+ tlbflush_filter(mask, page->tlbflush_timestamp); -+ -+ if ( unlikely(!cpumask_empty(mask)) && -+ /* Shadow mode: track only writable pages. */ -+ (!shadow_mode_enabled(d) || -+ ((nx & PGT_type_mask) == PGT_writable_page)) ) -+ { -+ perfc_incr(need_flush_tlb_flush); -+ /* -+ * If page was a page table make sure the flush is -+ * performed using an IPI in order to avoid changing the -+ * type of a page table page under the feet of -+ * spurious_page_fault(). -+ */ -+ flush_mask(mask, -+ (x & PGT_type_mask) && -+ (x & PGT_type_mask) <= PGT_root_page_table -+ ? FLUSH_TLB | FLUSH_FORCE_IPI -+ : FLUSH_TLB); -+ } -+ } -+ } -+ - if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) != - (type == PGT_writable_page)) ) - { -@@ -3091,13 +3097,25 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!(nx & PGT_validated)) ) - { -- if ( !(x & PGT_partial) ) -+ /* -+ * No special validation needed for writable or shared pages. Page -+ * tables and GDT/LDT need to have their contents audited. -+ * -+ * per validate_page(), non-atomic updates are fine here. 
-+ */ -+ if ( type == PGT_writable_page || type == PGT_shared_page ) -+ page->u.inuse.type_info |= PGT_validated; -+ else - { -- page->nr_validated_ptes = 0; -- page->partial_flags = 0; -- page->linear_pt_count = 0; -+ if ( !(x & PGT_partial) ) -+ { -+ page->nr_validated_ptes = 0; -+ page->partial_flags = 0; -+ page->linear_pt_count = 0; -+ } -+ -+ rc = validate_page(page, type, preemptible); - } -- rc = validate_page(page, type, preemptible); - } - - out: --- -2.35.1 - diff --git a/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch b/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch deleted file mode 100644 index 409b72f..0000000 --- a/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 9cfd796ae05421ded8e4f70b2c55352491cfa841 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:53 +0200 -Subject: [PATCH 24/51] x86/page: Introduce _PAGE_* constants for memory types - -... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_* -constants. These are going to be needed by forthcoming logic. - -No functional change. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 1be8707c75bf4ba68447c74e1618b521dd432499 -master date: 2022-06-09 14:21:38 +0200 ---- - xen/include/asm-x86/page.h | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h -index 1d080cffbe84..2e542050f65a 100644 ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - - #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) - -+/* Memory types, encoded under Xen's choice of MSR_PAT. 
*/ -+#define _PAGE_WB ( 0) -+#define _PAGE_WT ( _PAGE_PWT) -+#define _PAGE_UCM ( _PAGE_PCD ) -+#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT) -+#define _PAGE_WC (_PAGE_PAT ) -+#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT) -+ - /* - * Debug option: Ensure that granted mappings are not implicitly unmapped. - * WARNING: This will need to be disabled to run OSes that use the spare PTE -@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED) - #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \ - _PAGE_DIRTY | _PAGE_RW) --#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD) --#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT) -+#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM) -+#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC) - #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY) - - #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */ --- -2.35.1 - diff --git a/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch b/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch deleted file mode 100644 index 0a24a0a..0000000 --- a/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch +++ /dev/null @@ -1,223 +0,0 @@ -From 74193f4292d9cfc2874866e941d9939d8f33fcef Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:28:23 +0200 -Subject: [PATCH 25/51] x86: Don't change the cacheability of the directmap - -Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings -in response to guest mapping requests") attempted to keep the cacheability -consistent between different mappings of the same page. - -The reason wasn't described in the changelog, but it is understood to be in -regards to a concern over machine check exceptions, owing to errata when using -mixed cacheabilities. 
It did this primarily by updating Xen's mapping of the -page in the direct map when the guest mapped a page with reduced cacheability. - -Unfortunately, the logic didn't actually prevent mixed cacheability from -occurring: - * A guest could map a page normally, and then map the same page with - different cacheability; nothing prevented this. - * The cacheability of the directmap was always latest-takes-precedence in - terms of guest requests. - * Grant-mapped frames with lesser cacheability didn't adjust the page's - cacheattr settings. - * The map_domain_page() function still unconditionally created WB mappings, - irrespective of the page's cacheattr settings. - -Additionally, update_xen_mappings() had a bug where the alias calculation was -wrong for mfn's which were .init content, which should have been treated as -fully guest pages, not Xen pages. - -Worse yet, the logic introduced a vulnerability whereby necessary -pagetable/segdesc adjustments made by Xen in the validation logic could become -non-coherent between the cache and main memory. The CPU could subsequently -operate on the stale value in the cache, rather than the safe value in main -memory. - -The directmap contains primarily mappings of RAM. PAT/MTRR conflict -resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser -cacheability resolves to being coherent. The special case is WC mappings, -which are non-coherent against MTRR=WB regions (except for fully-coherent -CPUs). - -Xen must not have any WC cacheability in the directmap, to prevent Xen's -actions from creating non-coherency. (Guest actions creating non-coherency is -dealt with in subsequent patches.) As all memory types for MTRR=WB ranges -inter-operate coherently, so leave Xen's directmap mappings as WB. - -Only PV guests with access to devices can use reduced-cacheability mappings to -begin with, and they're trusted not to mount DoSs against the system anyway. 
- -Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them. -Shift the later PGC_* constants up, to gain 3 extra bits in the main reference -count. Retain the check in get_page_from_l1e() for special_pages() because a -guest has no business using reduced cacheability on these. - -This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0 - -This is CVE-2022-26363, part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: ae09597da34aee6bc5b76475c5eea6994457e854 -master date: 2022-06-09 14:22:08 +0200 ---- - xen/arch/x86/mm.c | 84 ++++------------------------------------ - xen/include/asm-x86/mm.h | 23 +++++------ - 2 files changed, 17 insertions(+), 90 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index c6429b0f749a..ab32d13a1a0d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn) - return (page_get_owner(page) == dom_io); - } - --static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) --{ -- int err = 0; -- bool alias = mfn >= PFN_DOWN(xen_phys_start) && -- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); -- unsigned long xen_va = -- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); -- -- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) ) -- return 0; -- -- if ( unlikely(alias) && cacheattr ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0); -- if ( !err ) -- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1, -- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); -- if ( unlikely(alias) && !cacheattr && !err ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR); -- -- return err; --} -- - #ifndef NDEBUG - struct mmio_emul_range_ctxt { - const struct domain *d; -@@ -1009,47 +987,14 @@ get_page_from_l1e( - goto could_not_pin; - } - -- if ( pte_flags_to_cacheattr(l1f) != -- ((page->count_info & 
PGC_cacheattr_mask) >> PGC_cacheattr_base) ) -+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) ) - { -- unsigned long x, nx, y = page->count_info; -- unsigned long cacheattr = pte_flags_to_cacheattr(l1f); -- int err; -- -- if ( is_special_page(page) ) -- { -- if ( write ) -- put_page_type(page); -- put_page(page); -- gdprintk(XENLOG_WARNING, -- "Attempt to change cache attributes of Xen heap page\n"); -- return -EACCES; -- } -- -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- err = update_xen_mappings(mfn, cacheattr); -- if ( unlikely(err) ) -- { -- cacheattr = y & PGC_cacheattr_mask; -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | cacheattr; -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- if ( write ) -- put_page_type(page); -- put_page(page); -- -- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn -- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", -- mfn, get_gpfn_from_mfn(mfn), -- l1e_get_intpte(l1e), l1e_owner->domain_id); -- return err; -- } -+ if ( write ) -+ put_page_type(page); -+ put_page(page); -+ gdprintk(XENLOG_WARNING, -+ "Attempt to change cache attributes of Xen heap page\n"); -+ return -EACCES; - } - - return 0; -@@ -2467,24 +2412,9 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - */ - static int cleanup_page_mappings(struct page_info *page) - { -- unsigned int cacheattr = -- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; - int rc = 0; - unsigned long mfn = mfn_x(page_to_mfn(page)); - -- /* -- * If we've modified xen mappings as a result of guest cache -- * attributes, restore them to the "normal" state. -- */ -- if ( unlikely(cacheattr) ) -- { -- page->count_info &= ~PGC_cacheattr_mask; -- -- BUG_ON(is_special_page(page)); -- -- rc = update_xen_mappings(mfn, 0); -- } -- - /* - * If this may be in a PV domain's IOMMU, remove it. 
- * -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index cb9052749963..8a9a43bb0a9d 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -69,25 +69,22 @@ - /* Set when is using a page as a page table */ - #define _PGC_page_table PG_shift(3) - #define PGC_page_table PG_mask(1, 3) -- /* 3-bit PAT/PCD/PWT cache-attribute hint. */ --#define PGC_cacheattr_base PG_shift(6) --#define PGC_cacheattr_mask PG_mask(7, 6) - /* Page is broken? */ --#define _PGC_broken PG_shift(7) --#define PGC_broken PG_mask(1, 7) -+#define _PGC_broken PG_shift(4) -+#define PGC_broken PG_mask(1, 4) - /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */ --#define PGC_state PG_mask(3, 9) --#define PGC_state_inuse PG_mask(0, 9) --#define PGC_state_offlining PG_mask(1, 9) --#define PGC_state_offlined PG_mask(2, 9) --#define PGC_state_free PG_mask(3, 9) -+#define PGC_state PG_mask(3, 6) -+#define PGC_state_inuse PG_mask(0, 6) -+#define PGC_state_offlining PG_mask(1, 6) -+#define PGC_state_offlined PG_mask(2, 6) -+#define PGC_state_free PG_mask(3, 6) - #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) - /* Page is not reference counted (see below for caveats) */ --#define _PGC_extra PG_shift(10) --#define PGC_extra PG_mask(1, 10) -+#define _PGC_extra PG_shift(7) -+#define PGC_extra PG_mask(1, 7) - - /* Count of references to this frame. 
*/ --#define PGC_count_width PG_shift(10) -+#define PGC_count_width PG_shift(7) - #define PGC_count_mask ((1UL<<PGC_count_width)-1) - - /* --- -2.35.1 - diff --git a/0026-x86-Split-cache_flush-out-of-cache_writeback.patch b/0026-x86-Split-cache_flush-out-of-cache_writeback.patch deleted file mode 100644 index 50f70f4..0000000 --- a/0026-x86-Split-cache_flush-out-of-cache_writeback.patch +++ /dev/null @@ -1,294 +0,0 @@ -From 8eafa2d871ae51d461256e4a14175e24df330c70 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:28:48 +0200 -Subject: [PATCH 26/51] x86: Split cache_flush() out of cache_writeback() - -Subsequent changes will want a fully flushing version. - -Use the new helper rather than opencoding it in flush_area_local(). This -resolves an outstanding issue where the conditional sfence is on the wrong -side of the clflushopt loop. clflushopt is ordered with respect to older -stores, not to younger stores. - -Rename gnttab_cache_flush()'s helper to avoid colliding in name. -grant_table.c can see the prototype from cache.h so the build fails -otherwise. - -This is part of XSA-402. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 9a67ffee3371506e1cbfdfff5b90658d4828f6a2 -master date: 2022-06-09 14:22:38 +0200 ---- - xen/arch/x86/flushtlb.c | 84 ++++++++++++++++++++++++--- - xen/common/grant_table.c | 4 +- - xen/drivers/passthrough/vtd/extern.h | 1 - - xen/drivers/passthrough/vtd/iommu.c | 53 +---------------- - xen/drivers/passthrough/vtd/x86/vtd.c | 5 -- - xen/include/asm-x86/cache.h | 7 +++ - 6 files changed, 88 insertions(+), 66 deletions(-) - -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 25798df50f54..0c912b8669f8 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - if ( flags & FLUSH_CACHE ) - { - const struct cpuinfo_x86 *c = &current_cpu_data; -- unsigned long i, sz = 0; -+ unsigned long sz = 0; - - if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) - sz = 1UL << (order + PAGE_SHIFT); -@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - c->x86_clflush_size && c->x86_cache_size && sz && - ((sz >> 10) < c->x86_cache_size) ) - { -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -- for ( i = 0; i < sz; i += c->x86_clflush_size ) -- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";" -- " clflush %0", -- "data16 clflush %0", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- "m" (((const char *)va)[i])); -+ cache_flush(va, sz); - flags &= ~FLUSH_CACHE; - } - else -@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+void cache_flush(const void *addr, unsigned int size) -+{ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. 
-+ */ -+ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ const void *end = addr + size; -+ -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+ /* -+ * Note regarding the "ds" prefix use: it's faster to do a clflush -+ * + prefix than a clflush + nop, and hence the prefix is added instead -+ * of letting the alternative framework fill the gap by appending nops. -+ */ -+ alternative_io("ds; clflush %[p]", -+ "data16 clflush %[p]", /* clflushopt */ -+ X86_FEATURE_CLFLUSHOPT, -+ /* no outputs */, -+ [p] "m" (*(const char *)(addr))); -+ } -+ -+ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+} -+ -+void cache_writeback(const void *addr, unsigned int size) -+{ -+ unsigned int clflush_size; -+ const void *end = addr + size; -+ -+ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */ -+ if ( !boot_cpu_has(X86_FEATURE_CLWB) ) -+ return cache_flush(addr, size); -+ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+/* -+ * The arguments to a macro must not include preprocessor directives. Doing so -+ * results in undefined behavior, so we have to create some defines here in -+ * order to avoid it. 
-+ */ -+#if defined(HAVE_AS_CLWB) -+# define CLWB_ENCODING "clwb %[p]" -+#elif defined(HAVE_AS_XSAVEOPT) -+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ -+#else -+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ -+#endif -+ -+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) -+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) -+# define INPUT BASE_INPUT -+#else -+# define INPUT(addr) "a" (addr), BASE_INPUT(addr) -+#endif -+ -+ asm volatile (CLWB_ENCODING :: INPUT(addr)); -+ -+#undef INPUT -+#undef BASE_INPUT -+#undef CLWB_ENCODING -+ } -+ -+ asm volatile ("sfence" ::: "memory"); -+} -+ - unsigned int guest_flush_tlb_flags(const struct domain *d) - { - bool shadow = paging_mode_shadow(d); -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 66f8ce71741c..4c742cd8fe81 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, - return 0; - } - --static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) -+static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - struct domain *d, *owner; - struct page_info *page; -@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, - return -EFAULT; - for ( ; ; ) - { -- int ret = cache_flush(&op, cur_ref); -+ int ret = _cache_flush(&op, cur_ref); - - if ( ret < 0 ) - return ret; -diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h -index 01e010a10d61..401079299725 100644 ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu, - struct pci_dev *pdev, - u16 did, u16 size, u64 addr); - --unsigned int get_cache_line_size(void); - void flush_all_cache(void); - - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t 
node); -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 8975c1de61bc..bc377c9bcfa4 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -31,6 +31,7 @@ - #include <xen/pci.h> - #include <xen/pci_regs.h> - #include <xen/keyhandler.h> -+#include <asm/cache.h> - #include <asm/msi.h> - #include <asm/nops.h> - #include <asm/irq.h> -@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d, - } - } - --static void sync_cache(const void *addr, unsigned int size) --{ -- static unsigned long clflush_size = 0; -- const void *end = addr + size; -- -- if ( clflush_size == 0 ) -- clflush_size = get_cache_line_size(); -- -- addr -= (unsigned long)addr & (clflush_size - 1); -- for ( ; addr < end; addr += clflush_size ) --/* -- * The arguments to a macro must not include preprocessor directives. Doing so -- * results in undefined behavior, so we have to create some defines here in -- * order to avoid it. -- */ --#if defined(HAVE_AS_CLWB) --# define CLWB_ENCODING "clwb %[p]" --#elif defined(HAVE_AS_XSAVEOPT) --# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ --#else --# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ --#endif -- --#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) --#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) --# define INPUT BASE_INPUT --#else --# define INPUT(addr) "a" (addr), BASE_INPUT(addr) --#endif -- /* -- * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush -- * + prefix than a clflush + nop, and hence the prefix is added instead -- * of letting the alternative framework fill the gap by appending nops. 
-- */ -- alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]", -- "data16 clflush %[p]", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- CLWB_ENCODING, -- X86_FEATURE_CLWB, /* no outputs */, -- INPUT(addr)); --#undef INPUT --#undef BASE_INPUT --#undef CLWB_ENCODING -- -- alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT, -- "sfence", X86_FEATURE_CLWB); --} -- - /* Allocate page table, return its machine address */ - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) - { -@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) - clear_page(vaddr); - - if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache ) -- sync_cache(vaddr, PAGE_SIZE); -+ cache_writeback(vaddr, PAGE_SIZE); - unmap_domain_page(vaddr); - cur_pg++; - } -@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - iommu->nr_pt_levels = agaw_to_level(agaw); - - if ( !ecap_coherent(iommu->ecap) ) -- vtd_ops.sync_cache = sync_cache; -+ vtd_ops.sync_cache = cache_writeback; - - /* allocate domain id bitmap */ - iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); -diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c -index 6681dccd6970..55f0faa521cb 100644 ---- a/xen/drivers/passthrough/vtd/x86/vtd.c -+++ b/xen/drivers/passthrough/vtd/x86/vtd.c -@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va) - unmap_domain_page(va); - } - --unsigned int get_cache_line_size(void) --{ -- return ((cpuid_ebx(1) >> 8) & 0xff) * 8; --} -- - void flush_all_cache() - { - wbinvd(); -diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h -index 1f7173d8c72c..e4770efb22b9 100644 ---- a/xen/include/asm-x86/cache.h -+++ b/xen/include/asm-x86/cache.h -@@ -11,4 +11,11 @@ - - #define __read_mostly __section(".data.read_mostly") - -+#ifndef __ASSEMBLY__ -+ -+void cache_flush(const void *addr, unsigned int size); -+void cache_writeback(const void *addr, unsigned int 
size); -+ -+#endif -+ - #endif --- -2.35.1 - diff --git a/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch b/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch deleted file mode 100644 index 060bc99..0000000 --- a/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch +++ /dev/null @@ -1,95 +0,0 @@ -From c4815be949aae6583a9a22897beb96b095b4f1a2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:29:13 +0200 -Subject: [PATCH 27/51] x86/amd: Work around CLFLUSH ordering on older parts - -On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, -including reads and writes to the address, and LFENCE/SFENCE instructions. - -This creates a multitude of problematic corner cases, laid out in the manual. -Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 062868a5a8b428b85db589fa9a6d6e43969ffeb9 -master date: 2022-06-09 14:23:07 +0200 ---- - xen/arch/x86/cpu/amd.c | 8 ++++++++ - xen/arch/x86/flushtlb.c | 13 ++++++++++++- - xen/include/asm-x86/cpufeatures.h | 1 + - 3 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index a8e37dbb1f5c..b3b9a0df5fed 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c) - if (!cpu_has_lfence_dispatch) - __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); - -+ /* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with -+ * everything, including reads and writes to address, and -+ * LFENCE/SFENCE instructions. -+ */ -+ if (!cpu_has_clflushopt) -+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); -+ - switch(c->x86) - { - case 0xf ... 
0x11: -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 0c912b8669f8..dcbb4064012e 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+/* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, -+ * including reads and writes to address, and LFENCE/SFENCE instructions. -+ * -+ * This function only works safely after alternatives have run. Luckily, at -+ * the time of writing, we don't flush the caches that early. -+ */ - void cache_flush(const void *addr, unsigned int size) - { - /* -@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size) - unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; - const void *end = addr + size; - -+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE); -+ - addr -= (unsigned long)addr & (clflush_size - 1); - for ( ; addr < end; addr += clflush_size ) - { -@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size) - [p] "m" (*(const char *)(addr))); - } - -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+ alternative_2("", -+ "sfence", X86_FEATURE_CLFLUSHOPT, -+ "mfence", X86_BUG_CLFLUSH_MFENCE); - } - - void cache_writeback(const void *addr, unsigned int size) -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 7413febd7ad8..ff3157d52d13 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch - - #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */ - #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ -+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ - - /* Total number of capability words, inc synth and bug words. 
*/ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ --- -2.35.1 - diff --git a/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch b/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch deleted file mode 100644 index af60348..0000000 --- a/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch +++ /dev/null @@ -1,160 +0,0 @@ -From dc020d8d1ba420e2dd0e7a40f5045db897f3c4f4 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:29:38 +0200 -Subject: [PATCH 28/51] x86/pv: Track and flush non-coherent mappings of RAM - -There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with -devices that make non-coherent writes. The Linux sound subsystem makes -extensive use of this technique. - -For such usecases, the guest's DMA buffer is mapped and consistently used as -WC, and Xen doesn't interact with the buffer. - -However, a mischievous guest can use WC mappings to deliberately create -non-coherency between the cache and RAM, and use this to trick Xen into -validating a pagetable which isn't actually safe. - -Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set -it whenever a non-coherent writeable mapping is created. If the page is used -as anything other than PGT_writable_page, force a cache flush before -validation. Also force a cache flush before the page is returned to the heap. - -This is CVE-2022-26364, part of XSA-402. 
- -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: c1c9cae3a9633054b177c5de21ad7268162b2f2c -master date: 2022-06-09 14:23:37 +0200 ---- - xen/arch/x86/mm.c | 38 +++++++++++++++++++++++++++++++++++ - xen/arch/x86/pv/grant_table.c | 21 +++++++++++++++++++ - xen/include/asm-x86/mm.h | 6 +++++- - 3 files changed, 64 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index ab32d13a1a0d..bab9624fabb7 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -997,6 +997,15 @@ get_page_from_l1e( - return -EACCES; - } - -+ /* -+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache -+ * flush later if the target is used as anything but a PGT_writeable page. -+ * We care about all writeable mappings, including foreign mappings. -+ */ -+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) && -+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) ) -+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info); -+ - return 0; - - could_not_pin: -@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page) - } - } - -+ /* -+ * Flush the cache if there were previously non-coherent writeable -+ * mappings of this page. This forces the page to be coherent before it -+ * is freed back to the heap. -+ */ -+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ } -+ - return rc; - } - -@@ -3027,6 +3049,22 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!(nx & PGT_validated)) ) - { -+ /* -+ * Flush the cache if there were previously non-coherent mappings of -+ * this page, and we're trying to use it as anything other than a -+ * writeable page. 
This forces the page to be coherent before we -+ * validate its contents for safety. -+ */ -+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ -+ page->u.inuse.type_info &= ~PGT_non_coherent; -+ } -+ - /* - * No special validation needed for writable or shared pages. Page - * tables and GDT/LDT need to have their contents audited. -diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c -index 0325618c9883..81c72e61ed55 100644 ---- a/xen/arch/x86/pv/grant_table.c -+++ b/xen/arch/x86/pv/grant_table.c -@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame, - - ol1e = *pl1e; - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * We always create mappings in this path. However, our caller, -+ * map_grant_ref(), only passes potentially non-zero cache_flags for -+ * MMIO frames, so this path doesn't create non-coherent mappings of -+ * RAM frames and there's no need to calculate PGT_non_coherent. -+ */ -+ ASSERT(!cache_flags || is_iomem_page(frame)); -+ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame, - l1e_get_flags(ol1e), addr, grant_pte_flags); - - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * Generally, replace_grant_pv_mapping() is used to destroy mappings -+ * (nl1e = l1e_empty()), but it can be a present mapping on the -+ * GNTABOP_unmap_and_replace path. -+ * -+ * In such cases, the PTE is fully transplanted from its old location -+ * via steal_linear_addr(), so we need not perform PGT_non_coherent -+ * checking here. 
-+ */ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 8a9a43bb0a9d..7464167ae192 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -53,8 +53,12 @@ - #define _PGT_partial PG_shift(8) - #define PGT_partial PG_mask(1, 8) - -+/* Has this page been mapped writeable with a non-coherent memory type? */ -+#define _PGT_non_coherent PG_shift(9) -+#define PGT_non_coherent PG_mask(1, 9) -+ - /* Count of uses of this frame as its current type. */ --#define PGT_count_width PG_shift(8) -+#define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL<<PGT_count_width)-1) - - /* Are the 'type mask' bits identical? */ --- -2.35.1 - diff --git a/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch b/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch deleted file mode 100644 index 90ce4cf..0000000 --- a/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0b4e62847c5af1a59eea8d17093feccd550d1c26 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Fri, 10 Jun 2022 10:28:28 +0200 -Subject: [PATCH 29/51] x86/mm: account for PGT_pae_xen_l2 in recently added - assertion - -While PGT_pae_xen_l2 will be zapped once the type refcount of an L2 page -reaches zero, it'll be retained as long as the type refcount is non- -zero. Hence any checking against the requested type needs to either zap -the bit from the type or include it in the used mask. 
- -Fixes: 9186e96b199e ("x86/pv: Clean up _get_page_type()") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: c2095ac76be0f4a1940346c9ffb49fb967345060 -master date: 2022-06-10 10:21:06 +0200 ---- - xen/arch/x86/mm.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index bab9624fabb7..c1b9a3bb102a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2928,7 +2928,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, - * The page is in one of two states (depending on PGT_partial), - * and should have exactly one reference. - */ -- ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1)); -+ ASSERT((x & (PGT_type_mask | PGT_pae_xen_l2 | PGT_count_mask)) == -+ (type | 1)); - - if ( !(x & PGT_partial) ) - { --- -2.35.1 - diff --git a/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch b/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch deleted file mode 100644 index af25b5c..0000000 --- a/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 0e80f9f61168d4e4f008da75762cee0118f802ed Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 13 Jun 2022 16:19:01 +0100 -Subject: [PATCH 30/51] x86/spec-ctrl: Make VERW flushing runtime conditional -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently, VERW flushing to mitigate MDS is boot time conditional per domain -type. However, to provide mitigations for DRPW (CVE-2022-21166), we need to -conditionally use VERW based on the trustworthiness of the guest, and the -devices passed through. - -Remove the PV/HVM alternatives and instead issue a VERW on the return-to-guest -path depending on the SCF_verw bit in cpuinfo spec_ctrl_flags. 
- -Introduce spec_ctrl_init_domain() and d->arch.verw to calculate the VERW -disposition at domain creation time, and context switch the SCF_verw bit. - -For now, VERW flushing is used and controlled exactly as before, but later -patches will add per-domain cases too. - -No change in behaviour. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit e06b95c1d44ab80da255219fc9f1e2fc423edcb6) ---- - docs/misc/xen-command-line.pandoc | 5 ++--- - xen/arch/x86/domain.c | 12 ++++++++++-- - xen/arch/x86/hvm/vmx/entry.S | 2 +- - xen/arch/x86/spec_ctrl.c | 30 +++++++++++++++++------------ - xen/include/asm-x86/cpufeatures.h | 3 +-- - xen/include/asm-x86/domain.h | 3 +++ - xen/include/asm-x86/spec_ctrl.h | 2 ++ - xen/include/asm-x86/spec_ctrl_asm.h | 16 +++++++++++++-- - 8 files changed, 51 insertions(+), 22 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 1d08fb7e9aa6..d5cb09f86541 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2258,9 +2258,8 @@ in place for guests to use. - Use of a positive boolean value for either of these options is invalid. - - The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine --grained control over the alternative blocks used by Xen. These impact Xen's --ability to protect itself, and Xen's ability to virtualise support for guests --to use. -+grained control over the primitives by Xen. These impact Xen's ability to -+protect itself, and Xen's ability to virtualise support for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. 
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index ef1812dc1402..1fe6644a71ae 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -863,6 +863,8 @@ int arch_domain_create(struct domain *d, - - d->arch.msr_relaxed = config->arch.misc_flags & XEN_X86_MSR_RELAXED; - -+ spec_ctrl_init_domain(d); -+ - return 0; - - fail: -@@ -2017,14 +2019,15 @@ static void __context_switch(void) - void context_switch(struct vcpu *prev, struct vcpu *next) - { - unsigned int cpu = smp_processor_id(); -+ struct cpu_info *info = get_cpu_info(); - const struct domain *prevd = prev->domain, *nextd = next->domain; - unsigned int dirty_cpu = read_atomic(&next->dirty_cpu); - - ASSERT(prev != next); - ASSERT(local_irq_is_enabled()); - -- get_cpu_info()->use_pv_cr3 = false; -- get_cpu_info()->xen_cr3 = 0; -+ info->use_pv_cr3 = false; -+ info->xen_cr3 = 0; - - if ( unlikely(dirty_cpu != cpu) && dirty_cpu != VCPU_CPU_CLEAN ) - { -@@ -2088,6 +2091,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - *last_id = next_id; - } - } -+ -+ /* Update the top-of-stack block with the VERW disposition. */ -+ info->spec_ctrl_flags &= ~SCF_verw; -+ if ( nextd->arch.verw ) -+ info->spec_ctrl_flags |= SCF_verw; - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 49651f3c435a..5f5de45a1309 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,7 +87,7 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM -+ DO_SPEC_CTRL_COND_VERW - - mov VCPU_hvm_guest_cr2(%rbx),%rax - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c19464da70ce..21730aa03071 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -36,8 +36,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __initdata opt_md_clear_pv = -1; --static int8_t __initdata opt_md_clear_hvm = -1; -+static int8_t __read_mostly opt_md_clear_pv = -1; -+static int8_t __read_mostly opt_md_clear_hvm = -1; - - /* Cmdline controls for Xen's speculative settings. */ - static enum ind_thunk { -@@ -932,6 +932,13 @@ static __init void mds_calculations(uint64_t caps) - } - } - -+void spec_ctrl_init_domain(struct domain *d) -+{ -+ bool pv = is_pv_domain(d); -+ -+ d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; -+} -+ - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -@@ -1196,21 +1203,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The PV blocks need using all the -- * time, and the Idle blocks need using if either PV or HVM defences are -- * used. -+ * Enable MDS defences as applicable. The Idle blocks need using if -+ * either PV or HVM defences are used. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with -- * equivelent semantics to avoid needing to perform both flushes on the -- * HVM path. The HVM blocks don't need activating if our hypervisor told -- * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. -+ * equivalent semantics to avoid needing to perform both flushes on the -+ * HVM path. 
Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * -+ * After calculating the appropriate idle setting, simplify -+ * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); - if ( opt_md_clear_pv || opt_md_clear_hvm ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); -+ opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index ff3157d52d13..bd45a144ee78 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -35,8 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --XEN_CPUFEATURE(SC_VERW_PV, X86_SYNTH(23)) /* VERW used by Xen for PV */ --XEN_CPUFEATURE(SC_VERW_HVM, X86_SYNTH(24)) /* VERW used by Xen for HVM */ -+/* Bits 23,24 unused. 
*/ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 92d54de0b9a1..2398a1d99da9 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,6 +319,9 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -+ /* Use VERW on return-to-guest for its flushing side effect. */ -+ bool verw; -+ - union { - struct pv_domain pv; - struct hvm_domain hvm; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f76029523610..751355f471f4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -24,6 +24,7 @@ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) -+#define SCF_verw (1 << 3) - - #ifndef __ASSEMBLY__ - -@@ -32,6 +33,7 @@ - #include <asm/msr-index.h> - - void init_speculation_mitigations(void); -+void spec_ctrl_init_domain(struct domain *d); - - extern bool opt_ibpb; - extern bool opt_ssbd; -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 02b3b18ce69f..5a590bac44aa 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -136,6 +136,19 @@ - #endif - .endm - -+.macro DO_SPEC_CTRL_COND_VERW -+/* -+ * Requires %rsp=cpuinfo -+ * -+ * Issue a VERW for its flushing side effect, if indicated. This is a Spectre -+ * v1 gadget, but the IRET/VMEntry is serialising. 
-+ */ -+ testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ jz .L\@_verw_skip -+ verw CPUINFO_verw_sel(%rsp) -+.L\@_verw_skip: -+.endm -+ - .macro DO_SPEC_CTRL_ENTRY maybexen:req - /* - * Requires %rsp=regs (also cpuinfo if !maybexen) -@@ -231,8 +244,7 @@ - #define SPEC_CTRL_EXIT_TO_PV \ - ALTERNATIVE "", \ - DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ -- X86_FEATURE_SC_VERW_PV -+ DO_SPEC_CTRL_COND_VERW - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. --- -2.35.1 - diff --git a/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch b/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch deleted file mode 100644 index 3b91fb5..0000000 --- a/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch +++ /dev/null @@ -1,98 +0,0 @@ -From a83108736db0ddaa5855f5abda6dcc8ae4fe25e9 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 20 Sep 2021 18:47:49 +0100 -Subject: [PATCH 31/51] x86/spec-ctrl: Enumeration for MMIO Stale Data controls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The three *_NO bits indicate non-susceptibility to the SSDP, FBSDP and PSDP -data movement primitives. - -FB_CLEAR indicates that the VERW instruction has re-gained it's Fill Buffer -flushing side effect. This is only enumerated on parts where VERW had -previously lost it's flushing side effect due to the MDS/TAA vulnerabilities -being fixed in hardware. - -FB_CLEAR_CTRL is available on a subset of FB_CLEAR parts where the Fill Buffer -clearing side effect of VERW can be turned off for performance reasons. - -This is part of XSA-404. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit 2ebe8fe9b7e0d36e9ec3cfe4552b2b197ef0dcec) ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++++--- - xen/include/asm-x86/msr-index.h | 6 ++++++ - 2 files changed, 14 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 21730aa03071..d285538bde9f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -323,7 +323,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -332,13 +332,16 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", - (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : "", - (caps & ARCH_CAPS_TAA_NO) ? " TAA_NO" : "", -+ (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", -+ (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", -+ (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); - - /* Hardware features which need driving to mitigate issues. */ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? 
" IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -353,7 +356,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", - (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "", - (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "", -- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : ""); -+ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", -+ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", -+ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : ""); - - /* Compiled-in support which pertains to mitigations. */ - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 31964b88af7a..72bc32ba04ff 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -66,6 +66,11 @@ - #define ARCH_CAPS_IF_PSCHANGE_MC_NO (_AC(1, ULL) << 6) - #define ARCH_CAPS_TSX_CTRL (_AC(1, ULL) << 7) - #define ARCH_CAPS_TAA_NO (_AC(1, ULL) << 8) -+#define ARCH_CAPS_SBDR_SSDP_NO (_AC(1, ULL) << 13) -+#define ARCH_CAPS_FBSDP_NO (_AC(1, ULL) << 14) -+#define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15) -+#define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17) -+#define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -@@ -83,6 +88,7 @@ - #define MCU_OPT_CTRL_RNGDS_MITG_DIS (_AC(1, ULL) << 0) - #define MCU_OPT_CTRL_RTM_ALLOW (_AC(1, ULL) << 1) - #define MCU_OPT_CTRL_RTM_LOCKED (_AC(1, ULL) << 2) -+#define MCU_OPT_CTRL_FB_CLEAR_DIS (_AC(1, ULL) << 3) - - #define MSR_RTIT_OUTPUT_BASE 0x00000560 - #define MSR_RTIT_OUTPUT_MASK 0x00000561 --- -2.35.1 - diff --git a/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch b/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch deleted file mode 100644 index c63891a..0000000 --- a/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch +++ /dev/null @@ -1,187 +0,0 
@@ -From 2e82446cb252f6c8ac697e81f4155872c69afde4 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 13 Jun 2022 19:18:32 +0100 -Subject: [PATCH 32/51] x86/spec-ctrl: Add spec-ctrl=unpriv-mmio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Per Xen's support statement, PCI passthrough should be to trusted domains -because the overall system security depends on factors outside of Xen's -control. - -As such, Xen, in a supported configuration, is not vulnerable to DRPW/SBDR. - -However, users who have risk assessed their configuration may be happy with -the risk of DoS, but unhappy with the risk of cross-domain data leakage. Such -users should enable this option. - -On CPUs vulnerable to MDS, the existing mitigations are the best we can do to -mitigate MMIO cross-domain data leakage. - -On CPUs fixed to MDS but vulnerable MMIO stale data leakage, this option: - - * On CPUs susceptible to FBSDP, mitigates cross-domain fill buffer leakage - using FB_CLEAR. - * On CPUs susceptible to SBDR, mitigates RNG data recovery by engaging the - srb-lock, previously used to mitigate SRBDS. - -Both mitigations require microcode from IPU 2022.1, May 2022. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit 8c24b70fedcb52633b2370f834d8a2be3f7fa38e) ---- - docs/misc/xen-command-line.pandoc | 14 +++++++-- - xen/arch/x86/spec_ctrl.c | 48 ++++++++++++++++++++++++------- - 2 files changed, 48 insertions(+), 14 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d5cb09f86541..a642e43476a2 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2235,7 +2235,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). 
- ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, --> l1d-flush,branch-harden,srb-lock}=<bool> ]` -+> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2314,8 +2314,16 @@ Xen will enable this mitigation. - On hardware supporting SRBDS_CTRL, the `srb-lock=` option can be used to force - or prevent Xen from protect the Special Register Buffer from leaking stale - data. By default, Xen will enable this mitigation, except on parts where MDS --is fixed and TAA is fixed/mitigated (in which case, there is believed to be no --way for an attacker to obtain the stale data). -+is fixed and TAA is fixed/mitigated and there are no unprivileged MMIO -+mappings (in which case, there is believed to be no way for an attacker to -+obtain stale data). -+ -+The `unpriv-mmio=` boolean indicates whether the system has (or will have) -+less than fully privileged domains granted access to MMIO devices. By -+default, this option is disabled. If enabled, Xen will use the `FB_CLEAR` -+and/or `SRBDS_CTRL` functionality available in the Intel May 2022 microcode -+release to mitigate cross-domain leakage of data via the MMIO Stale Data -+vulnerabilities. - - ### sync_console - > `= <boolean>` -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index d285538bde9f..099113ba41e6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -67,6 +67,8 @@ static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ - static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
*/ - - static int8_t __initdata opt_srb_lock = -1; -+static bool __initdata opt_unpriv_mmio; -+static bool __read_mostly opt_fb_clear_mmio; - - static int __init parse_spec_ctrl(const char *s) - { -@@ -184,6 +186,8 @@ static int __init parse_spec_ctrl(const char *s) - opt_branch_harden = val; - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; -+ else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -+ opt_unpriv_mmio = val; - else - rc = -EINVAL; - -@@ -392,7 +396,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb ? " IBPB" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "", -+ opt_md_clear_pv || opt_md_clear_hvm || -+ opt_fb_clear_mmio ? " VERW" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ -@@ -941,7 +946,9 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; -+ d->arch.verw = -+ (pv ? opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d)); - } - - void __init init_speculation_mitigations(void) -@@ -1195,6 +1202,18 @@ void __init init_speculation_mitigations(void) - - mds_calculations(caps); - -+ /* -+ * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -+ * reintroduced the VERW fill buffer flushing side effect because of a -+ * susceptibility to FBSDP. -+ * -+ * If unprivileged guests have (or will have) MMIO mappings, we can -+ * mitigate cross-domain leakage of fill buffer data by issuing VERW on -+ * the return-to-guest path. -+ */ -+ if ( opt_unpriv_mmio ) -+ opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR; -+ - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
- * This will only be a token effort for MLPDS/MFBDS when HT is enabled, -@@ -1208,18 +1227,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The Idle blocks need using if -- * either PV or HVM defences are used. -+ * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -+ * either the PV or HVM MDS defences are used, or if we may give MMIO -+ * access to untrusted guests. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with - * equivalent semantics to avoid needing to perform both flushes on the -- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for -+ * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify - * opt_md_clear_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm ) -+ if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); - opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - -@@ -1284,14 +1305,19 @@ void __init init_speculation_mitigations(void) - * On some SRBDS-affected hardware, it may be safe to relax srb-lock by - * default. - * -- * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only known -- * way to access the Fill Buffer. If TSX isn't available (inc. SKU -- * reasons on some models), or TSX is explicitly disabled, then there is -- * no need for the extra overhead to protect RDRAND/RDSEED. -+ * All parts with SRBDS_CTRL suffer SSDP, the mechanism by which stale RNG -+ * data becomes available to other contexts. 
To recover the data, an -+ * attacker needs to use: -+ * - SBDS (MDS or TAA to sample the cores fill buffer) -+ * - SBDR (Architecturally retrieve stale transaction buffer contents) -+ * - DRPW (Architecturally latch stale fill buffer data) -+ * -+ * On MDS_NO parts, and with TAA_NO or TSX unavailable/disabled, and there -+ * is no unprivileged MMIO access, the RNG data doesn't need protecting. - */ - if ( cpu_has_srbds_ctrl ) - { -- if ( opt_srb_lock == -1 && -+ if ( opt_srb_lock == -1 && !opt_unpriv_mmio && - (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && - (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) - opt_srb_lock = 0; --- -2.35.1 - diff --git a/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch b/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch deleted file mode 100644 index 07f488d..0000000 --- a/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 460b08d6c6c16b3f32aa138e772b759ae02a4479 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 12 Jul 2022 11:10:34 +0200 -Subject: [PATCH 33/51] IOMMU/x86: work around bogus gcc12 warning in - hvm_gsi_eoi() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -As per [1] the expansion of the pirq_dpci() macro causes a -Waddress -controlled warning (enabled implicitly in our builds, if not by default) -tying the middle part of the involved conditional expression to the -surrounding boolean context. Work around this by introducing a local -inline function in the affected source file. 
- -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> - -[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102967 -master commit: 80ad8db8a4d9bb24952f0aea788ce6f47566fa76 -master date: 2022-06-15 10:19:32 +0200 ---- - xen/drivers/passthrough/x86/hvm.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c -index 0b37cd145b60..ba0f6c53d742 100644 ---- a/xen/drivers/passthrough/x86/hvm.c -+++ b/xen/drivers/passthrough/x86/hvm.c -@@ -25,6 +25,18 @@ - #include <asm/hvm/support.h> - #include <asm/io_apic.h> - -+/* -+ * Gcc12 takes issue with pirq_dpci() being used in boolean context (see gcc -+ * bug 102967). While we can't replace the macro definition in the header by an -+ * inline function, we can do so here. -+ */ -+static inline struct hvm_pirq_dpci *_pirq_dpci(struct pirq *pirq) -+{ -+ return pirq_dpci(pirq); -+} -+#undef pirq_dpci -+#define pirq_dpci(pirq) _pirq_dpci(pirq) -+ - static DEFINE_PER_CPU(struct list_head, dpci_list); - - /* --- -2.35.1 - diff --git a/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch b/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch deleted file mode 100644 index ac71ab8..0000000 --- a/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 5cb8142076ce1ce53eafd7e00acb4d0eac4e7784 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 12 Jul 2022 11:11:35 +0200 -Subject: [PATCH 34/51] ehci-dbgp: fix selecting n-th ehci controller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The ehci<n> number was parsed but ignored. 
- -Fixes: 322ecbe4ac85 ("console: add EHCI debug port based serial console") -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: d6d0cb659fda64430d4649f8680c5cead32da8fd -master date: 2022-06-16 14:23:37 +0100 ---- - xen/drivers/char/ehci-dbgp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/drivers/char/ehci-dbgp.c b/xen/drivers/char/ehci-dbgp.c -index c893d246defa..66b4811af24a 100644 ---- a/xen/drivers/char/ehci-dbgp.c -+++ b/xen/drivers/char/ehci-dbgp.c -@@ -1478,7 +1478,7 @@ void __init ehci_dbgp_init(void) - unsigned int num = 0; - - if ( opt_dbgp[4] ) -- simple_strtoul(opt_dbgp + 4, &e, 10); -+ num = simple_strtoul(opt_dbgp + 4, &e, 10); - - dbgp->cap = find_dbgp(dbgp, num); - if ( !dbgp->cap ) --- -2.35.1 - diff --git a/0035-tools-xenstored-Harden-corrupt.patch b/0035-tools-xenstored-Harden-corrupt.patch deleted file mode 100644 index bb0f7f1..0000000 --- a/0035-tools-xenstored-Harden-corrupt.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 81ee3d08351be1ef2a14d371993604098d6a4673 Mon Sep 17 00:00:00 2001 -From: Julien Grall <jgrall@amazon.com> -Date: Tue, 12 Jul 2022 11:12:13 +0200 -Subject: [PATCH 35/51] tools/xenstored: Harden corrupt() - -At the moment, corrupt() is neither checking for allocation failure -nor freeing the allocated memory. - -Harden the code by printing ENOMEM if the allocation failed and -free 'str' after the last use. - -This is not considered to be a security issue because corrupt() should -only be called when Xenstored thinks the database is corrupted. Note -that the trigger (i.e. a guest reliably provoking the call) would be -a security issue. 
- -Fixes: 06d17943f0cd ("Added a basic integrity checker, and some basic ability to recover from store") -Signed-off-by: Julien Grall <jgrall@amazon.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -master commit: db3382dd4f468c763512d6bf91c96773395058fb -master date: 2022-06-23 13:44:10 +0100 ---- - tools/xenstore/xenstored_core.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 91d093a12ea6..0c8ee276f837 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -2087,7 +2087,10 @@ void corrupt(struct connection *conn, const char *fmt, ...) - va_end(arglist); - - log("corruption detected by connection %i: err %s: %s", -- conn ? (int)conn->id : -1, strerror(saved_errno), str); -+ conn ? (int)conn->id : -1, strerror(saved_errno), -+ str ?: "ENOMEM"); -+ -+ talloc_free(str); - - check_store(); - } --- -2.35.1 - diff --git a/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch b/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch deleted file mode 100644 index 8bc0768..0000000 --- a/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 09d533f4c80b7eaf9fb4e36ebba8259580857a9d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:12:46 +0200 -Subject: [PATCH 36/51] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with - legacy IBRS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Back at the time of the original Spectre-v2 fixes, it was recommended to clear -MSR_SPEC_CTRL when going idle. This is because of the side effects on the -sibling thread caused by the microcode IBRS and STIBP implementations which -were retrofitted to existing CPUs. 
- -However, there are no relevant cross-thread impacts for the hardware -IBRS/STIBP implementations, so this logic should not be used on Intel CPUs -supporting eIBRS, or any AMD CPUs; doing so only adds unnecessary latency to -the idle path. - -Furthermore, there's no point playing with MSR_SPEC_CTRL in the idle paths if -SMT is disabled for other reasons. - -Fixes: 8d03080d2a33 ("x86/spec-ctrl: Cease using thunk=lfence on AMD") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: ffc7694e0c99eea158c32aa164b7d1e1bb1dc46b -master date: 2022-06-30 18:07:13 +0100 ---- - xen/arch/x86/spec_ctrl.c | 10 ++++++++-- - xen/include/asm-x86/cpufeatures.h | 2 +- - xen/include/asm-x86/spec_ctrl.h | 5 +++-- - 3 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 099113ba41e6..1ed5ceda8b46 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1150,8 +1150,14 @@ void __init init_speculation_mitigations(void) - /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ - init_shadow_spec_ctrl_state(); - -- /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */ -- if ( default_xen_spec_ctrl ) -+ /* -+ * For microcoded IBRS only (i.e. Intel, pre eIBRS), it is recommended to -+ * clear MSR_SPEC_CTRL before going idle, to avoid impacting sibling -+ * threads. Activate this if SMT is enabled, and Xen is using a non-zero -+ * MSR_SPEC_CTRL setting. 
-+ */ -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) && !(caps & ARCH_CAPS_IBRS_ALL) && -+ hw_smt_enabled && default_xen_spec_ctrl ) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); - - xpti_init_default(caps); -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index bd45a144ee78..493d338a085e 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -33,7 +33,7 @@ XEN_CPUFEATURE(SC_MSR_HVM, X86_SYNTH(17)) /* MSR_SPEC_CTRL used by Xen fo - XEN_CPUFEATURE(SC_RSB_PV, X86_SYNTH(18)) /* RSB overwrite needed for PV */ - XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM */ - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ --XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ -+XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - /* Bits 23,24 unused. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 751355f471f4..7e83e0179fb9 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -78,7 +78,8 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) - uint32_t val = 0; - - /* -- * Branch Target Injection: -+ * It is recommended in some cases to clear MSR_SPEC_CTRL when going idle, -+ * to avoid impacting sibling threads. - * - * Latch the new shadow value, then enable shadowing, then update the MSR. - * There are no SMP issues here; only local processor ordering concerns. -@@ -114,7 +115,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) - uint32_t val = info->xen_spec_ctrl; - - /* -- * Branch Target Injection: -+ * Restore MSR_SPEC_CTRL on exit from idle. - * - * Disable shadowing before updating the MSR. 
There are no SMP issues - * here; only local processor ordering concerns. --- -2.35.1 - diff --git a/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch b/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch deleted file mode 100644 index 156aa58..0000000 --- a/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch +++ /dev/null @@ -1,234 +0,0 @@ -From db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:13:33 +0200 -Subject: [PATCH 37/51] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow - hardware STIBP hint -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -STIBP and PSFD are slightly weird bits, because they're both implied by other -bits in MSR_SPEC_CTRL. Add fine grain controls for them, and take the -implications into account when setting IBRS/SSBD. - -Rearrange the IBPB text/variables/logic to keep all the MSR_SPEC_CTRL bits -together, for consistency. - -However, AMD have a hardware hint CPUID bit recommending that STIBP be set -unilaterally. This is advertised on Zen3, so follow the recommendation. -Furthermore, in such cases, set STIBP behind the guest's back for now. This -has negligible overhead for the guest, but saves a WRMSR on vmentry. This is -the only default change. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: fef244b179c06fcdfa581f7d57fa6e578c49ff50 -master date: 2022-06-30 18:07:13 +0100 ---- - docs/misc/xen-command-line.pandoc | 21 +++++++--- - xen/arch/x86/hvm/svm/vmcb.c | 9 +++++ - xen/arch/x86/spec_ctrl.c | 67 ++++++++++++++++++++++++++----- - 3 files changed, 82 insertions(+), 15 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index a642e43476a2..46e9c58d35cd 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2234,8 +2234,9 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, --> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, --> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]` -+> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, -+> eager-fpu,l1d-flush,branch-harden,srb-lock, -+> unpriv-mmio}=<bool> ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2285,9 +2286,10 @@ On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - If Xen is not using IBRS itself, functionality is still set up so IBRS can be - virtualised for guests. - --On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` --option can be used to force (the default) or prevent Xen from issuing branch --prediction barriers on vcpu context switches. -+On hardware supporting STIBP (Single Thread Indirect Branch Predictors), the -+`stibp=` option can be used to force or prevent Xen using the feature itself. -+By default, Xen will use STIBP when IBRS is in use (IBRS implies STIBP), and -+when hardware hints recommend using it as a blanket setting. 
- - On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=` -option can be used to force or prevent Xen using the feature itself. On AMD -@@ -2295,6 +2297,15 @@ hardware, this is a global option applied at boot, and not virtualised for - guest use. On Intel hardware, the feature is virtualised for guests, - independently of Xen's choice of setting. - -+On hardware supporting PSFD (Predictive Store Forwarding Disable), the `psfd=` -+option can be used to force or prevent Xen using the feature itself. By -+default, Xen will not use PSFD. PSFD is implied by SSBD, and SSBD is off by -+default. -+ -+On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` -+option can be used to force (the default) or prevent Xen from issuing branch -+prediction barriers on vcpu context switches. -+ - On all hardware, the `eager-fpu=` option can be used to force or prevent Xen - from using fully eager FPU context switches. This is currently implemented as - a global control. By default, Xen will choose to use fully eager context -diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c -index 565e997155f2..ef7224eb5dd7 100644 ---- a/xen/arch/x86/hvm/svm/vmcb.c -+++ b/xen/arch/x86/hvm/svm/vmcb.c -@@ -29,6 +29,7 @@ - #include <asm/hvm/support.h> - #include <asm/hvm/svm/svm.h> - #include <asm/hvm/svm/svmdebug.h> -+#include <asm/spec_ctrl.h> - - struct vmcb_struct *alloc_vmcb(void) - { -@@ -176,6 +177,14 @@ static int construct_vmcb(struct vcpu *v) - vmcb->_pause_filter_thresh = SVM_PAUSETHRESH_INIT; - } - -+ /* -+ * When default_xen_spec_ctrl is simply SPEC_CTRL_STIBP, default this behind -+ * the back of the VM too. Our SMT topology isn't accurate, the overhead -+ * is negligible, and doing this saves a WRMSR on the vmentry path. 
-+ */ -+ if ( default_xen_spec_ctrl == SPEC_CTRL_STIBP ) -+ v->arch.msrs->spec_ctrl.raw = SPEC_CTRL_STIBP; -+ - return 0; - } - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 1ed5ceda8b46..dfdd45c358c4 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -48,9 +48,13 @@ static enum ind_thunk { - THUNK_LFENCE, - THUNK_JMP, - } opt_thunk __initdata = THUNK_DEFAULT; -+ - static int8_t __initdata opt_ibrs = -1; -+int8_t __initdata opt_stibp = -1; -+bool __read_mostly opt_ssbd; -+int8_t __initdata opt_psfd = -1; -+ - bool __read_mostly opt_ibpb = true; --bool __read_mostly opt_ssbd = false; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -172,12 +176,20 @@ static int __init parse_spec_ctrl(const char *s) - else - rc = -EINVAL; - } -+ -+ /* Bits in MSR_SPEC_CTRL. */ - else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) - opt_ibrs = val; -- else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -- opt_ibpb = val; -+ else if ( (val = parse_boolean("stibp", s, ss)) >= 0 ) -+ opt_stibp = val; - else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 ) - opt_ssbd = val; -+ else if ( (val = parse_boolean("psfd", s, ss)) >= 0 ) -+ opt_psfd = val; -+ -+ /* Misc settings. */ -+ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -+ opt_ibpb = val; - else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 ) - opt_eager_fpu = val; - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) -@@ -376,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s, Other:%s%s%s%s%s\n", -+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? 
"LFENCE" : -@@ -390,6 +402,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (!boot_cpu_has(X86_FEATURE_SSBD) && - !boot_cpu_has(X86_FEATURE_AMD_SSBD)) ? "" : - (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", -+ (!boot_cpu_has(X86_FEATURE_PSFD) && -+ !boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ? "" : -+ (default_xen_spec_ctrl & SPEC_CTRL_PSFD) ? " PSFD+" : " PSFD-", - !(caps & ARCH_CAPS_TSX_CTRL) ? "" : - (opt_tsx & 1) ? " TSX+" : " TSX-", - !cpu_has_srbds_ctrl ? "" : -@@ -979,10 +994,7 @@ void __init init_speculation_mitigations(void) - if ( !has_spec_ctrl ) - printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n"); - else if ( opt_ibrs == -1 ) -- { - opt_ibrs = ibrs = true; -- default_xen_spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_STIBP; -- } - - if ( opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE ) - thunk = THUNK_JMP; -@@ -1086,14 +1098,49 @@ void __init init_speculation_mitigations(void) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - -- /* If we have IBRS available, see whether we should use it. */ -+ /* Figure out default_xen_spec_ctrl. */ - if ( has_spec_ctrl && ibrs ) -- default_xen_spec_ctrl |= SPEC_CTRL_IBRS; -+ { -+ /* IBRS implies STIBP. */ -+ if ( opt_stibp == -1 ) -+ opt_stibp = 1; -+ -+ default_xen_spec_ctrl |= SPEC_CTRL_IBRS; -+ } -+ -+ /* -+ * Use STIBP by default if the hardware hint is set. Otherwise, leave it -+ * off as it a severe performance pentalty on pre-eIBRS Intel hardware -+ * where it was retrofitted in microcode. -+ */ -+ if ( opt_stibp == -1 ) -+ opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS); -+ -+ if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) || -+ boot_cpu_has(X86_FEATURE_AMD_STIBP)) ) -+ default_xen_spec_ctrl |= SPEC_CTRL_STIBP; - -- /* If we have SSBD available, see whether we should use it. 
*/ - if ( opt_ssbd && (boot_cpu_has(X86_FEATURE_SSBD) || - boot_cpu_has(X86_FEATURE_AMD_SSBD)) ) -+ { -+ /* SSBD implies PSFD */ -+ if ( opt_psfd == -1 ) -+ opt_psfd = 1; -+ - default_xen_spec_ctrl |= SPEC_CTRL_SSBD; -+ } -+ -+ /* -+ * Don't use PSFD by default. AMD designed the predictor to -+ * auto-clear on privilege change. PSFD is implied by SSBD, which is -+ * off by default. -+ */ -+ if ( opt_psfd == -1 ) -+ opt_psfd = 0; -+ -+ if ( opt_psfd && (boot_cpu_has(X86_FEATURE_PSFD) || -+ boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ) -+ default_xen_spec_ctrl |= SPEC_CTRL_PSFD; - - /* - * PV guests can create RSB entries for any linear address they control, --- -2.35.1 - diff --git a/0038-libxc-fix-compilation-error-with-gcc13.patch b/0038-libxc-fix-compilation-error-with-gcc13.patch deleted file mode 100644 index 8056742..0000000 --- a/0038-libxc-fix-compilation-error-with-gcc13.patch +++ /dev/null @@ -1,33 +0,0 @@ -From cd3d6b4cd46cd05590805b4a6c0b6654af60106e Mon Sep 17 00:00:00 2001 -From: Charles Arnold <carnold@suse.com> -Date: Tue, 12 Jul 2022 11:14:07 +0200 -Subject: [PATCH 38/51] libxc: fix compilation error with gcc13 - -xc_psr.c:161:5: error: conflicting types for 'xc_psr_cmt_get_data' -due to enum/integer mismatch; - -Signed-off-by: Charles Arnold <carnold@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 8eeae8c2b4efefda8e946461e86cf2ae9c18e5a9 -master date: 2022-07-06 13:06:40 +0200 ---- - tools/include/xenctrl.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h -index 07b96e6671a5..893ae39e4a95 100644 ---- a/tools/include/xenctrl.h -+++ b/tools/include/xenctrl.h -@@ -2516,7 +2516,7 @@ int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask); - int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu, - uint32_t *l3_cache_size); - int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, 
uint32_t cpu, -- uint32_t psr_cmt_type, uint64_t *monitor_data, -+ xc_psr_cmt_type type, uint64_t *monitor_data, - uint64_t *tsc); - int xc_psr_cmt_enabled(xc_interface *xch); - --- -2.35.1 - diff --git a/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch b/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch deleted file mode 100644 index 1797a8f..0000000 --- a/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 61b9c2ceeb94b0cdaff01023cc5523b1f13e66e2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:14:34 +0200 -Subject: [PATCH 39/51] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio - sub-option - -This was an oversight from when unpriv-mmio was introduced. - -Fixes: 8c24b70fedcb ("x86/spec-ctrl: Add spec-ctrl=unpriv-mmio") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 4cdb519d797c19ebb8fadc5938cdb47479d5a21b -master date: 2022-07-11 15:21:35 +0100 ---- - xen/arch/x86/spec_ctrl.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index dfdd45c358c4..ae74943c1053 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -122,6 +122,7 @@ static int __init parse_spec_ctrl(const char *s) - opt_l1d_flush = 0; - opt_branch_harden = false; - opt_srb_lock = 0; -+ opt_unpriv_mmio = false; - } - else if ( val > 0 ) - rc = -EINVAL; --- -2.35.1 - diff --git a/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch b/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch deleted file mode 100644 index 3512590..0000000 --- a/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch +++ /dev/null @@ -1,87 +0,0 @@ -From eec5b02403a9df2523527caad24f17af5060fbe7 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:15:03 +0200 -Subject: [PATCH 
40/51] xen/cmdline: Extend parse_boolean() to signal a name - match - -This will help parsing a sub-option which has boolean and non-boolean options -available. - -First, rework 'int val' into 'bool has_neg_prefix'. This inverts its value, -but the resulting logic is far easier to follow. - -Second, reject anything of the form 'no-$FOO=' which excludes ambiguous -constructs such as 'no-$foo=yes' which have never been valid. - -This just leaves the case where everything is otherwise fine, but parse_bool() -can't interpret the provided string. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 382326cac528dd1eb0d04efd5c05363c453e29f4 -master date: 2022-07-11 15:21:35 +0100 ---- - xen/common/kernel.c | 20 ++++++++++++++++---- - xen/include/xen/lib.h | 3 ++- - 2 files changed, 18 insertions(+), 5 deletions(-) - -diff --git a/xen/common/kernel.c b/xen/common/kernel.c -index e119e5401f9d..7ed96521f97a 100644 ---- a/xen/common/kernel.c -+++ b/xen/common/kernel.c -@@ -272,9 +272,9 @@ int parse_bool(const char *s, const char *e) - int parse_boolean(const char *name, const char *s, const char *e) - { - size_t slen, nlen; -- int val = !!strncmp(s, "no-", 3); -+ bool has_neg_prefix = !strncmp(s, "no-", 3); - -- if ( !val ) -+ if ( has_neg_prefix ) - s += 3; - - slen = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); -@@ -286,11 +286,23 @@ int parse_boolean(const char *name, const char *s, const char *e) - - /* Exact, unadorned name? Result depends on the 'no-' prefix. */ - if ( slen == nlen ) -- return val; -+ return !has_neg_prefix; -+ -+ /* Inexact match with a 'no-' prefix? Not valid. */ -+ if ( has_neg_prefix ) -+ return -1; - - /* =$SOMETHING? Defer to the regular boolean parsing. 
*/ - if ( s[nlen] == '=' ) -- return parse_bool(&s[nlen + 1], e); -+ { -+ int b = parse_bool(&s[nlen + 1], e); -+ -+ if ( b >= 0 ) -+ return b; -+ -+ /* Not a boolean, but the name matched. Signal specially. */ -+ return -2; -+ } - - /* Unrecognised. Give up. */ - return -1; -diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h -index c6987973bf88..2296044caf79 100644 ---- a/xen/include/xen/lib.h -+++ b/xen/include/xen/lib.h -@@ -80,7 +80,8 @@ int parse_bool(const char *s, const char *e); - /** - * Given a specific name, parses a string of the form: - * [no-]$NAME[=...] -- * returning 0 or 1 for a recognised boolean, or -1 for an error. -+ * returning 0 or 1 for a recognised boolean. Returns -1 for general errors, -+ * and -2 for "not a boolean, but $NAME= matches". - */ - int parse_boolean(const char *name, const char *s, const char *e); - --- -2.35.1 - diff --git a/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch b/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch deleted file mode 100644 index 9964bb9..0000000 --- a/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch +++ /dev/null @@ -1,137 +0,0 @@ -From f066c8bb3e5686141cef6fa1dc86ea9f37c5388a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:15:37 +0200 -Subject: [PATCH 41/51] x86/spec-ctrl: Add fine-grained cmdline suboptions for - primitives - -Support controlling the PV/HVM suboption of msr-sc/rsb/md-clear, which -previously wasn't possible. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 27357c394ba6e1571a89105b840ce1c6f026485c -master date: 2022-07-11 15:21:35 +0100 ---- - docs/misc/xen-command-line.pandoc | 12 ++++-- - xen/arch/x86/spec_ctrl.c | 66 ++++++++++++++++++++++++++----- - 2 files changed, 66 insertions(+), 12 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 46e9c58d35cd..1bbdb55129cc 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2233,7 +2233,8 @@ not be able to control the state of the mitigation. - By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) --> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, -+> `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, -+> {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio}=<bool> ]` -@@ -2258,12 +2259,17 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine - grained control over the primitives by Xen. These impact Xen's ability to --protect itself, and Xen's ability to virtualise support for guests to use. -+protect itself, and/or Xen's ability to virtualise support for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -+* Each other option can be used either as a plain boolean -+ (e.g. `spec-ctrl=rsb` to control both the PV and HVM sub-options), or with -+ `pv=` or `hvm=` subsuboptions (e.g. `spec-ctrl=rsb=no-hvm` to disable HVM -+ RSB only). 
-+ - * `msr-sc=` offers control over Xen's support for manipulating `MSR_SPEC_CTRL` - on entry and exit. These blocks are necessary to virtualise support for - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ae74943c1053..9507e5da60a9 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -147,20 +147,68 @@ static int __init parse_spec_ctrl(const char *s) - opt_rsb_hvm = val; - opt_md_clear_hvm = val; - } -- else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -- opt_msr_sc_pv = val; -- opt_msr_sc_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_msr_sc_pv = opt_msr_sc_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("msr-sc="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_msr_sc_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_msr_sc_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("rsb", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("rsb", s, ss)) != -1 ) - { -- opt_rsb_pv = val; -- opt_rsb_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_rsb_pv = opt_rsb_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("rsb="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_rsb_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_rsb_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) - { -- opt_md_clear_pv = val; -- opt_md_clear_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_md_clear_pv = opt_md_clear_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("md-clear="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_md_clear_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) 
-+ opt_md_clear_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } - - /* Xen's speculative sidechannel mitigation settings. */ --- -2.35.1 - diff --git a/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch b/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch deleted file mode 100644 index eea790a..0000000 --- a/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 14fd97e3de939a63a6e467f240efb49fe226a5dc Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 12 Jul 2022 11:16:10 +0200 -Subject: [PATCH 42/51] tools/helpers: fix build of xen-init-dom0 with -Werror - -Missing prototype of asprintf() without _GNU_SOURCE. - -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Reviewed-by: Henry Wang <Henry.Wang@arm.com> -master commit: d693b22733044d68e9974766b5c9e6259c9b1708 -master date: 2022-07-12 08:38:35 +0200 ---- - tools/helpers/xen-init-dom0.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/helpers/xen-init-dom0.c b/tools/helpers/xen-init-dom0.c -index c99224a4b607..b4861c9e8041 100644 ---- a/tools/helpers/xen-init-dom0.c -+++ b/tools/helpers/xen-init-dom0.c -@@ -1,3 +1,5 @@ -+#define _GNU_SOURCE -+ - #include <stdlib.h> - #include <stdint.h> - #include <string.h> --- -2.35.1 - diff --git a/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch b/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch deleted file mode 100644 index 0c2470a..0000000 --- a/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 744accad1b73223b3261e3e678e16e030d83b179 Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 12 Jul 2022 11:16:30 +0200 -Subject: [PATCH 43/51] libxl: check return value of libxl__xs_directory in - name2bdf - -libxl__xs_directory() can potentially return NULL without setting `n`. 
-As `n` isn't initialised, we need to check libxl__xs_directory() -return value before checking `n`. Otherwise, `n` might be non-zero -with `bdfs` NULL which would lead to a segv. - -Fixes: 57bff091f4 ("libxl: add 'name' field to 'libxl_device_pci' in the IDL...") -Reported-by: "G.R." <firemeteor@users.sourceforge.net> -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -Tested-by: "G.R." <firemeteor@users.sourceforge.net> -master commit: d778089ac70e5b8e3bdea0c85fc8c0b9ed0eaf2f -master date: 2022-07-12 08:38:51 +0200 ---- - tools/libs/light/libxl_pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libs/light/libxl_pci.c b/tools/libs/light/libxl_pci.c -index 4bbbfe9f168f..ce3bf7c0ae81 100644 ---- a/tools/libs/light/libxl_pci.c -+++ b/tools/libs/light/libxl_pci.c -@@ -859,7 +859,7 @@ static int name2bdf(libxl__gc *gc, libxl_device_pci *pci) - int rc = ERROR_NOTFOUND; - - bdfs = libxl__xs_directory(gc, XBT_NULL, PCI_INFO_PATH, &n); -- if (!n) -+ if (!bdfs || !n) - goto out; - - for (i = 0; i < n; i++) { --- -2.35.1 - diff --git a/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch b/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch deleted file mode 100644 index d8517f8..0000000 --- a/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 3a280cbae7022b83af91c27a8e2211ba3b1234f5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 1 Jul 2022 15:59:40 +0100 -Subject: [PATCH 44/51] x86/spec-ctrl: Rework spec_ctrl_flags context switching - -We are shortly going to need to context switch new bits in both the vcpu and -S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw -into d->arch.spec_ctrl_flags to accommodate. - -No functional change. - -This is part of XSA-407. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 5796912f7279d9348a3166655588d30eae9f72cc) ---- - xen/arch/x86/acpi/power.c | 8 ++++---- - xen/arch/x86/domain.c | 8 ++++---- - xen/arch/x86/spec_ctrl.c | 9 ++++++--- - xen/include/asm-x86/domain.h | 3 +-- - xen/include/asm-x86/spec_ctrl.h | 30 ++++++++++++++++++++++++++++- - xen/include/asm-x86/spec_ctrl_asm.h | 3 --- - 6 files changed, 44 insertions(+), 17 deletions(-) - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index 5eaa77f66a28..dd397f713067 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -248,8 +248,8 @@ static int enter_state(u32 state) - error = 0; - - ci = get_cpu_info(); -- /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ -- ci->spec_ctrl_flags &= ~SCF_ist_wrmsr; -+ /* Avoid NMI/#MC using unsafe MSRs until we've reloaded microcode. */ -+ ci->spec_ctrl_flags &= ~SCF_IST_MASK; - - ACPI_FLUSH_CPU_CACHE(); - -@@ -292,8 +292,8 @@ static int enter_state(u32 state) - if ( !recheck_cpu_features(0) ) - panic("Missing previously available feature(s)\n"); - -- /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ -- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); -+ /* Re-enabled default NMI/#MC use of MSRs now microcode is loaded. */ -+ ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_IST_MASK); - - if ( boot_cpu_has(X86_FEATURE_IBRSB) || boot_cpu_has(X86_FEATURE_IBRS) ) - { -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 1fe6644a71ae..82a0b73cf6ef 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2092,10 +2092,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - } - } - -- /* Update the top-of-stack block with the VERW disposition. 
*/ -- info->spec_ctrl_flags &= ~SCF_verw; -- if ( nextd->arch.verw ) -- info->spec_ctrl_flags |= SCF_verw; -+ /* Update the top-of-stack block with the new spec_ctrl settings. */ -+ info->spec_ctrl_flags = -+ (info->spec_ctrl_flags & ~SCF_DOM_MASK) | -+ (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK); - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 9507e5da60a9..7e646680f1c7 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1010,9 +1010,12 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = -- (pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d)); -+ bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ -+ d->arch.spec_ctrl_flags = -+ (verw ? SCF_verw : 0) | -+ 0; - } - - void __init init_speculation_mitigations(void) -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 2398a1d99da9..e4c099262cb7 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,8 +319,7 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -- /* Use VERW on return-to-guest for its flushing side effect. */ -- bool verw; -+ uint8_t spec_ctrl_flags; /* See SCF_DOM_MASK */ - - union { - struct pv_domain pv; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 7e83e0179fb9..3cd72e40305f 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -20,12 +20,40 @@ - #ifndef __X86_SPEC_CTRL_H__ - #define __X86_SPEC_CTRL_H__ - --/* Encoding of cpuinfo.spec_ctrl_flags */ -+/* -+ * Encoding of: -+ * cpuinfo.spec_ctrl_flags -+ * default_spec_ctrl_flags -+ * domain.spec_ctrl_flags -+ * -+ * Live settings are in the top-of-stack block, because they need to be -+ * accessible when XPTI is active. 
Some settings are fixed from boot, some -+ * context switched per domain, and some inhibited in the S3 path. -+ */ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -+/* -+ * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -+ * functionality requires updated microcode to work. -+ * -+ * On boot, this is easy; we load microcode before figuring out which -+ * speculative protections to apply. However, on the S3 resume path, we must -+ * be able to disable the configured mitigations until microcode is reloaded. -+ * -+ * These are the controls to inhibit on the S3 resume path until microcode has -+ * been reloaded. -+ */ -+#define SCF_IST_MASK (SCF_ist_wrmsr) -+ -+/* -+ * Some speculative protections are per-domain. These settings are merged -+ * into the top-of-stack block in the context switch path. -+ */ -+#define SCF_DOM_MASK (SCF_verw) -+ - #ifndef __ASSEMBLY__ - - #include <asm/alternative.h> -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 5a590bac44aa..66b00d511fc6 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -248,9 +248,6 @@ - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. -- * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume -- * path to avoid using MSR_SPEC_CTRL before the microcode introducing it has -- * been reloaded. 
- */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* --- -2.35.1 - diff --git a/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch b/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch deleted file mode 100644 index 5b841a6..0000000 --- a/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 31aa2a20bfefc3a8a200da54a56471bf99f9630e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 28 Jun 2022 14:36:56 +0100 -Subject: [PATCH 45/51] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr - -We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes -ambiguous. - -No functional change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 76d6a36f645dfdbad8830559d4d52caf36efc75e) ---- - xen/arch/x86/spec_ctrl.c | 6 +++--- - xen/include/asm-x86/spec_ctrl.h | 4 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 8 ++++---- - 3 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 7e646680f1c7..89f95c083e1b 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1115,7 +1115,7 @@ void __init init_speculation_mitigations(void) - { - if ( opt_msr_sc_pv ) - { -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); - } - -@@ -1126,7 +1126,7 @@ void __init init_speculation_mitigations(void) - * Xen's value is not restored atomically. An early NMI hitting - * the VMExit path needs to restore Xen's value for safety. 
- */ -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - } -@@ -1139,7 +1139,7 @@ void __init init_speculation_mitigations(void) - * on real hardware matches the availability of MSR_SPEC_CTRL in the - * first place. - * -- * No need for SCF_ist_wrmsr because Xen's value is restored -+ * No need for SCF_ist_sc_msr because Xen's value is restored - * atomically WRT NMIs in the VMExit path. - * - * TODO: Adjust cpu_has_svm_spec_ctrl to be usable earlier on boot. -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 3cd72e40305f..f8f0ac47e759 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -31,7 +31,7 @@ - * context switched per domain, and some inhibited in the S3 path. - */ - #define SCF_use_shadow (1 << 0) --#define SCF_ist_wrmsr (1 << 1) -+#define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -@@ -46,7 +46,7 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. - */ --#define SCF_IST_MASK (SCF_ist_wrmsr) -+#define SCF_IST_MASK (SCF_ist_sc_msr) - - /* - * Some speculative protections are per-domain. These settings are merged -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 66b00d511fc6..0ff1b118f882 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -266,8 +266,8 @@ - - .L\@_skip_rsb: - -- test $SCF_ist_wrmsr, %al -- jz .L\@_skip_wrmsr -+ test $SCF_ist_sc_msr, %al -+ jz .L\@_skip_msr_spec_ctrl - - xor %edx, %edx - testb $3, UREGS_cs(%rsp) -@@ -290,7 +290,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * to speculate around the WRMSR. As a result, we need a dispatch - * serialising instruction in the else clause. 
- */ --.L\@_skip_wrmsr: -+.L\@_skip_msr_spec_ctrl: - lfence - UNLIKELY_END(\@_serialise) - .endm -@@ -301,7 +301,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Requires %rbx=stack_end - * Clobbers %rax, %rcx, %rdx - */ -- testb $SCF_ist_wrmsr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) - jz .L\@_skip - - DO_SPEC_CTRL_EXIT_TO_XEN --- -2.35.1 - diff --git a/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch b/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch deleted file mode 100644 index a950639..0000000 --- a/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch +++ /dev/null @@ -1,97 +0,0 @@ -From e7671561c84322860875745e57b228a7a310f2bf Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 4 Jul 2022 21:32:17 +0100 -Subject: [PATCH 46/51] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch - -We are about to introduce the use of IBPB at different points in Xen, making -opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch. - -No functional change. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit a8e5ef079d6f5c88c472e3e620db5a8d1402a50d) ---- - xen/arch/x86/domain.c | 2 +- - xen/arch/x86/spec_ctrl.c | 10 +++++----- - xen/include/asm-x86/spec_ctrl.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 82a0b73cf6ef..0d39981550ca 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2064,7 +2064,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ctxt_switch_levelling(next); - -- if ( opt_ibpb && !is_idle_domain(nextd) ) -+ if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) - { - static DEFINE_PER_CPU(unsigned int, last); - unsigned int *last_id = &this_cpu(last); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 89f95c083e1b..f4ae36eae2d0 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -54,7 +54,7 @@ int8_t __initdata opt_stibp = -1; - bool __read_mostly opt_ssbd; - int8_t __initdata opt_psfd = -1; - --bool __read_mostly opt_ibpb = true; -+bool __read_mostly opt_ibpb_ctxt_switch = true; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -117,7 +117,7 @@ static int __init parse_spec_ctrl(const char *s) - - opt_thunk = THUNK_JMP; - opt_ibrs = 0; -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -@@ -238,7 +238,7 @@ static int __init parse_spec_ctrl(const char *s) - - /* Misc settings. 
*/ - else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -- opt_ibpb = val; -+ opt_ibpb_ctxt_switch = val; - else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 ) - opt_eager_fpu = val; - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) -@@ -458,7 +458,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (opt_tsx & 1) ? " TSX+" : " TSX-", - !cpu_has_srbds_ctrl ? "" : - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", -- opt_ibpb ? " IBPB" : "", -+ opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", - opt_md_clear_pv || opt_md_clear_hvm || - opt_fb_clear_mmio ? " VERW" : "", -@@ -1240,7 +1240,7 @@ void __init init_speculation_mitigations(void) - - /* Check we have hardware IBPB support before using it... */ - if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - - /* Check whether Eager FPU should be enabled by default. */ - if ( opt_eager_fpu == -1 ) -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f8f0ac47e759..fb4365575620 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -63,7 +63,7 @@ - void init_speculation_mitigations(void); - void spec_ctrl_init_domain(struct domain *d); - --extern bool opt_ibpb; -+extern bool opt_ibpb_ctxt_switch; - extern bool opt_ssbd; - extern int8_t opt_eager_fpu; - extern int8_t opt_l1d_flush; --- -2.35.1 - diff --git a/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch b/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch deleted file mode 100644 index 3ce9fd9..0000000 --- a/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 2a9e690a0ad5d54dca4166e089089a07bbe7fc85 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 1 Jul 2022 15:59:40 +0100 -Subject: [PATCH 47/51] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST - 
-We are shortly going to add a conditional IBPB in this path. - -Therefore, we cannot hold spec_ctrl_flags in %eax, and rely on only clobbering -it after we're done with its contents. %rbx is available for use, and the -more normal register to hold preserved information in. - -With %rax freed up, use it instead of %rdx for the RSB tmp register, and for -the adjustment to spec_ctrl_flags. - -This leaves no use of %rdx, except as 0 for the upper half of WRMSR. In -practice, %rdx is 0 from SAVE_ALL on all paths and isn't likely to change in -the foreseeable future, so update the macro entry requirements to state this -dependency. This marginal optimisation can be revisited if circumstances -change. - -No practical change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit e9b8d31981f184c6539f91ec54bd9cae29cdae36) ---- - xen/arch/x86/x86_64/entry.S | 4 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 21 ++++++++++----------- - 2 files changed, 12 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 2a86938f1f32..a1810bf4d311 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -932,7 +932,7 @@ ENTRY(double_fault) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx -@@ -968,7 +968,7 @@ handle_ist_exception: - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 0ff1b118f882..15e24cde00d1 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -251,34 +251,33 @@ - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* -- * Requires %rsp=regs, %r14=stack_end -- * Clobbers %rax, %rcx, %rdx -+ * Requires %rsp=regs, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rbx, %rcx, %rdx - * - * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY - * maybexen=1, but with conditionals rather than alternatives. - */ -- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %eax -+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -- test $SCF_ist_rsb, %al -+ test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - -- DO_OVERWRITE_RSB tmp=rdx /* Clobbers %rcx/%rdx */ -+ DO_OVERWRITE_RSB /* Clobbers %rax/%rcx */ - - .L\@_skip_rsb: - -- test $SCF_ist_sc_msr, %al -+ test $SCF_ist_sc_msr, %bl - jz .L\@_skip_msr_spec_ctrl - -- xor %edx, %edx -+ xor %eax, %eax - testb $3, UREGS_cs(%rsp) -- setnz %dl -- not %edx -- and %dl, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ setnz %al -+ not %eax -+ and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - - /* Load Xen's intended value. */ - mov $MSR_SPEC_CTRL, %ecx - movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax -- xor %edx, %edx - wrmsr - - /* Opencoded UNLIKELY_START() with no condition. 
*/ --- -2.35.1 - diff --git a/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch b/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch deleted file mode 100644 index d5ad043..0000000 --- a/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch +++ /dev/null @@ -1,300 +0,0 @@ -From 76c5fcee9027fb8823dd501086f0ff3ee3c4231c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 24 Feb 2022 13:44:33 +0000 -Subject: [PATCH 48/51] x86/spec-ctrl: Support IBPB-on-entry - -We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs, -but as we've talked about using it in other cases too, arrange to support it -generally. However, this is also very expensive in some cases, so we're going -to want per-domain controls. - -Introduce SCF_ist_ibpb and SCF_entry_ibpb controls, adding them to the IST and -DOM masks as appropriate. Also introduce X86_FEATURE_IBPB_ENTRY_{PV,HVM} to -to patch the code blocks. - -For SVM, the STGI is serialising enough to protect against Spectre-v1 attacks, -so no "else lfence" is necessary. VT-x will use use the MSR host load list, -so doesn't need any code in the VMExit path. - -For the IST path, we can't safely check CPL==0 to skip a flush, as we might -have hit an entry path before it's IBPB. As IST hitting Xen is rare, flush -irrespective of CPL. A later path, SCF_ist_sc_msr, provides Spectre-v1 -safety. - -For the PV paths, we know we're interrupting CPL>0, while for the INTR paths, -we can safely check CPL==0. Only flush when interrupting guest context. - -An "else lfence" is needed for safety, but we want to be able to skip it on -unaffected CPUs, so the block wants to be an alternative, which means the -lfence has to be inline rather than UNLIKELY() (the replacement block doesn't -have displacements fixed up for anything other than the first instruction). - -As with SPEC_CTRL_ENTRY_FROM_INTR_IST, %rdx is 0 on entry so rely on this to -shrink the logic marginally. 
Update the comments to specify this new -dependency. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 53a570b285694947776d5190f591a0d5b9b18de7) ---- - xen/arch/x86/hvm/svm/entry.S | 18 ++++++++++- - xen/arch/x86/hvm/vmx/vmcs.c | 4 +++ - xen/arch/x86/x86_64/compat/entry.S | 2 +- - xen/arch/x86/x86_64/entry.S | 12 +++---- - xen/include/asm-x86/cpufeatures.h | 2 ++ - xen/include/asm-x86/spec_ctrl.h | 6 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 49 +++++++++++++++++++++++++++-- - 7 files changed, 81 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 4ae55a2ef605..0ff4008060fa 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -97,7 +97,19 @@ __UNLIKELY_END(nsvm_hap) - - GET_CURRENT(bx) - -- /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo Clob: acd */ -+ /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ -+ -+ .macro svm_vmexit_cond_ibpb -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ jz .L_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+.L_skip_ibpb: -+ .endm -+ ALTERNATIVE "", svm_vmexit_cond_ibpb, X86_FEATURE_IBPB_ENTRY_HVM -+ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - - .macro svm_vmexit_spec_ctrl -@@ -114,6 +126,10 @@ __UNLIKELY_END(nsvm_hap) - ALTERNATIVE "", svm_vmexit_spec_ctrl, X86_FEATURE_SC_MSR_HVM - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -+ /* -+ * STGI is executed unconditionally, and is sufficiently serialising -+ * to safely resolve any Spectre-v1 concerns in the above logic. 
-+ */ - stgi - GLOBAL(svm_stgi_label) - mov %rsp,%rdi -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index f9f9bc18cdbc..dd817cee4e69 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1345,6 +1345,10 @@ static int construct_vmcs(struct vcpu *v) - rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D, - VMX_MSR_GUEST_LOADONLY); - -+ if ( !rc && (d->arch.spec_ctrl_flags & SCF_entry_ibpb) ) -+ rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB, -+ VMX_MSR_HOST); -+ - out: - vmx_vmcs_exit(v); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 5fd6dbbd4513..b86d38d1c50d 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -18,7 +18,7 @@ ENTRY(entry_int82) - movl $HYPERCALL_VECTOR, 4(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - CR4_PV32_RESTORE -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index a1810bf4d311..fba8ae498f74 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -260,7 +260,7 @@ ENTRY(lstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -298,7 +298,7 @@ ENTRY(cstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - - GET_STACK_END(bx) -@@ -338,7 +338,7 @@ GLOBAL(sysenter_eflags_saved) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -392,7 +392,7 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -674,7 +674,7 @@ ENTRY(common_interrupt) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -708,7 +708,7 @@ GLOBAL(handle_exception) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 493d338a085e..672c9ee22ba2 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -39,6 +39,8 @@ XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -+XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */ -+XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ - - /* Bug words follow the synthetic words. */ - #define X86_NR_BUG 1 -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index fb4365575620..3fc599a817c4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -34,6 +34,8 @@ - #define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) -+#define SCF_ist_ibpb (1 << 4) -+#define SCF_entry_ibpb (1 << 5) - - /* - * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -@@ -46,13 +48,13 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. - */ --#define SCF_IST_MASK (SCF_ist_sc_msr) -+#define SCF_IST_MASK (SCF_ist_sc_msr | SCF_ist_ibpb) - - /* - * Some speculative protections are per-domain. These settings are merged - * into the top-of-stack block in the context switch path. 
- */ --#define SCF_DOM_MASK (SCF_verw) -+#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb) - - #ifndef __ASSEMBLY__ - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 15e24cde00d1..9eb4ad9ab71d 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -88,6 +88,35 @@ - * - SPEC_CTRL_EXIT_TO_{SVM,VMX} - */ - -+.macro DO_SPEC_CTRL_COND_IBPB maybexen:req -+/* -+ * Requires %rsp=regs (also cpuinfo if !maybexen) -+ * Requires %r14=stack_end (if maybexen), %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * Conditionally issue IBPB if SCF_entry_ibpb is active. In the maybexen -+ * case, we can safely look at UREGS_cs to skip taking the hit when -+ * interrupting Xen. -+ */ -+ .if \maybexen -+ testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ jz .L\@_skip -+ testb $3, UREGS_cs(%rsp) -+ .else -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ .endif -+ jz .L\@_skip -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ jmp .L\@_done -+ -+.L\@_skip: -+ lfence -+.L\@_done: -+.endm -+ - .macro DO_OVERWRITE_RSB tmp=rax - /* - * Requires nothing -@@ -225,12 +254,16 @@ - - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ - #define SPEC_CTRL_ENTRY_FROM_PV \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV - - /* Use in interrupt/exception context. May interrupt Xen or PV context. 
*/ - #define SPEC_CTRL_ENTRY_FROM_INTR \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -@@ -254,11 +287,23 @@ - * Requires %rsp=regs, %r14=stack_end, %rdx=0 - * Clobbers %rax, %rbx, %rcx, %rdx - * -- * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY -- * maybexen=1, but with conditionals rather than alternatives. -+ * This is logical merge of: -+ * DO_SPEC_CTRL_COND_IBPB maybexen=0 -+ * DO_OVERWRITE_RSB -+ * DO_SPEC_CTRL_ENTRY maybexen=1 -+ * but with conditionals rather than alternatives. - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -+ test $SCF_ist_ibpb, %bl -+ jz .L\@_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ -+.L\@_skip_ibpb: -+ - test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - --- -2.35.1 - diff --git a/0049-x86-cpuid-Enumeration-for-BTC_NO.patch b/0049-x86-cpuid-Enumeration-for-BTC_NO.patch deleted file mode 100644 index 0e5d119..0000000 --- a/0049-x86-cpuid-Enumeration-for-BTC_NO.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 0826c7596d35c887b3b7858137c7ac374d9ef17a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 16 May 2022 15:48:24 +0100 -Subject: [PATCH 49/51] x86/cpuid: Enumeration for BTC_NO - -BTC_NO indicates that hardware is not succeptable to Branch Type Confusion. - -Zen3 CPUs don't suffer BTC. - -This is part of XSA-407. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 76cb04ad64f3ab9ae785988c40655a71dde9c319) ---- - tools/libs/light/libxl_cpuid.c | 1 + - tools/misc/xen-cpuid.c | 2 +- - xen/arch/x86/cpu/amd.c | 10 ++++++++++ - xen/arch/x86/spec_ctrl.c | 5 +++-- - xen/include/public/arch-x86/cpufeatureset.h | 1 + - 5 files changed, 16 insertions(+), 3 deletions(-) - -diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c -index d462f9e421ed..bf6fdee360a9 100644 ---- a/tools/libs/light/libxl_cpuid.c -+++ b/tools/libs/light/libxl_cpuid.c -@@ -288,6 +288,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) - {"virt-ssbd", 0x80000008, NA, CPUID_REG_EBX, 25, 1}, - {"ssb-no", 0x80000008, NA, CPUID_REG_EBX, 26, 1}, - {"psfd", 0x80000008, NA, CPUID_REG_EBX, 28, 1}, -+ {"btc-no", 0x80000008, NA, CPUID_REG_EBX, 29, 1}, - - {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8}, - {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4}, -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index bc7dcf55757a..fe22f5f5b68b 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -158,7 +158,7 @@ static const char *const str_e8b[32] = - /* [22] */ [23] = "ppin", - [24] = "amd-ssbd", [25] = "virt-ssbd", - [26] = "ssb-no", -- [28] = "psfd", -+ [28] = "psfd", [29] = "btc-no", - }; - - static const char *const str_7d0[32] = -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index b3b9a0df5fed..b158e3acb5c7 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -847,6 +847,16 @@ static void init_amd(struct cpuinfo_x86 *c) - warning_add(text); - } - break; -+ -+ case 0x19: -+ /* -+ * Zen3 (Fam19h model < 0x10) parts are not susceptible to -+ * Branch Type Confusion, but predate the allocation of the -+ * BTC_NO bit. Fill it back in if we're not virtualised. 
-+ */ -+ if (!cpu_has_hypervisor && !cpu_has(c, X86_FEATURE_BTC_NO)) -+ __set_bit(X86_FEATURE_BTC_NO, c->x86_capability); -+ break; - } - - display_cacheinfo(c); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index f4ae36eae2d0..0f101c057f3e 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -388,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -403,7 +403,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", -- (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); -+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : "", -+ (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : ""); - - /* Hardware features which need driving to mitigate issues. 
*/ - printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 743b857dcd5c..e7b8167800a2 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -266,6 +266,7 @@ XEN_CPUFEATURE(AMD_SSBD, 8*32+24) /*S MSR_SPEC_CTRL.SSBD available */ - XEN_CPUFEATURE(VIRT_SSBD, 8*32+25) /* MSR_VIRT_SPEC_CTRL.SSBD */ - XEN_CPUFEATURE(SSB_NO, 8*32+26) /*A Hardware not vulnerable to SSB */ - XEN_CPUFEATURE(PSFD, 8*32+28) /*S MSR_SPEC_CTRL.PSFD */ -+XEN_CPUFEATURE(BTC_NO, 8*32+29) /*A Hardware not vulnerable to Branch Type Confusion */ - - /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ - XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ --- -2.35.1 - diff --git a/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch b/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch deleted file mode 100644 index c83844d..0000000 --- a/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 5457a6870eb1369b868f7b8e833966ed43a773ad Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 15 Mar 2022 18:30:25 +0000 -Subject: [PATCH 50/51] x86/spec-ctrl: Enable Zen2 chickenbit - -... as instructed in the Branch Type Confusion whitepaper. - -This is part of XSA-407. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -(cherry picked from commit 9deaf2d932f08c16c6b96a1c426e4b1142c0cdbe) ---- - xen/arch/x86/cpu/amd.c | 28 ++++++++++++++++++++++++++++ - xen/arch/x86/cpu/cpu.h | 1 + - xen/arch/x86/cpu/hygon.c | 6 ++++++ - xen/include/asm-x86/msr-index.h | 1 + - 4 files changed, 36 insertions(+) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index b158e3acb5c7..37ac84ddd74d 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -731,6 +731,31 @@ void amd_init_ssbd(const struct cpuinfo_x86 *c) - printk_once(XENLOG_ERR "No SSBD controls available\n"); - } - -+/* -+ * On Zen2 we offer this chicken (bit) on the altar of Speculation. -+ * -+ * Refer to the AMD Branch Type Confusion whitepaper: -+ * https://XXX -+ * -+ * Setting this unnamed bit supposedly causes prediction information on -+ * non-branch instructions to be ignored. It is to be set unilaterally in -+ * newer microcode. -+ * -+ * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a -+ * simple model number comparison, so use STIBP as a heuristic to separate the -+ * two uarches in Fam17h(AMD)/18h(Hygon). 
-+ */ -+void amd_init_spectral_chicken(void) -+{ -+ uint64_t val, chickenbit = 1 << 1; -+ -+ if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ return; -+ -+ if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) -+ wrmsr_safe(MSR_AMD64_DE_CFG2, val | chickenbit); -+} -+ - void __init detect_zen2_null_seg_behaviour(void) - { - uint64_t base; -@@ -796,6 +821,9 @@ static void init_amd(struct cpuinfo_x86 *c) - - amd_init_ssbd(c); - -+ if (c->x86 == 0x17) -+ amd_init_spectral_chicken(); -+ - /* Probe for NSCB on Zen2 CPUs when not virtualised */ - if (!cpu_has_hypervisor && !cpu_has_nscb && c == &boot_cpu_data && - c->x86 == 0x17) -diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h -index b593bd85f04f..145bc5156a86 100644 ---- a/xen/arch/x86/cpu/cpu.h -+++ b/xen/arch/x86/cpu/cpu.h -@@ -22,4 +22,5 @@ void early_init_amd(struct cpuinfo_x86 *c); - void amd_log_freq(const struct cpuinfo_x86 *c); - void amd_init_lfence(struct cpuinfo_x86 *c); - void amd_init_ssbd(const struct cpuinfo_x86 *c); -+void amd_init_spectral_chicken(void); - void detect_zen2_null_seg_behaviour(void); -diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c -index cdc94130dd2e..6f8d491297e8 100644 ---- a/xen/arch/x86/cpu/hygon.c -+++ b/xen/arch/x86/cpu/hygon.c -@@ -40,6 +40,12 @@ static void init_hygon(struct cpuinfo_x86 *c) - c->x86 == 0x18) - detect_zen2_null_seg_behaviour(); - -+ /* -+ * TODO: Check heuristic safety with Hygon first -+ if (c->x86 == 0x18) -+ amd_init_spectral_chicken(); -+ */ -+ - /* - * Hygon CPUs before Zen2 don't clear segment bases/limits when - * loading a NULL selector. 
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 72bc32ba04ff..d3735e499e0f 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -361,6 +361,7 @@ - #define MSR_AMD64_DE_CFG 0xc0011029 - #define AMD64_DE_CFG_LFENCE_SERIALISE (_AC(1, ULL) << 1) - #define MSR_AMD64_EX_CFG 0xc001102c -+#define MSR_AMD64_DE_CFG2 0xc00110e3 - - #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027 - #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019 --- -2.35.1 - diff --git a/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch b/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch deleted file mode 100644 index e313ede..0000000 --- a/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch +++ /dev/null @@ -1,305 +0,0 @@ -From 0a5387a01165b46c8c85e7f7e2ddbe60a7f5db44 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 27 Jun 2022 19:29:40 +0100 -Subject: [PATCH 51/51] x86/spec-ctrl: Mitigate Branch Type Confusion when - possible - -Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To -mitigate, we require SMT safety (STIBP on Zen2, no-SMT on Zen1), and to issue -an IBPB on each entry to Xen, to flush the BTB. - -Due to performance concerns, dom0 (which is trusted in most configurations) is -excluded from protections by default. - -Therefore: - * Use STIBP by default on Zen2 too, which now means we want it on by default - on all hardware supporting STIBP. - * Break the current IBPB logic out into a new function, extending it with - IBPB-at-entry logic. - * Change the existing IBPB-at-ctxt-switch boolean to be tristate, and disable - it by default when IBPB-at-entry is providing sufficient safety. - -If all PV guests on the system are trusted, then it is recommended to boot -with `spec-ctrl=ibpb-entry=no-pv`, as this will provide an additional marginal -perf improvement. - -This is part of XSA-407 / CVE-2022-23825. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit d8cb7e0f069e0f106d24941355b59b45a731eabe) ---- - docs/misc/xen-command-line.pandoc | 14 ++-- - xen/arch/x86/spec_ctrl.c | 113 ++++++++++++++++++++++++++---- - xen/include/asm-x86/spec_ctrl.h | 2 +- - 3 files changed, 112 insertions(+), 17 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 1bbdb55129cc..bd6826d0ae05 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2234,7 +2234,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, --> {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>, -+> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio}=<bool> ]` -@@ -2259,9 +2259,10 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine --grained control over the primitives by Xen. These impact Xen's ability to --protect itself, and/or Xen's ability to virtualise support for guests to use. -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+offer fine grained control over the primitives by Xen. These impact Xen's -+ability to protect itself, and/or Xen's ability to virtualise support for -+guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -@@ -2280,6 +2281,11 @@ protect itself, and/or Xen's ability to virtualise support for guests to use. - compatibility with development versions of this fix, `mds=` is also accepted - on Xen 4.12 and earlier as an alias. 
Consult vendor documentation in - preference to here.* -+* `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction -+ Barrier) is used on entry to Xen. This is used by default on hardware -+ vulnerable to Branch Type Confusion, but for performance reasons, dom0 is -+ unprotected by default. If it necessary to protect dom0 too, boot with -+ `spec-ctrl=ibpb-entry`. - - If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to - select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 0f101c057f3e..1d9796c34d71 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -39,6 +39,10 @@ static bool __initdata opt_rsb_hvm = true; - static int8_t __read_mostly opt_md_clear_pv = -1; - static int8_t __read_mostly opt_md_clear_hvm = -1; - -+static int8_t __read_mostly opt_ibpb_entry_pv = -1; -+static int8_t __read_mostly opt_ibpb_entry_hvm = -1; -+static bool __read_mostly opt_ibpb_entry_dom0; -+ - /* Cmdline controls for Xen's speculative settings. */ - static enum ind_thunk { - THUNK_DEFAULT, /* Decide which thunk to use at boot time. 
*/ -@@ -54,7 +58,7 @@ int8_t __initdata opt_stibp = -1; - bool __read_mostly opt_ssbd; - int8_t __initdata opt_psfd = -1; - --bool __read_mostly opt_ibpb_ctxt_switch = true; -+int8_t __read_mostly opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -114,6 +118,9 @@ static int __init parse_spec_ctrl(const char *s) - opt_rsb_hvm = false; - opt_md_clear_pv = 0; - opt_md_clear_hvm = 0; -+ opt_ibpb_entry_pv = 0; -+ opt_ibpb_entry_hvm = 0; -+ opt_ibpb_entry_dom0 = false; - - opt_thunk = THUNK_JMP; - opt_ibrs = 0; -@@ -140,12 +147,14 @@ static int __init parse_spec_ctrl(const char *s) - opt_msr_sc_pv = val; - opt_rsb_pv = val; - opt_md_clear_pv = val; -+ opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; - opt_md_clear_hvm = val; -+ opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -@@ -210,6 +219,28 @@ static int __init parse_spec_ctrl(const char *s) - break; - } - } -+ else if ( (val = parse_boolean("ibpb-entry", s, ss)) != -1 ) -+ { -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_ibpb_entry_pv = opt_ibpb_entry_hvm = -+ opt_ibpb_entry_dom0 = val; -+ break; -+ -+ case -2: -+ s += strlen("ibpb-entry="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_ibpb_entry_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_ibpb_entry_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } -+ } - - /* Xen's speculative sidechannel mitigation settings. */ - else if ( !strncmp(s, "bti-thunk=", 10) ) -@@ -477,27 +508,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * mitigation support for guests. 
- */ - #ifdef CONFIG_HVM -- printk(" Support for HVM VMs:%s%s%s%s%s\n", -+ printk(" Support for HVM VMs:%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_MD_CLEAR) || -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - opt_eager_fpu) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); -+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "", -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); - - #endif - #ifdef CONFIG_PV -- printk(" Support for PV VMs:%s%s%s%s%s\n", -+ printk(" Support for PV VMs:%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_MD_CLEAR) || -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || - opt_eager_fpu) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); -+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "", -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", - opt_xpti_hwdom ? "enabled" : "disabled", -@@ -759,6 +794,55 @@ static bool __init should_use_eager_fpu(void) - } - } - -+static void __init ibpb_calculations(void) -+{ -+ /* Check we have hardware IBPB support before using it... */ -+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -+ { -+ opt_ibpb_entry_hvm = opt_ibpb_entry_pv = opt_ibpb_ctxt_switch = 0; -+ opt_ibpb_entry_dom0 = false; -+ return; -+ } -+ -+ /* -+ * IBPB-on-entry mitigations for Branch Type Confusion. 
-+ * -+ * IBPB && !BTC_NO selects all AMD/Hygon hardware, not known to be safe, -+ * that we can provide some form of mitigation on. -+ */ -+ if ( opt_ibpb_entry_pv == -1 ) -+ opt_ibpb_entry_pv = (IS_ENABLED(CONFIG_PV) && -+ boot_cpu_has(X86_FEATURE_IBPB) && -+ !boot_cpu_has(X86_FEATURE_BTC_NO)); -+ if ( opt_ibpb_entry_hvm == -1 ) -+ opt_ibpb_entry_hvm = (IS_ENABLED(CONFIG_HVM) && -+ boot_cpu_has(X86_FEATURE_IBPB) && -+ !boot_cpu_has(X86_FEATURE_BTC_NO)); -+ -+ if ( opt_ibpb_entry_pv ) -+ { -+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_PV); -+ -+ /* -+ * We only need to flush in IST context if we're protecting against PV -+ * guests. HVM IBPB-on-entry protections are both atomic with -+ * NMI/#MC, so can't interrupt Xen ahead of having already flushed the -+ * BTB. -+ */ -+ default_spec_ctrl_flags |= SCF_ist_ibpb; -+ } -+ if ( opt_ibpb_entry_hvm ) -+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_HVM); -+ -+ /* -+ * If we're using IBPB-on-entry to protect against PV and HVM guests -+ * (ignoring dom0 if trusted), then there's no need to also issue IBPB on -+ * context switch too. -+ */ -+ if ( opt_ibpb_ctxt_switch == -1 ) -+ opt_ibpb_ctxt_switch = !(opt_ibpb_entry_hvm && opt_ibpb_entry_pv); -+} -+ - /* Calculate whether this CPU is vulnerable to L1TF. */ - static __init void l1tf_calculations(uint64_t caps) - { -@@ -1014,8 +1098,12 @@ void spec_ctrl_init_domain(struct domain *d) - bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || - (opt_fb_clear_mmio && is_iommu_enabled(d))); - -+ bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && -+ (d->domain_id != 0 || opt_ibpb_entry_dom0)); -+ - d->arch.spec_ctrl_flags = - (verw ? SCF_verw : 0) | -+ (ibpb ? SCF_entry_ibpb : 0) | - 0; - } - -@@ -1162,12 +1250,15 @@ void __init init_speculation_mitigations(void) - } - - /* -- * Use STIBP by default if the hardware hint is set. Otherwise, leave it -- * off as it a severe performance pentalty on pre-eIBRS Intel hardware -- * where it was retrofitted in microcode. 
-+ * Use STIBP by default on all AMD systems. Zen3 and later enumerate -+ * STIBP_ALWAYS, but STIBP is needed on Zen2 as part of the mitigations -+ * for Branch Type Confusion. -+ * -+ * Leave STIBP off by default on Intel. Pre-eIBRS systems suffer a -+ * substantial perf hit when it was implemented in microcode. - */ - if ( opt_stibp == -1 ) -- opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS); -+ opt_stibp = !!boot_cpu_has(X86_FEATURE_AMD_STIBP); - - if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) || - boot_cpu_has(X86_FEATURE_AMD_STIBP)) ) -@@ -1239,9 +1330,7 @@ void __init init_speculation_mitigations(void) - if ( opt_rsb_hvm ) - setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM); - -- /* Check we have hardware IBPB support before using it... */ -- if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -- opt_ibpb_ctxt_switch = false; -+ ibpb_calculations(); - - /* Check whether Eager FPU should be enabled by default. */ - if ( opt_eager_fpu == -1 ) -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 3fc599a817c4..9403b81dc7af 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -65,7 +65,7 @@ - void init_speculation_mitigations(void); - void spec_ctrl_init_domain(struct domain *d); - --extern bool opt_ibpb_ctxt_switch; -+extern int8_t opt_ibpb_ctxt_switch; - extern bool opt_ssbd; - extern int8_t opt_eager_fpu; - extern int8_t opt_l1d_flush; --- -2.35.1 - @@ -1,4 +1,4 @@ -Xen upstream patchset #0 for 4.15.4-pre +Xen upstream patchset #0.1 for 4.15.4-pre Containing patches from RELEASE-4.15.3 (feecaf4abf733e83b7a297190819eca7a7f65168) |