summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomáš Mózes <hydrapolic@gmail.com>2024-02-03 19:12:02 +0100
committerTomáš Mózes <hydrapolic@gmail.com>2024-02-03 19:12:02 +0100
commit0fbc09bbe820146fd857c79bb150028703342c87 (patch)
tree94332b5d49d2af5bf62da5afd18776a9b8b87450
parentXen 4.17.3-pre-patchset-0 (diff)
downloadxen-upstream-patches-0fbc09bbe820146fd857c79bb150028703342c87.tar.gz
xen-upstream-patches-0fbc09bbe820146fd857c79bb150028703342c87.tar.bz2
xen-upstream-patches-0fbc09bbe820146fd857c79bb150028703342c87.zip
Xen 4.17.4-pre-patchset-0 (4.17.4-pre-patchset-0)
Signed-off-by: Tomáš Mózes <hydrapolic@gmail.com>
-rw-r--r--0001-update-Xen-version-to-4.17.4-pre.patch (renamed from 0001-update-Xen-version-to-4.17.3-pre.patch)14
-rw-r--r--0002-pci-fail-device-assignment-if-phantom-functions-cann.patch91
-rw-r--r--0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch84
-rw-r--r--0003-VT-d-Fix-else-vs-endif-misplacement.patch70
-rw-r--r--0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch45
-rw-r--r--0004-build-define-ARCH-and-SRCARCH-later.patch67
-rw-r--r--0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch123
-rw-r--r--0005-CirrusCI-drop-FreeBSD-12.patch39
-rw-r--r--0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch50
-rw-r--r--0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch123
-rw-r--r--0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch74
-rw-r--r--0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch58
-rw-r--r--0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch65
-rw-r--r--0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch50
-rw-r--r--0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch126
-rw-r--r--0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch70
-rw-r--r--0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch245
-rw-r--r--0010-rombios-Work-around-GCC-issue-99578.patch43
-rw-r--r--0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch36
-rw-r--r--0011-rombios-Avoid-using-K-R-function-syntax.patch74
-rw-r--r--0012-rombios-Remove-the-use-of-egrep.patch34
-rw-r--r--0013-CI-Resync-FreeBSD-config-with-staging.patch62
-rw-r--r--0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch43
-rw-r--r--0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch143
-rw-r--r--0016-x86-head-check-base-address-alignment.patch85
-rw-r--r--0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch275
-rw-r--r--0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch113
-rw-r--r--0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch147
-rw-r--r--0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch180
-rw-r--r--0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch64
-rw-r--r--0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch462
-rw-r--r--0023-build-correct-gas-noexecstack-check.patch34
-rw-r--r--0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch38
-rw-r--r--0025-tboot-Disable-CET-at-shutdown.patch53
-rw-r--r--0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch29
-rw-r--r--0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch100
-rw-r--r--0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch41
-rw-r--r--0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch111
-rw-r--r--0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch48
-rw-r--r--0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch74
-rw-r--r--0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch85
-rw-r--r--0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch83
-rw-r--r--0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch106
-rw-r--r--0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch74
-rw-r--r--0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch109
-rw-r--r--0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch89
-rw-r--r--0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch91
-rw-r--r--0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch228
-rw-r--r--0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch455
-rw-r--r--0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch64
-rw-r--r--0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch186
-rw-r--r--0043-libfsimage-xfs-Remove-dead-code.patch71
-rw-r--r--0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch33
-rw-r--r--0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch137
-rw-r--r--0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch62
-rw-r--r--0047-tools-pygrub-Remove-unnecessary-hypercall.patch60
-rw-r--r--0048-tools-pygrub-Small-refactors.patch65
-rw-r--r--0049-tools-pygrub-Open-the-output-files-earlier.patch105
-rw-r--r--0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch126
-rw-r--r--0051-tools-pygrub-Deprivilege-pygrub.patch307
-rw-r--r--0052-libxl-add-support-for-running-bootloader-in-restrict.patch251
-rw-r--r--0053-libxl-limit-bootloader-execution-in-restricted-mode.patch158
-rw-r--r--0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch104
-rw-r--r--0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch86
-rw-r--r--info.txt6
65 files changed, 704 insertions, 6120 deletions
diff --git a/0001-update-Xen-version-to-4.17.3-pre.patch b/0001-update-Xen-version-to-4.17.4-pre.patch
index 1be1cd1..b532743 100644
--- a/0001-update-Xen-version-to-4.17.3-pre.patch
+++ b/0001-update-Xen-version-to-4.17.4-pre.patch
@@ -1,25 +1,25 @@
-From 2f337a04bfc2dda794ae0fc108577ec72932f83b Mon Sep 17 00:00:00 2001
+From 4f6e9d4327eb5252f1e8cac97a095d8b8485dadb Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
-Date: Mon, 21 Aug 2023 15:52:13 +0200
-Subject: [PATCH 01/55] update Xen version to 4.17.3-pre
+Date: Tue, 30 Jan 2024 14:36:44 +0100
+Subject: [PATCH 01/10] update Xen version to 4.17.4-pre
---
xen/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xen/Makefile b/xen/Makefile
-index fbada570b8..f6005bd536 100644
+index a46e6330db..dd0b004e1c 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -6,7 +6,7 @@ this-makefile := $(call lastword,$(MAKEFILE_LIST))
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 4
export XEN_SUBVERSION = 17
--export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION)
-+export XEN_EXTRAVERSION ?= .3-pre$(XEN_VENDORVERSION)
+-export XEN_EXTRAVERSION ?= .3$(XEN_VENDORVERSION)
++export XEN_EXTRAVERSION ?= .4-pre$(XEN_VENDORVERSION)
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-include xen-version
--
-2.42.0
+2.43.0
diff --git a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
new file mode 100644
index 0000000..d91802f
--- /dev/null
+++ b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
@@ -0,0 +1,91 @@
+From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 30 Jan 2024 14:37:39 +0100
+Subject: [PATCH 02/10] pci: fail device assignment if phantom functions cannot
+ be assigned
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current behavior is that no error is reported if (some) phantom functions
+fail to be assigned during device add or assignment, so the operation succeeds
+even if some phantom functions are not correctly setup.
+
+This can lead to devices possibly being successfully assigned to a domU while
+some of the device phantom functions are still assigned to dom0. Even when the
+device is assigned to domIO before being assigned to a domU, phantom functions
+might fail to be assigned to domIO, and also fail to be assigned to the domU,
+leaving them assigned to dom0.
+
+Since the device can generate requests using the IDs of those phantom
+functions, given the scenario above a device in such state would be in control
+of a domU, but still capable of generating transactions that use a context ID
+targeting dom0 owned memory.
+
+Modify device assign in order to attempt to deassign the device if phantom
+functions failed to be assigned.
+
+Note that device addition is not modified in the same way, as in that case the
+device is assigned to a trusted domain, and hence partial assign can lead to
+device malfunction but not a security issue.
+
+This is XSA-449 / CVE-2023-46839
+
+Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e
+master date: 2024-01-30 14:28:01 +0100
+---
+ xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index 07d1986d33..8c62b14d19 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -1444,11 +1444,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+
+ pdev->fault.count = 0;
+
+- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
+- pci_to_dev(pdev), flag)) )
+- goto done;
++ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev),
++ flag);
+
+- for ( ; pdev->phantom_stride; rc = 0 )
++ while ( pdev->phantom_stride && !rc )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+@@ -1459,8 +1458,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+
+ done:
+ if ( rc )
+- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n",
+- d, &PCI_SBDF(seg, bus, devfn), rc);
++ {
++ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n",
++ d, devfn != pdev->devfn ? "phantom function " : "",
++ &PCI_SBDF(seg, bus, devfn), rc);
++
++ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) )
++ {
++ /*
++ * Device with phantom functions that failed to both assign and
++ * rollback. Mark the device as broken and crash the target domain,
++ * as the state of the functions at this point is unknown and Xen
++ * has no way to assert consistent context assignment among them.
++ */
++ pdev->broken = true;
++ if ( !is_hardware_domain(d) && d != dom_io )
++ domain_crash(d);
++ }
++ }
+ /* The device is assigned to dom_io so mark it as quarantined */
+ else if ( d == dom_io )
+ pdev->quarantine = true;
+--
+2.43.0
+
diff --git a/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch b/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch
deleted file mode 100644
index 1b62572..0000000
--- a/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 7d8897984927a51495e9a1b827aa4bce1d779b87 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Mon, 21 Aug 2023 15:53:17 +0200
-Subject: [PATCH 02/55] x86: fix build with old gcc after CPU policy changes
-
-Old gcc won't cope with initializers involving unnamed struct/union
-fields.
-
-Fixes: 441b1b2a50ea ("x86/emul: Switch x86_emulate_ctxt to cpu_policy")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 768846690d64bc730c1a1123e8de3af731bb2eb3
-master date: 2023-04-19 11:02:47 +0200
----
- tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 4 +++-
- xen/arch/x86/pv/emul-priv-op.c | 4 +++-
- xen/arch/x86/pv/ro-page-fault.c | 4 +++-
- 3 files changed, 9 insertions(+), 3 deletions(-)
-
-diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
-index 4885a68210..eeeb6931f4 100644
---- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
-+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
-@@ -893,12 +893,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
- struct x86_emulate_ctxt ctxt = {
- .data = &state,
- .regs = &input.regs,
-- .cpu_policy = &cp,
- .addr_size = 8 * sizeof(void *),
- .sp_size = 8 * sizeof(void *),
- };
- int rc;
-
-+ /* Not part of the initializer, for old gcc to cope. */
-+ ctxt.cpu_policy = &cp;
-+
- /* Reset all global state variables */
- memset(&input, 0, sizeof(input));
-
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 04416f1979..2c94beb10e 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -1327,12 +1327,14 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs)
- struct domain *currd = curr->domain;
- struct priv_op_ctxt ctxt = {
- .ctxt.regs = regs,
-- .ctxt.cpu_policy = currd->arch.cpu_policy,
- .ctxt.lma = !is_pv_32bit_domain(currd),
- };
- int rc;
- unsigned int eflags, ar;
-
-+ /* Not part of the initializer, for old gcc to cope. */
-+ ctxt.ctxt.cpu_policy = currd->arch.cpu_policy;
-+
- if ( !pv_emul_read_descriptor(regs->cs, curr, &ctxt.cs.base,
- &ctxt.cs.limit, &ar, 1) ||
- !(ar & _SEGMENT_S) ||
-diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
-index 0d02c7d2ab..f23ad5d184 100644
---- a/xen/arch/x86/pv/ro-page-fault.c
-+++ b/xen/arch/x86/pv/ro-page-fault.c
-@@ -356,7 +356,6 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
- unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG;
- struct x86_emulate_ctxt ctxt = {
- .regs = regs,
-- .cpu_policy = currd->arch.cpu_policy,
- .addr_size = addr_size,
- .sp_size = addr_size,
- .lma = addr_size > 32,
-@@ -364,6 +363,9 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
- int rc;
- bool mmio_ro;
-
-+ /* Not part of the initializer, for old gcc to cope. */
-+ ctxt.cpu_policy = currd->arch.cpu_policy;
-+
- /* Attempt to read the PTE that maps the VA being accessed. */
- pte = guest_get_eff_kern_l1e(addr);
-
---
-2.42.0
-
diff --git a/0003-VT-d-Fix-else-vs-endif-misplacement.patch b/0003-VT-d-Fix-else-vs-endif-misplacement.patch
new file mode 100644
index 0000000..2e7f78d
--- /dev/null
+++ b/0003-VT-d-Fix-else-vs-endif-misplacement.patch
@@ -0,0 +1,70 @@
+From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 30 Jan 2024 14:38:38 +0100
+Subject: [PATCH 03/10] VT-d: Fix "else" vs "#endif" misplacement
+
+In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This
+generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body
+is executed unconditionally.
+
+Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's
+clearer to follow. This in turn involves adjusting p2m_get_pagetable() to
+compile when CONFIG_HVM is disabled.
+
+This is XSA-450 / CVE-2023-46840.
+
+Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only")
+Reported-by: Teddy Astie <teddy.astie@vates.tech>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426
+master date: 2024-01-30 14:29:15 +0100
+---
+ xen/arch/x86/include/asm/p2m.h | 9 ++++++++-
+ xen/drivers/passthrough/vtd/iommu.c | 4 +---
+ 2 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
+index cd43d8621a..4f691533d5 100644
+--- a/xen/arch/x86/include/asm/p2m.h
++++ b/xen/arch/x86/include/asm/p2m.h
+@@ -447,7 +447,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m)
+ return p2m->p2m_class == p2m_alternate;
+ }
+
+-#define p2m_get_pagetable(p2m) ((p2m)->phys_table)
++#ifdef CONFIG_HVM
++static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m)
++{
++ return p2m->phys_table;
++}
++#else
++pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m);
++#endif
+
+ /*
+ * Ensure any deferred p2m TLB flush has been completed on all VCPUs.
+diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
+index b4c11a6b48..908b3ba6ee 100644
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -441,15 +441,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr,
+
+ if ( pgd_maddr )
+ /* nothing */;
+-#ifdef CONFIG_HVM
+- else if ( iommu_use_hap_pt(d) )
++ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) )
+ {
+ pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d));
+
+ pgd_maddr = pagetable_get_paddr(pgt);
+ }
+ else
+-#endif
+ {
+ if ( !hd->arch.vtd.pgd_maddr )
+ {
+--
+2.43.0
+
diff --git a/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch b/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch
deleted file mode 100644
index a395d7a..0000000
--- a/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 8d84be5b557b27e9cc53e48285aebad28a48468c Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Mon, 21 Aug 2023 15:53:47 +0200
-Subject: [PATCH 03/55] libxl: Use XEN_LIB_DIR to store bootloader from pygrub
-
-In osstest, the jobs using pygrub on arm64 on the branch linux-linus
-started to fails with:
- [Errno 28] No space left on device
- Error writing temporary copy of ramdisk
-
-This is because /var/run is small when dom0 has only 512MB to work
-with, /var/run is only 40MB. The size of both kernel and ramdisk on
-this jobs is now about 42MB, so not enough space in /var/run.
-
-So, to avoid writing a big binary in ramfs, we will use /var/lib
-instead, like we already do when saving the device model state on
-migration.
-
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
-master commit: ad89640ad766d3cb6c92fc8b6406ca6bbab44136
-master date: 2023-08-08 09:45:20 +0200
----
- tools/libs/light/libxl_bootloader.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
-index 1bc6e51827..108329b4a5 100644
---- a/tools/libs/light/libxl_bootloader.c
-+++ b/tools/libs/light/libxl_bootloader.c
-@@ -245,8 +245,8 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
- static void bootloader_setpaths(libxl__gc *gc, libxl__bootloader_state *bl)
- {
- uint32_t domid = bl->domid;
-- bl->outputdir = GCSPRINTF(XEN_RUN_DIR "/bootloader.%"PRIu32".d", domid);
-- bl->outputpath = GCSPRINTF(XEN_RUN_DIR "/bootloader.%"PRIu32".out", domid);
-+ bl->outputdir = GCSPRINTF(XEN_LIB_DIR "/bootloader.%"PRIu32".d", domid);
-+ bl->outputpath = GCSPRINTF(XEN_LIB_DIR "/bootloader.%"PRIu32".out", domid);
- }
-
- /* Callbacks */
---
-2.42.0
-
diff --git a/0004-build-define-ARCH-and-SRCARCH-later.patch b/0004-build-define-ARCH-and-SRCARCH-later.patch
deleted file mode 100644
index aebcbb7..0000000
--- a/0004-build-define-ARCH-and-SRCARCH-later.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 1c3927f8f6743538a35aa45a91a2d4adbde9f277 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Wed, 5 Jul 2023 08:25:03 +0200
-Subject: [PATCH 04/55] build: define ARCH and SRCARCH later
-
-Defining ARCH and SRCARCH later in xen/Makefile allows to switch to
-immediate evaluation variable type.
-
-ARCH and SRCARCH depend on value defined in Config.mk and aren't used
-for e.g. TARGET_SUBARCH or TARGET_ARCH, and not before they're needed in
-a sub-make or a rule.
-
-This will help reduce the number of times the shell rune is been
-run.
-
-With GNU make 4.4, the number of execution of the command present in
-these $(shell ) increased greatly. This is probably because as of make
-4.4, exported variable are also added to the environment of $(shell )
-construct.
-
-Also, `make -d` shows a lot of these:
- Makefile:39: not recursively expanding SRCARCH to export to shell function
- Makefile:38: not recursively expanding ARCH to export to shell function
-
-Reported-by: Jason Andryuk <jandryuk@gmail.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Tested-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 58e0a3f3b2c430f8640ef9df67ac857b0008ebc8)
----
- xen/Makefile | 13 +++++++------
- 1 file changed, 7 insertions(+), 6 deletions(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index f6005bd536..7ecfa6e8e9 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -35,12 +35,6 @@ MAKEFLAGS += -rR
-
- EFI_MOUNTPOINT ?= $(BOOT_DIR)/efi
-
--ARCH=$(XEN_TARGET_ARCH)
--SRCARCH=$(shell echo $(ARCH) | \
-- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
-- -e s'/riscv.*/riscv/g')
--export ARCH SRCARCH
--
- # Allow someone to change their config file
- export KCONFIG_CONFIG ?= .config
-
-@@ -241,6 +235,13 @@ include scripts/Kbuild.include
- include $(XEN_ROOT)/Config.mk
-
- # Set ARCH/SUBARCH appropriately.
-+
-+ARCH := $(XEN_TARGET_ARCH)
-+SRCARCH := $(shell echo $(ARCH) | \
-+ sed -e 's/x86.*/x86/' -e 's/arm\(32\|64\)/arm/g' \
-+ -e 's/riscv.*/riscv/g')
-+export ARCH SRCARCH
-+
- export TARGET_SUBARCH := $(XEN_TARGET_ARCH)
- export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
---
-2.42.0
-
diff --git a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
new file mode 100644
index 0000000..f1289aa
--- /dev/null
+++ b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
@@ -0,0 +1,123 @@
+From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Fri, 2 Feb 2024 08:01:09 +0100
+Subject: [PATCH 04/10] x86/amd: Extend CPU erratum #1474 fix to more affected
+ models
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Erratum #1474 has now been extended to cover models from family 17h ranges
+00-2Fh, so the errata now covers all the models released under Family
+17h (Zen, Zen+ and Zen2).
+
+Additionally extend the workaround to Family 18h (Hygon), since it's based on
+the Zen architecture and very likely affected.
+
+Rename all the zen2 related symbols to fam17, since the errata doesn't
+exclusively affect Zen2 anymore.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba
+master date: 2023-12-21 12:19:40 +0000
+---
+ xen/arch/x86/cpu/amd.c | 27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 29ae97e7c0..3d85e9797d 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk;
+ bool __ro_after_init amd_legacy_ssbd;
+ bool __initdata amd_virt_spec_ctrl;
+
+-static bool __read_mostly zen2_c6_disabled;
++static bool __read_mostly fam17_c6_disabled;
+
+ static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo,
+ unsigned int *hi)
+@@ -951,24 +951,24 @@ void amd_check_zenbleed(void)
+ val & chickenbit ? "chickenbit" : "microcode");
+ }
+
+-static void cf_check zen2_disable_c6(void *arg)
++static void cf_check fam17_disable_c6(void *arg)
+ {
+ /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */
+ const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22));
+ uint64_t val;
+
+- if (!zen2_c6_disabled) {
++ if (!fam17_c6_disabled) {
+ printk(XENLOG_WARNING
+ "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n");
+- zen2_c6_disabled = true;
++ fam17_c6_disabled = true;
+ /*
+ * Prevent CPU hotplug so that started CPUs will either see
+- * zen2_c6_disabled set, or will be handled by
++ * zen_c6_disabled set, or will be handled by
+ * smp_call_function().
+ */
+ while (!get_cpu_maps())
+ process_pending_softirqs();
+- smp_call_function(zen2_disable_c6, NULL, 0);
++ smp_call_function(fam17_disable_c6, NULL, 0);
+ put_cpu_maps();
+ }
+
+@@ -1273,8 +1273,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c)
+ amd_check_zenbleed();
+ amd_check_erratum_1485();
+
+- if (zen2_c6_disabled)
+- zen2_disable_c6(NULL);
++ if (fam17_c6_disabled)
++ fam17_disable_c6(NULL);
+
+ check_syscfg_dram_mod_en();
+
+@@ -1286,7 +1286,7 @@ const struct cpu_dev amd_cpu_dev = {
+ .c_init = init_amd,
+ };
+
+-static int __init cf_check zen2_c6_errata_check(void)
++static int __init cf_check amd_check_erratum_1474(void)
+ {
+ /*
+ * Errata #1474: A Core May Hang After About 1044 Days
+@@ -1294,7 +1294,8 @@ static int __init cf_check zen2_c6_errata_check(void)
+ */
+ s_time_t delta;
+
+- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch())
++ if (cpu_has_hypervisor ||
++ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18))
+ return 0;
+
+ /*
+@@ -1309,10 +1310,10 @@ static int __init cf_check zen2_c6_errata_check(void)
+ if (delta > 0) {
+ static struct timer errata_c6;
+
+- init_timer(&errata_c6, zen2_disable_c6, NULL, 0);
++ init_timer(&errata_c6, fam17_disable_c6, NULL, 0);
+ set_timer(&errata_c6, NOW() + delta);
+ } else
+- zen2_disable_c6(NULL);
++ fam17_disable_c6(NULL);
+
+ return 0;
+ }
+@@ -1320,4 +1321,4 @@ static int __init cf_check zen2_c6_errata_check(void)
+ * Must be executed after early_time_init() for tsc_ticks2ns() to have been
+ * calibrated. That prevents us doing the check in init_amd().
+ */
+-presmp_initcall(zen2_c6_errata_check);
++presmp_initcall(amd_check_erratum_1474);
+--
+2.43.0
+
diff --git a/0005-CirrusCI-drop-FreeBSD-12.patch b/0005-CirrusCI-drop-FreeBSD-12.patch
new file mode 100644
index 0000000..cca7bb0
--- /dev/null
+++ b/0005-CirrusCI-drop-FreeBSD-12.patch
@@ -0,0 +1,39 @@
+From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Fri, 2 Feb 2024 08:01:50 +0100
+Subject: [PATCH 05/10] CirrusCI: drop FreeBSD 12
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Went EOL by the end of December 2023, and the pkg repos have been shut down.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20
+master date: 2024-01-15 12:20:11 +0100
+---
+ .cirrus.yml | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/.cirrus.yml b/.cirrus.yml
+index 7e0beb200d..63f3afb104 100644
+--- a/.cirrus.yml
++++ b/.cirrus.yml
+@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE
+ - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin
+ - gmake -j`sysctl -n hw.ncpu` clang=y
+
+-task:
+- name: 'FreeBSD 12'
+- freebsd_instance:
+- image_family: freebsd-12-4
+- << : *FREEBSD_TEMPLATE
+-
+ task:
+ name: 'FreeBSD 13'
+ freebsd_instance:
+--
+2.43.0
+
diff --git a/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch b/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch
deleted file mode 100644
index 4f31614..0000000
--- a/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 56076ef445073458c39c481f9b70c3b4ff848839 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Wed, 5 Jul 2023 08:27:51 +0200
-Subject: [PATCH 05/55] build: remove TARGET_SUBARCH, a duplicate of ARCH
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit a6ab7dd061338c33faef629cbe52ed1608571d84)
----
- xen/Makefile | 3 +--
- xen/build.mk | 2 +-
- 2 files changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 7ecfa6e8e9..6e89bcf348 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -234,7 +234,7 @@ include scripts/Kbuild.include
- # we need XEN_TARGET_ARCH to generate the proper config
- include $(XEN_ROOT)/Config.mk
-
--# Set ARCH/SUBARCH appropriately.
-+# Set ARCH/SRCARCH appropriately.
-
- ARCH := $(XEN_TARGET_ARCH)
- SRCARCH := $(shell echo $(ARCH) | \
-@@ -242,7 +242,6 @@ SRCARCH := $(shell echo $(ARCH) | \
- -e 's/riscv.*/riscv/g')
- export ARCH SRCARCH
-
--export TARGET_SUBARCH := $(XEN_TARGET_ARCH)
- export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
- -e s'/riscv.*/riscv/g')
-diff --git a/xen/build.mk b/xen/build.mk
-index 758590c68e..d049d3a53a 100644
---- a/xen/build.mk
-+++ b/xen/build.mk
-@@ -41,7 +41,7 @@ include/xen/compile.h: include/xen/compile.h.in .banner FORCE
- targets += include/xen/compile.h
-
- -include $(wildcard .asm-offsets.s.d)
--asm-offsets.s: arch/$(TARGET_ARCH)/$(TARGET_SUBARCH)/asm-offsets.c
-+asm-offsets.s: arch/$(TARGET_ARCH)/$(ARCH)/asm-offsets.c
- $(CC) $(call cpp_flags,$(c_flags)) -S -g0 -o $@.new -MQ $@ $<
- $(call move-if-changed,$@.new,$@)
-
---
-2.42.0
-
diff --git a/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch b/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch
deleted file mode 100644
index 9eef37a..0000000
--- a/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 36e84ea02e1e8dce8f3a4e9351ab1c72dec3c11e Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Wed, 5 Jul 2023 08:29:49 +0200
-Subject: [PATCH 06/55] build: remove TARGET_ARCH, a duplicate of SRCARCH
-
-The same command is used to generate the value of both $(TARGET_ARCH)
-and $(SRCARCH), as $(ARCH) is an alias for $(XEN_TARGET_ARCH).
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit ac27b3beb9b7b423d5563768de890c7594c21b4e)
----
- xen/Makefile | 20 ++++++++------------
- xen/Rules.mk | 2 +-
- xen/build.mk | 6 +++---
- 3 files changed, 12 insertions(+), 16 deletions(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 6e89bcf348..1a3b9a081f 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -242,10 +242,6 @@ SRCARCH := $(shell echo $(ARCH) | \
- -e 's/riscv.*/riscv/g')
- export ARCH SRCARCH
-
--export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
-- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
-- -e s'/riscv.*/riscv/g')
--
- export CONFIG_SHELL := $(SHELL)
- export CC CXX LD NM OBJCOPY OBJDUMP ADDR2LINE
- export YACC = $(if $(BISON),$(BISON),bison)
-@@ -262,7 +258,7 @@ export XEN_TREEWIDE_CFLAGS := $(CFLAGS)
- ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
- CLANG_FLAGS :=
-
--ifeq ($(TARGET_ARCH),x86)
-+ifeq ($(SRCARCH),x86)
- # The tests to select whether the integrated assembler is usable need to happen
- # before testing any assembler features, or else the result of the tests would
- # be stale if the integrated assembler is not used.
-@@ -430,22 +426,22 @@ endif
-
- ifdef building_out_of_srctree
- CFLAGS += -I$(objtree)/include
-- CFLAGS += -I$(objtree)/arch/$(TARGET_ARCH)/include
-+ CFLAGS += -I$(objtree)/arch/$(SRCARCH)/include
- endif
- CFLAGS += -I$(srctree)/include
--CFLAGS += -I$(srctree)/arch/$(TARGET_ARCH)/include
-+CFLAGS += -I$(srctree)/arch/$(SRCARCH)/include
-
- # Note that link order matters!
- ALL_OBJS-y := common/built_in.o
- ALL_OBJS-y += drivers/built_in.o
- ALL_OBJS-y += lib/built_in.o
- ALL_OBJS-y += xsm/built_in.o
--ALL_OBJS-y += arch/$(TARGET_ARCH)/built_in.o
-+ALL_OBJS-y += arch/$(SRCARCH)/built_in.o
- ALL_OBJS-$(CONFIG_CRYPTO) += crypto/built_in.o
-
- ALL_LIBS-y := lib/lib.a
-
--include $(srctree)/arch/$(TARGET_ARCH)/arch.mk
-+include $(srctree)/arch/$(SRCARCH)/arch.mk
-
- # define new variables to avoid the ones defined in Config.mk
- export XEN_CFLAGS := $(CFLAGS)
-@@ -587,11 +583,11 @@ $(TARGET): outputmakefile FORCE
- $(Q)$(MAKE) $(build)=tools
- $(Q)$(MAKE) $(build)=. include/xen/compile.h
- $(Q)$(MAKE) $(build)=include all
-- $(Q)$(MAKE) $(build)=arch/$(TARGET_ARCH) include
-- $(Q)$(MAKE) $(build)=. arch/$(TARGET_ARCH)/include/asm/asm-offsets.h
-+ $(Q)$(MAKE) $(build)=arch/$(SRCARCH) include
-+ $(Q)$(MAKE) $(build)=. arch/$(SRCARCH)/include/asm/asm-offsets.h
- $(Q)$(MAKE) $(build)=. MKRELOC=$(MKRELOC) 'ALL_OBJS=$(ALL_OBJS-y)' 'ALL_LIBS=$(ALL_LIBS-y)' $@
-
--SUBDIRS = xsm arch/$(TARGET_ARCH) common drivers lib test
-+SUBDIRS = xsm arch/$(SRCARCH) common drivers lib test
- define all_sources
- ( find include -type f -name '*.h' -print; \
- find $(SUBDIRS) -type f -name '*.[chS]' -print )
-diff --git a/xen/Rules.mk b/xen/Rules.mk
-index 59072ae8df..8af3dd7277 100644
---- a/xen/Rules.mk
-+++ b/xen/Rules.mk
-@@ -180,7 +180,7 @@ cpp_flags = $(filter-out -Wa$(comma)% -flto,$(1))
- c_flags = -MMD -MP -MF $(depfile) $(XEN_CFLAGS)
- a_flags = -MMD -MP -MF $(depfile) $(XEN_AFLAGS)
-
--include $(srctree)/arch/$(TARGET_ARCH)/Rules.mk
-+include $(srctree)/arch/$(SRCARCH)/Rules.mk
-
- c_flags += $(_c_flags)
- a_flags += $(_c_flags)
-diff --git a/xen/build.mk b/xen/build.mk
-index d049d3a53a..9ecb104f1e 100644
---- a/xen/build.mk
-+++ b/xen/build.mk
-@@ -41,11 +41,11 @@ include/xen/compile.h: include/xen/compile.h.in .banner FORCE
- targets += include/xen/compile.h
-
- -include $(wildcard .asm-offsets.s.d)
--asm-offsets.s: arch/$(TARGET_ARCH)/$(ARCH)/asm-offsets.c
-+asm-offsets.s: arch/$(SRCARCH)/$(ARCH)/asm-offsets.c
- $(CC) $(call cpp_flags,$(c_flags)) -S -g0 -o $@.new -MQ $@ $<
- $(call move-if-changed,$@.new,$@)
-
--arch/$(TARGET_ARCH)/include/asm/asm-offsets.h: asm-offsets.s
-+arch/$(SRCARCH)/include/asm/asm-offsets.h: asm-offsets.s
- @(set -e; \
- echo "/*"; \
- echo " * DO NOT MODIFY."; \
-@@ -87,4 +87,4 @@ endif
- targets += prelink.o
-
- $(TARGET): prelink.o FORCE
-- $(Q)$(MAKE) $(build)=arch/$(TARGET_ARCH) $@
-+ $(Q)$(MAKE) $(build)=arch/$(SRCARCH) $@
---
-2.42.0
-
diff --git a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
new file mode 100644
index 0000000..dc64ad6
--- /dev/null
+++ b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
@@ -0,0 +1,74 @@
+From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Fri, 2 Feb 2024 08:02:20 +0100
+Subject: [PATCH 06/10] x86/intel: ensure Global Performance Counter Control is
+ setup correctly
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL
+MSR contains per-counter enable bits that are ANDed with the enable bit in the
+counter EVNTSEL MSR in order for a PMC counter to be enabled.
+
+So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable
+bits being set by default, but at least on some Intel Sapphire and Emerald
+Rapids this is no longer the case, and Xen reports:
+
+Testing NMI watchdog on all CPUs: 0 40 stuck
+
+The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0
+doesn't start counting when the enable bit in EVNTSEL0 is set, due to the
+relevant enable bit in PERF_GLOBAL_CTRL not being set.
+
+Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the
+general-purpose PMCs are enabled. Doing so brings the state of the package-BSP
+PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c
+master date: 2024-01-17 10:40:52 +0100
+---
+ xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
+index b40ac696e6..96723b5d44 100644
+--- a/xen/arch/x86/cpu/intel.c
++++ b/xen/arch/x86/cpu/intel.c
+@@ -528,9 +528,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c)
+ init_intel_cacheinfo(c);
+ if (c->cpuid_level > 9) {
+ unsigned eax = cpuid_eax(10);
++ unsigned int cnt = (eax >> 8) & 0xff;
++
+ /* Check for version and the number of counters */
+- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
++ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) {
++ uint64_t global_ctrl;
++ unsigned int cnt_mask = (1UL << cnt) - 1;
++
++ /*
++ * On (some?) Sapphire/Emerald Rapids platforms each
++ * package-BSP starts with all the enable bits for the
++ * general-purpose PMCs cleared. Adjust so counters
++ * can be enabled from EVNTSEL.
++ */
++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
++ if ((global_ctrl & cnt_mask) != cnt_mask) {
++ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#"
++ PRIx64 " adjusting to %#" PRIx64 "\n",
++ smp_processor_id(), global_ctrl,
++ global_ctrl | cnt_mask);
++ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
++ global_ctrl | cnt_mask);
++ }
+ __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
++ }
+ }
+
+ if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
+--
+2.43.0
+
diff --git a/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch b/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch
deleted file mode 100644
index 81e5ca4..0000000
--- a/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From a1f68fb56710c507f9c1ec8e8d784f5b1e4088f1 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Mon, 31 Jul 2023 15:02:18 +0200
-Subject: [PATCH 07/55] build: evaluate XEN_BUILD_* and XEN_DOMAIN immediately
-
-With GNU make 4.4, the number of execution of the command present in
-these $(shell ) increased greatly. This is probably because as of make
-4.4, exported variable are also added to the environment of $(shell )
-construct.
-
-Also, `make -d` shows a lot of these:
- Makefile:15: not recursively expanding XEN_BUILD_DATE to export to shell function
- Makefile:16: not recursively expanding XEN_BUILD_TIME to export to shell function
- Makefile:17: not recursively expanding XEN_BUILD_HOST to export to shell function
- Makefile:14: not recursively expanding XEN_DOMAIN to export to shell function
-
-So to avoid having these command been run more than necessary, we
-will replace ?= by an equivalent but with immediate expansion.
-
-Reported-by: Jason Andryuk <jandryuk@gmail.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Tested-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 0c594c1b57ee2ecec5f70826c53a2cf02a9c2acb)
----
- xen/Makefile | 16 ++++++++++++----
- 1 file changed, 12 insertions(+), 4 deletions(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 1a3b9a081f..7bb9de7bdc 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -11,10 +11,18 @@ export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
- -include xen-version
-
- export XEN_WHOAMI ?= $(USER)
--export XEN_DOMAIN ?= $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))
--export XEN_BUILD_DATE ?= $(shell LC_ALL=C date)
--export XEN_BUILD_TIME ?= $(shell LC_ALL=C date +%T)
--export XEN_BUILD_HOST ?= $(shell hostname)
-+ifeq ($(origin XEN_DOMAIN), undefined)
-+export XEN_DOMAIN := $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))
-+endif
-+ifeq ($(origin XEN_BUILD_DATE), undefined)
-+export XEN_BUILD_DATE := $(shell LC_ALL=C date)
-+endif
-+ifeq ($(origin XEN_BUILD_TIME), undefined)
-+export XEN_BUILD_TIME := $(shell LC_ALL=C date +%T)
-+endif
-+ifeq ($(origin XEN_BUILD_HOST), undefined)
-+export XEN_BUILD_HOST := $(shell hostname)
-+endif
-
- # Best effort attempt to find a python interpreter, defaulting to Python 3 if
- # available. Fall back to just `python` if `which` is nowhere to be found.
---
-2.42.0
-
diff --git a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
new file mode 100644
index 0000000..a1937a7
--- /dev/null
+++ b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
@@ -0,0 +1,65 @@
+From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 2 Feb 2024 08:02:51 +0100
+Subject: [PATCH 07/10] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT
+
+When receiving an INIT, a prior bugfix tried to ignore the INIT and continue
+onwards.
+
+Unfortunately it's not safe to return at that point in vmx_vmexit_handler().
+Just out of context in the first hunk is a local_irqs_enabled() which is
+depended-upon by the return-to-guest path, causing the following checklock
+failure in debug builds:
+
+ (XEN) Error: INIT received - ignoring
+ (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1
+ (XEN) Xen BUG at common/spinlock.c:132
+ (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]----
+ ...
+ (XEN) Xen call trace:
+ (XEN) [<ffff82d040238e10>] R check_lock+0xcd/0xe1
+ (XEN) [<ffff82d040238fe3>] F _spin_lock+0x1b/0x60
+ (XEN) [<ffff82d0402ed6a8>] F pt_update_irq+0x32/0x3bb
+ (XEN) [<ffff82d0402b9632>] F vmx_intr_assist+0x3b/0x51d
+ (XEN) [<ffff82d040206447>] F vmx_asm_vmexit_handler+0xf7/0x210
+
+Luckily, this is benign in release builds. Accidentally having IRQs disabled
+when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern.
+
+Drop the problematic early return. In hindsight, it's wrong to skip other
+normal VMExit steps.
+
+Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received")
+Reported-by: Reima ISHII <ishiir@g.ecc.u-tokyo.ac.jp>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4
+master date: 2024-01-18 20:59:06 +0000
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 072288a5ef..31f4a861c6 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -4037,7 +4037,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+
+ case EXIT_REASON_INIT:
+ printk(XENLOG_ERR "Error: INIT received - ignoring\n");
+- return; /* Renter the guest without further processing */
++ break;
+ }
+
+ /* Now enable interrupts so it's safe to take locks. */
+@@ -4323,6 +4323,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+ break;
+ }
+ case EXIT_REASON_EXTERNAL_INTERRUPT:
++ case EXIT_REASON_INIT:
+ /* Already handled above. */
+ break;
+ case EXIT_REASON_TRIPLE_FAULT:
+--
+2.43.0
+
diff --git a/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch b/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch
deleted file mode 100644
index 8a4cb7d..0000000
--- a/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 476d2624ec3cf3e60709580ff1df208bb8f616e2 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Mon, 31 Jul 2023 15:02:34 +0200
-Subject: [PATCH 08/55] Config.mk: evaluate XEN_COMPILE_ARCH and XEN_OS
- immediately
-
-With GNU make 4.4, the number of execution of the command present in
-these $(shell ) increased greatly. This is probably because as of make
-4.4, exported variable are also added to the environment of $(shell )
-construct.
-
-So to avoid having these command been run more than necessary, we
-will replace ?= by an equivalent but with immediate expansion.
-
-Reported-by: Jason Andryuk <jandryuk@gmail.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Tested-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit a07414d989cf52e5e84192b78023bee1589bbda4)
----
- Config.mk | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/Config.mk b/Config.mk
-index 8bc2bcd5f6..4864033c73 100644
---- a/Config.mk
-+++ b/Config.mk
-@@ -19,13 +19,17 @@ or = $(if $(strip $(1)),$(1),$(if $(strip $(2)),$(2),$(if $(strip $(3)),$(
-
- -include $(XEN_ROOT)/.config
-
--XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
-+ifeq ($(origin XEN_COMPILE_ARCH), undefined)
-+XEN_COMPILE_ARCH := $(shell uname -m | sed -e s/i.86/x86_32/ \
- -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \
- -e s/armv7.*/arm32/ -e s/armv8.*/arm64/ \
- -e s/aarch64/arm64/)
-+endif
-
- XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH)
--XEN_OS ?= $(shell uname -s)
-+ifeq ($(origin XEN_OS), undefined)
-+XEN_OS := $(shell uname -s)
-+endif
-
- CONFIG_$(XEN_OS) := y
-
---
-2.42.0
-
diff --git a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
new file mode 100644
index 0000000..12c2d59
--- /dev/null
+++ b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
@@ -0,0 +1,126 @@
+From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 2 Feb 2024 08:03:26 +0100
+Subject: [PATCH 08/10] x86/vmx: Disallow the use of inactivity states
+
+Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and
+enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for
+security bugs.
+
+The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the
+SDM in Vol3 27.7 "Special Features of VM Entry":
+
+ If VM entry ends with the logical processor in an inactive activity state,
+ the VM entry generates any special bus cycle that is normally generated when
+ that activity state is entered from the active state.
+
+Also,
+
+ Some activity states unconditionally block certain events.
+
+I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a
+VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than
+SIPIs.
+
+Both of these activity states are for the TXT ACM to use, not for regular
+hypervisors, and Xen doesn't support dropping the HLT intercept either.
+
+There are two paths in Xen which operate on ACTIVITY_STATE.
+
+1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork.
+
+ As regular VMs can't use any inactivity states, this is just duplicating
+ the 0 from construct_vmcs(). Retain the ability to query activity_state,
+ but crash the domain on any attempt to set an inactivity state.
+
+2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[].
+
+ Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC,
+ and remove ACTIVITY_STATE from vmcs_gstate_field[].
+
+ In virtual_vmentry(), we should trigger a VMEntry failure for the use of
+ any inactivity states, but there's no support for that in the code at all
+ so leave a TODO for when we finally start working on nested-virt in
+ earnest.
+
+Reported-by: Reima Ishii <ishiir@g.ecc.u-tokyo.ac.jp>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com>
+master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2
+master date: 2024-01-18 20:59:06 +0000
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 8 +++++++-
+ xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++--
+ xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 +
+ 3 files changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 31f4a861c6..35d391d8e5 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -1499,7 +1499,13 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v,
+ {
+ vmx_vmcs_enter(v);
+
+- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
++ if ( nrs->vmx.activity_state )
++ {
++ printk("Attempt to set %pv activity_state %#lx\n",
++ v, nrs->vmx.activity_state);
++ domain_crash(v->domain);
++ }
++
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info);
+ __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg);
+
+diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
+index f8fe8d0c14..515cb5ae77 100644
+--- a/xen/arch/x86/hvm/vmx/vvmx.c
++++ b/xen/arch/x86/hvm/vmx/vvmx.c
+@@ -910,7 +910,10 @@ static const u16 vmcs_gstate_field[] = {
+ GUEST_LDTR_AR_BYTES,
+ GUEST_TR_AR_BYTES,
+ GUEST_INTERRUPTIBILITY_INFO,
++ /*
++ * ACTIVITY_STATE is handled specially.
+ GUEST_ACTIVITY_STATE,
++ */
+ GUEST_SYSENTER_CS,
+ GUEST_PREEMPTION_TIMER,
+ /* natural */
+@@ -1211,6 +1214,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs)
+ nvcpu->nv_vmentry_pending = 0;
+ nvcpu->nv_vmswitch_in_progress = 1;
+
++ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */
++
+ /*
+ * EFER handling:
+ * hvm_set_efer won't work if CR0.PG = 1, so we change the value
+@@ -2327,8 +2332,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
+ data = hvm_cr4_guest_valid_bits(d);
+ break;
+ case MSR_IA32_VMX_MISC:
+- /* Do not support CR3-target feature now */
+- data = host_data & ~VMX_MISC_CR3_TARGET;
++ /* Do not support CR3-targets or activity states. */
++ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK);
+ break;
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ data = nept_get_ept_vpid_cap();
+diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
+index 78404e42b3..0af021d5f5 100644
+--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
++++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
+@@ -288,6 +288,7 @@ extern u32 vmx_secondary_exec_control;
+ #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL
+ extern u64 vmx_ept_vpid_cap;
+
++#define VMX_MISC_ACTIVITY_MASK 0x000001c0
+ #define VMX_MISC_PROC_TRACE 0x00004000
+ #define VMX_MISC_CR3_TARGET 0x01ff0000
+ #define VMX_MISC_VMWRITE_ALL 0x20000000
+--
+2.43.0
+
diff --git a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
new file mode 100644
index 0000000..9ee7104
--- /dev/null
+++ b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
@@ -0,0 +1,70 @@
+From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001
+From: Michal Orzel <michal.orzel@amd.com>
+Date: Fri, 2 Feb 2024 08:04:07 +0100
+Subject: [PATCH 09/10] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps
+ to $(targets)
+
+At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op
+under the hood) results in a crash. This is due to a profiler trying to
+access data in the .init.* sections (libfdt for Arm and libelf for x86)
+that are stripped after boot. Normally, the build system compiles any
+*.init.o file without COV_FLAGS. However, these two libraries are
+handled differently as sections will be renamed to init after linking.
+
+To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were
+added to nocov-y. This worked until e321576f4047 ("xen/build: start using
+if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y.
+This way, even though these objects appear as prerequisites of
+lib{fdt,elf}.o and the settings should propagate to them, make can also
+build them as a prerequisite of __build, in which case COV_FLAGS would
+still have the unwanted flags. Fix it by switching to $(targets) instead.
+
+Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB
+is not set. Otherwise, there is no section renaming and we should be able
+to run the coverage.
+
+Fixes: e321576f4047 ("xen/build: start using if_changed")
+Signed-off-by: Michal Orzel <michal.orzel@amd.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68
+master date: 2024-01-23 12:02:44 +0100
+---
+ xen/common/libelf/Makefile | 2 +-
+ xen/common/libfdt/Makefile | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile
+index 8a4522e4e1..917d12b006 100644
+--- a/xen/common/libelf/Makefile
++++ b/xen/common/libelf/Makefile
+@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE
+ $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE
+ $(call if_changed,ld)
+
+-extra-y += libelf-temp.o $(libelf-objs)
++targets += libelf-temp.o $(libelf-objs)
+diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile
+index 75aaefa2e3..4d14fd61ba 100644
+--- a/xen/common/libfdt/Makefile
++++ b/xen/common/libfdt/Makefile
+@@ -2,9 +2,9 @@ include $(src)/Makefile.libfdt
+
+ SECTIONS := text data $(SPECIAL_DATA_SECTIONS)
+ OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s))
++nocov-y += libfdt.o
+
+ obj-y += libfdt.o
+-nocov-y += libfdt.o
+
+ CFLAGS-y += -I$(srctree)/include/xen/libfdt/
+
+@@ -14,4 +14,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE
+ $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE
+ $(call if_changed,ld)
+
+-extra-y += libfdt-temp.o $(LIBFDT_OBJS)
++targets += libfdt-temp.o $(LIBFDT_OBJS)
+--
+2.43.0
+
diff --git a/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch b/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch
deleted file mode 100644
index 4f9c0bb..0000000
--- a/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch
+++ /dev/null
@@ -1,245 +0,0 @@
-From 37f1d68fa34220600f1e4ec82af5da70127757e5 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 18 Aug 2023 15:04:28 +0200
-Subject: [PATCH 09/55] x86emul: rework wrapping of libc functions in test and
- fuzzing harnesses
-
-Our present approach is working fully behind the compiler's back. This
-was found to not work with LTO. Employ ld's --wrap= option instead. Note
-that while this makes the build work at least with new enough gcc (it
-doesn't with gcc7, for example, due to tool chain side issues afaict),
-according to my testing things still won't work when building the
-fuzzing harness with afl-cc: While with the gcc7 tool chain I see afl-as
-getting invoked, this does not happen with gcc13. Yet without using that
-assembler wrapper the resulting binary will look uninstrumented to
-afl-fuzz.
-
-While checking the resulting binaries I noticed that we've gained uses
-of snprintf() and strstr(), which only just so happen to not cause any
-problems. Add a wrappers for them as well.
-
-Since we don't have any actual uses of v{,sn}printf(), no definitions of
-their wrappers appear (just yet). But I think we want
-__wrap_{,sn}printf() to properly use __real_v{,sn}printf() right away,
-which means we need delarations of the latter.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit 6fba45ca3be1c5d46cddb1eaf371d9e69550b244)
----
- tools/fuzz/x86_instruction_emulator/Makefile | 6 ++-
- tools/tests/x86_emulator/Makefile | 4 +-
- tools/tests/x86_emulator/wrappers.c | 55 ++++++++++++++------
- tools/tests/x86_emulator/x86-emulate.h | 14 +++--
- 4 files changed, 53 insertions(+), 26 deletions(-)
-
-diff --git a/tools/fuzz/x86_instruction_emulator/Makefile b/tools/fuzz/x86_instruction_emulator/Makefile
-index 13aa238503..c83959c847 100644
---- a/tools/fuzz/x86_instruction_emulator/Makefile
-+++ b/tools/fuzz/x86_instruction_emulator/Makefile
-@@ -29,6 +29,8 @@ GCOV_FLAGS := --coverage
- %-cov.o: %.c
- $(CC) -c $(CFLAGS) $(GCOV_FLAGS) $< -o $@
-
-+WRAPPED = $(shell sed -n 's,^ *WRAP(\([[:alnum:]_]*\));,\1,p' x86-emulate.h)
-+
- x86-emulate.h: x86_emulate/x86_emulate.h
- x86-emulate.o x86-emulate-cov.o: x86-emulate.h x86_emulate/x86_emulate.c
- fuzz-emul.o fuzz-emul-cov.o wrappers.o: x86-emulate.h
-@@ -37,10 +39,10 @@ x86-insn-fuzzer.a: fuzz-emul.o x86-emulate.o cpuid.o
- $(AR) rc $@ $^
-
- afl-harness: afl-harness.o fuzz-emul.o x86-emulate.o cpuid.o wrappers.o
-- $(CC) $(CFLAGS) $^ -o $@
-+ $(CC) $(CFLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) $^ -o $@
-
- afl-harness-cov: afl-harness-cov.o fuzz-emul-cov.o x86-emulate-cov.o cpuid.o wrappers.o
-- $(CC) $(CFLAGS) $(GCOV_FLAGS) $^ -o $@
-+ $(CC) $(CFLAGS) $(GCOV_FLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) $^ -o $@
-
- # Common targets
- .PHONY: all
-diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile
-index bd82598f97..a2fd6607c6 100644
---- a/tools/tests/x86_emulator/Makefile
-+++ b/tools/tests/x86_emulator/Makefile
-@@ -250,8 +250,10 @@ xop.h avx512f.h: simd-fma.c
-
- endif # 32-bit override
-
-+WRAPPED := $(shell sed -n 's,^ *WRAP(\([[:alnum:]_]*\));,\1,p' x86-emulate.h)
-+
- $(TARGET): x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
-- $(HOSTCC) $(HOSTCFLAGS) -o $@ $^
-+ $(HOSTCC) $(HOSTCFLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) -o $@ $^
-
- .PHONY: clean
- clean:
-diff --git a/tools/tests/x86_emulator/wrappers.c b/tools/tests/x86_emulator/wrappers.c
-index eba7cc93c5..3829a6f416 100644
---- a/tools/tests/x86_emulator/wrappers.c
-+++ b/tools/tests/x86_emulator/wrappers.c
-@@ -1,78 +1,103 @@
- #include <stdarg.h>
-
--#define WRAP(x) typeof(x) emul_##x
-+#define WRAP(x) typeof(x) __wrap_ ## x, __real_ ## x
- #include "x86-emulate.h"
-
--size_t emul_fwrite(const void *src, size_t sz, size_t n, FILE *f)
-+size_t __wrap_fwrite(const void *src, size_t sz, size_t n, FILE *f)
- {
- emul_save_fpu_state();
-- sz = fwrite(src, sz, n, f);
-+ sz = __real_fwrite(src, sz, n, f);
- emul_restore_fpu_state();
-
- return sz;
- }
-
--int emul_memcmp(const void *p1, const void *p2, size_t sz)
-+int __wrap_memcmp(const void *p1, const void *p2, size_t sz)
- {
- int rc;
-
- emul_save_fpu_state();
-- rc = memcmp(p1, p2, sz);
-+ rc = __real_memcmp(p1, p2, sz);
- emul_restore_fpu_state();
-
- return rc;
- }
-
--void *emul_memcpy(void *dst, const void *src, size_t sz)
-+void *__wrap_memcpy(void *dst, const void *src, size_t sz)
- {
- emul_save_fpu_state();
-- memcpy(dst, src, sz);
-+ __real_memcpy(dst, src, sz);
- emul_restore_fpu_state();
-
- return dst;
- }
-
--void *emul_memset(void *dst, int c, size_t sz)
-+void *__wrap_memset(void *dst, int c, size_t sz)
- {
- emul_save_fpu_state();
-- memset(dst, c, sz);
-+ __real_memset(dst, c, sz);
- emul_restore_fpu_state();
-
- return dst;
- }
-
--int emul_printf(const char *fmt, ...)
-+int __wrap_printf(const char *fmt, ...)
- {
- va_list varg;
- int rc;
-
- emul_save_fpu_state();
- va_start(varg, fmt);
-- rc = vprintf(fmt, varg);
-+ rc = __real_vprintf(fmt, varg);
- va_end(varg);
- emul_restore_fpu_state();
-
- return rc;
- }
-
--int emul_putchar(int c)
-+int __wrap_putchar(int c)
- {
- int rc;
-
- emul_save_fpu_state();
-- rc = putchar(c);
-+ rc = __real_putchar(c);
- emul_restore_fpu_state();
-
- return rc;
- }
-
--int emul_puts(const char *str)
-+int __wrap_puts(const char *str)
- {
- int rc;
-
- emul_save_fpu_state();
-- rc = puts(str);
-+ rc = __real_puts(str);
- emul_restore_fpu_state();
-
- return rc;
- }
-+
-+int __wrap_snprintf(char *buf, size_t n, const char *fmt, ...)
-+{
-+ va_list varg;
-+ int rc;
-+
-+ emul_save_fpu_state();
-+ va_start(varg, fmt);
-+ rc = __real_vsnprintf(buf, n, fmt, varg);
-+ va_end(varg);
-+ emul_restore_fpu_state();
-+
-+ return rc;
-+}
-+
-+char *__wrap_strstr(const char *s1, const char *s2)
-+{
-+ char *s;
-+
-+ emul_save_fpu_state();
-+ s = __real_strstr(s1, s2);
-+ emul_restore_fpu_state();
-+
-+ return s;
-+}
-diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h
-index 19bea9c38d..58760f096d 100644
---- a/tools/tests/x86_emulator/x86-emulate.h
-+++ b/tools/tests/x86_emulator/x86-emulate.h
-@@ -29,9 +29,7 @@
- #ifdef EOF
- # error "Must not include <stdio.h> before x86-emulate.h"
- #endif
--#ifdef WRAP
--# include <stdio.h>
--#endif
-+#include <stdio.h>
-
- #include <xen/xen.h>
-
-@@ -85,11 +83,7 @@ void emul_restore_fpu_state(void);
- * around the actual function.
- */
- #ifndef WRAP
--# if 0 /* This only works for explicit calls, not for compiler generated ones. */
--# define WRAP(x) typeof(x) x asm("emul_" #x)
--# else
--# define WRAP(x) asm(".equ " #x ", emul_" #x)
--# endif
-+# define WRAP(x) typeof(x) __wrap_ ## x
- #endif
-
- WRAP(fwrite);
-@@ -99,6 +93,10 @@ WRAP(memset);
- WRAP(printf);
- WRAP(putchar);
- WRAP(puts);
-+WRAP(snprintf);
-+WRAP(strstr);
-+WRAP(vprintf);
-+WRAP(vsnprintf);
-
- #undef WRAP
-
---
-2.42.0
-
diff --git a/0010-rombios-Work-around-GCC-issue-99578.patch b/0010-rombios-Work-around-GCC-issue-99578.patch
deleted file mode 100644
index 3995f02..0000000
--- a/0010-rombios-Work-around-GCC-issue-99578.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From ae1045c42954772e48862162d0e95fbc9393c91e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 17 Aug 2023 21:32:53 +0100
-Subject: [PATCH 10/55] rombios: Work around GCC issue 99578
-
-GCC 12 objects to pointers derived from a constant:
-
- util.c: In function 'find_rsdp':
- util.c:429:16: error: array subscript 0 is outside array bounds of 'uint16_t[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds]
- 429 | ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
- cc1: all warnings being treated as errors
-
-This is a GCC bug, but work around it rather than turning array-bounds
-checking off generally.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit e35138a2ffbe1fe71edaaaaae71063dc545a8416)
----
- tools/firmware/rombios/32bit/util.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/tools/firmware/rombios/32bit/util.c b/tools/firmware/rombios/32bit/util.c
-index 6c1c480514..a47e000a26 100644
---- a/tools/firmware/rombios/32bit/util.c
-+++ b/tools/firmware/rombios/32bit/util.c
-@@ -424,10 +424,10 @@ static struct acpi_20_rsdp *__find_rsdp(const void *start, unsigned int len)
- struct acpi_20_rsdp *find_rsdp(void)
- {
- struct acpi_20_rsdp *rsdp;
-- uint16_t ebda_seg;
-+ uint16_t *volatile /* GCC issue 99578 */ ebda_seg =
-+ ADDR_FROM_SEG_OFF(0x40, 0xe);
-
-- ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
-- rsdp = __find_rsdp((void *)(ebda_seg << 16), 1024);
-+ rsdp = __find_rsdp((void *)(*ebda_seg << 16), 1024);
- if (!rsdp)
- rsdp = __find_rsdp((void *)0xE0000, 0x20000);
-
---
-2.42.0
-
diff --git a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
new file mode 100644
index 0000000..ba99063
--- /dev/null
+++ b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
@@ -0,0 +1,36 @@
+From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Fri, 2 Feb 2024 08:04:33 +0100
+Subject: [PATCH 10/10] x86/p2m-pt: fix off by one in entry check assert
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the
+passed end mfn should be the last mfn to be mapped (not last + 1).
+
+Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: George Dunlap <george.dunlap@cloud.com>
+master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd
+master date: 2024-01-25 16:09:04 +0100
+---
+ xen/arch/x86/mm/p2m-pt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
+index eaba2b0fb4..f02ebae372 100644
+--- a/xen/arch/x86/mm/p2m-pt.c
++++ b/xen/arch/x86/mm/p2m-pt.c
+@@ -564,7 +564,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old,
+ if ( new == p2m_mmio_direct )
+ ASSERT(!mfn_eq(mfn, INVALID_MFN) &&
+ !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
+- mfn_x(mfn) + (1ul << order)));
++ mfn_x(mfn) + (1UL << order) - 1));
+ else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid ||
+ new == p2m_mmio_dm )
+ ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN));
+--
+2.43.0
+
diff --git a/0011-rombios-Avoid-using-K-R-function-syntax.patch b/0011-rombios-Avoid-using-K-R-function-syntax.patch
deleted file mode 100644
index 0bd761f..0000000
--- a/0011-rombios-Avoid-using-K-R-function-syntax.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 24487fec3bbebbc1fd3f00d16bca7fb0f56a5f30 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 18 Aug 2023 10:47:46 +0100
-Subject: [PATCH 11/55] rombios: Avoid using K&R function syntax
-
-Clang-15 complains:
-
- tcgbios.c:598:25: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
- void tcpa_calling_int19h()
- ^
- void
-
-C2x formally removes K&R syntax. The declarations for these functions in
-32bitprotos.h are already ANSI compatible. Update the definitions to match.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit a562afa5679d4a7ceb9cb9222fec1fea9a61f738)
----
- tools/firmware/rombios/32bit/tcgbios/tcgbios.c | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
-index fa22c4460a..ad0eac0d20 100644
---- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
-+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
-@@ -595,7 +595,7 @@ static void tcpa_add_measurement(uint32_t pcrIndex,
- /*
- * Add measurement to log about call of int 19h
- */
--void tcpa_calling_int19h()
-+void tcpa_calling_int19h(void)
- {
- tcpa_add_measurement(4, EV_ACTION, 0);
- }
-@@ -603,7 +603,7 @@ void tcpa_calling_int19h()
- /*
- * Add measurement to log about retuning from int 19h
- */
--void tcpa_returned_int19h()
-+void tcpa_returned_int19h(void)
- {
- tcpa_add_measurement(4, EV_ACTION, 1);
- }
-@@ -611,7 +611,7 @@ void tcpa_returned_int19h()
- /*
- * Add event separators for PCRs 0 to 7; specs 8.2.3
- */
--void tcpa_add_event_separators()
-+void tcpa_add_event_separators(void)
- {
- uint32_t pcrIndex = 0;
- while (pcrIndex <= 7) {
-@@ -624,7 +624,7 @@ void tcpa_add_event_separators()
- /*
- * Add a wake event to the log
- */
--void tcpa_wake_event()
-+void tcpa_wake_event(void)
- {
- tcpa_add_measurement_to_log(6,
- EV_ACTION,
-@@ -659,7 +659,7 @@ void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
- * Add measurement to the log about option rom scan
- * 10.4.3 : action 14
- */
--void tcpa_start_option_rom_scan()
-+void tcpa_start_option_rom_scan(void)
- {
- tcpa_add_measurement(2, EV_ACTION, 14);
- }
---
-2.42.0
-
diff --git a/0012-rombios-Remove-the-use-of-egrep.patch b/0012-rombios-Remove-the-use-of-egrep.patch
deleted file mode 100644
index 44702b4..0000000
--- a/0012-rombios-Remove-the-use-of-egrep.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From e418a77295e6b512d212b57123c11e4d4fb23e8c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 18 Aug 2023 11:05:00 +0100
-Subject: [PATCH 12/55] rombios: Remove the use of egrep
-
-As the Alpine 3.18 container notes:
-
- egrep: warning: egrep is obsolescent; using grep -E
-
-Adjust it.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 5ddac3c2852ecc120acab86fc403153a2097c5dc)
----
- tools/firmware/rombios/32bit/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/firmware/rombios/32bit/Makefile b/tools/firmware/rombios/32bit/Makefile
-index c058c71551..50d45647c2 100644
---- a/tools/firmware/rombios/32bit/Makefile
-+++ b/tools/firmware/rombios/32bit/Makefile
-@@ -26,7 +26,7 @@ $(TARGET): 32bitbios_all.o
- 32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
- $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
- @nm 32bitbios_all.o | \
-- egrep '^ +U ' >/dev/null && { \
-+ grep -E '^ +U ' >/dev/null && { \
- echo "There are undefined symbols in the BIOS:"; \
- nm -u 32bitbios_all.o; \
- exit 11; \
---
-2.42.0
-
diff --git a/0013-CI-Resync-FreeBSD-config-with-staging.patch b/0013-CI-Resync-FreeBSD-config-with-staging.patch
deleted file mode 100644
index dcd867b..0000000
--- a/0013-CI-Resync-FreeBSD-config-with-staging.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From f00d56309533427981f09ef2614f1bae4bcab62e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 17 Feb 2023 11:16:32 +0000
-Subject: [PATCH 13/55] CI: Resync FreeBSD config with staging
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CI: Update FreeBSD to 13.1
-
-Also print the compiler version before starting. It's not easy to find
-otherwise, and does change from time to time.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-(cherry picked from commit 5e7667ea2dd33e0e5e0f3a96db37fdb4ecd98fba)
-
-CI: Update FreeBSD to 13.2
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit f872a624cbf92de9944483eea7674ef80ced1380)
-
-CI: Update FreeBSD to 12.4
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-(cherry picked from commit a73560896ce3c513460f26bd1c205060d6ec4f8a)
----
- .cirrus.yml | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/.cirrus.yml b/.cirrus.yml
-index c38333e736..7e0beb200d 100644
---- a/.cirrus.yml
-+++ b/.cirrus.yml
-@@ -10,19 +10,20 @@ freebsd_template: &FREEBSD_TEMPLATE
- libxml2 glib git
-
- build_script:
-+ - cc --version
- - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin
- - gmake -j`sysctl -n hw.ncpu` clang=y
-
- task:
- name: 'FreeBSD 12'
- freebsd_instance:
-- image_family: freebsd-12-3
-+ image_family: freebsd-12-4
- << : *FREEBSD_TEMPLATE
-
- task:
- name: 'FreeBSD 13'
- freebsd_instance:
-- image_family: freebsd-13-0
-+ image_family: freebsd-13-2
- << : *FREEBSD_TEMPLATE
-
- task:
---
-2.42.0
-
diff --git a/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch b/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch
deleted file mode 100644
index 6e29490..0000000
--- a/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From 052a8d24bc670ab6503e21dfd2fb8bccfc22aa73 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 8 Aug 2023 14:53:42 +0100
-Subject: [PATCH 14/55] tools/vchan: Fix
- -Wsingle-bit-bitfield-constant-conversion
-
-Gitlab reports:
-
- node.c:158:17: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion]
-
- ctrl->blocking = 1;
- ^ ~
- 1 error generated.
- make[4]: *** [/builds/xen-project/people/andyhhp/xen/tools/vchan/../../tools/Rules.mk:188: node.o] Error 1
-
-In Xen 4.18, this was fixed with c/s 99ab02f63ea8 ("tools: convert bitfields
-to unsigned type") but this is an ABI change which can't be backported.
-
-Switch 1 for -1 to provide a minimally invasive way to fix the build.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
----
- tools/vchan/node.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/vchan/node.c b/tools/vchan/node.c
-index f1638f013d..a28293b720 100644
---- a/tools/vchan/node.c
-+++ b/tools/vchan/node.c
-@@ -155,7 +155,7 @@ int main(int argc, char **argv)
- perror("libxenvchan_*_init");
- exit(1);
- }
-- ctrl->blocking = 1;
-+ ctrl->blocking = -1;
-
- srand(seed);
- fprintf(stderr, "seed=%d\n", seed);
---
-2.42.0
-
diff --git a/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch b/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch
deleted file mode 100644
index 81e010b..0000000
--- a/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch
+++ /dev/null
@@ -1,143 +0,0 @@
-From 7b5155a79ea946dd513847d4e7ad2b7e6a4ebb73 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:45:29 +0200
-Subject: [PATCH 15/55] xen/vcpu: ignore VCPU_SSHOTTMR_future
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The usage of VCPU_SSHOTTMR_future in Linux prior to 4.7 is bogus.
-When the hypervisor returns -ETIME (timeout in the past) Linux keeps
-retrying to setup the timer with a higher timeout instead of
-self-injecting a timer interrupt.
-
-On boxes without any hardware assistance for logdirty we have seen HVM
-Linux guests < 4.7 with 32vCPUs give up trying to setup the timer when
-logdirty is enabled:
-
-CE: Reprogramming failure. Giving up
-CE: xen increased min_delta_ns to 1000000 nsec
-CE: Reprogramming failure. Giving up
-CE: Reprogramming failure. Giving up
-CE: xen increased min_delta_ns to 506250 nsec
-CE: xen increased min_delta_ns to 759375 nsec
-CE: xen increased min_delta_ns to 1000000 nsec
-CE: Reprogramming failure. Giving up
-CE: Reprogramming failure. Giving up
-CE: Reprogramming failure. Giving up
-Freezing user space processes ...
-INFO: rcu_sched detected stalls on CPUs/tasks: { 14} (detected by 10, t=60002 jiffies, g=4006, c=4005, q=14130)
-Task dump for CPU 14:
-swapper/14 R running task 0 0 1 0x00000000
-Call Trace:
- [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
- [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
- [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
- [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
- [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
- [<ffffffff900000d5>] ? start_cpu+0x5/0x14
-INFO: rcu_sched detected stalls on CPUs/tasks: { 26} (detected by 24, t=60002 jiffies, g=6922, c=6921, q=7013)
-Task dump for CPU 26:
-swapper/26 R running task 0 0 1 0x00000000
-Call Trace:
- [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
- [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
- [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
- [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
- [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
- [<ffffffff900000d5>] ? start_cpu+0x5/0x14
-INFO: rcu_sched detected stalls on CPUs/tasks: { 26} (detected by 24, t=60002 jiffies, g=8499, c=8498, q=7664)
-Task dump for CPU 26:
-swapper/26 R running task 0 0 1 0x00000000
-Call Trace:
- [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
- [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
- [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
- [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
- [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
- [<ffffffff900000d5>] ? start_cpu+0x5/0x14
-
-Thus leading to CPU stalls and a broken system as a result.
-
-Work around this bogus usage by ignoring the VCPU_SSHOTTMR_future in
-the hypervisor. Old Linux versions are the only ones known to have
-(wrongly) attempted to use the flag, and ignoring it is compatible
-with the behavior expected by any guests setting that flag.
-
-Note the usage of the flag has been removed from Linux by commit:
-
-c06b6d70feb3 xen/x86: don't lose event interrupts
-
-Which landed in Linux 4.7.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Henry Wang <Henry.Wang@arm.com> # CHANGELOG
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: 19c6cbd90965b1440bd551069373d6fa3f2f365d
-master date: 2023-05-03 13:36:05 +0200
----
- CHANGELOG.md | 6 ++++++
- xen/common/domain.c | 13 ++++++++++---
- xen/include/public/vcpu.h | 5 ++++-
- 3 files changed, 20 insertions(+), 4 deletions(-)
-
-diff --git a/CHANGELOG.md b/CHANGELOG.md
-index 7f4d0f25e9..bb0eceb69a 100644
---- a/CHANGELOG.md
-+++ b/CHANGELOG.md
-@@ -4,6 +4,12 @@ Notable changes to Xen will be documented in this file.
-
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
-
-+## [4.17.3](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.17.3)
-+
-+### Changed
-+ - Ignore VCPUOP_set_singleshot_timer's VCPU_SSHOTTMR_future flag. The only
-+ known user doesn't use it properly, leading to in-guest breakage.
-+
- ## [4.17.0](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.17.0) - 2022-12-12
-
- ### Changed
-diff --git a/xen/common/domain.c b/xen/common/domain.c
-index 53f7e734fe..30c2279673 100644
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -1691,9 +1691,16 @@ long common_vcpu_op(int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
- if ( copy_from_guest(&set, arg, 1) )
- return -EFAULT;
-
-- if ( (set.flags & VCPU_SSHOTTMR_future) &&
-- (set.timeout_abs_ns < NOW()) )
-- return -ETIME;
-+ if ( set.timeout_abs_ns < NOW() )
-+ {
-+ /*
-+ * Simplify the logic if the timeout has already expired and just
-+ * inject the event.
-+ */
-+ stop_timer(&v->singleshot_timer);
-+ send_timer_event(v);
-+ break;
-+ }
-
- migrate_timer(&v->singleshot_timer, smp_processor_id());
- set_timer(&v->singleshot_timer, set.timeout_abs_ns);
-diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
-index 81a3b3a743..a836b264a9 100644
---- a/xen/include/public/vcpu.h
-+++ b/xen/include/public/vcpu.h
-@@ -150,7 +150,10 @@ typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
- DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
-
- /* Flags to VCPUOP_set_singleshot_timer. */
-- /* Require the timeout to be in the future (return -ETIME if it's passed). */
-+ /*
-+ * Request the timeout to be in the future (return -ETIME if it's passed)
-+ * but can be ignored by the hypervisor.
-+ */
- #define _VCPU_SSHOTTMR_future (0)
- #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
-
---
-2.42.0
-
diff --git a/0016-x86-head-check-base-address-alignment.patch b/0016-x86-head-check-base-address-alignment.patch
deleted file mode 100644
index 2b9cead..0000000
--- a/0016-x86-head-check-base-address-alignment.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From e5f9987d5f63ecc3cc9884c614aca699a41e7ca7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:46:28 +0200
-Subject: [PATCH 16/55] x86/head: check base address alignment
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Ensure that the base address is 2M aligned, or else the page table
-entries created would be corrupt as reserved bits on the PDE end up
-set.
-
-We have encountered a broken firmware where grub2 would end up loading
-Xen at a non 2M aligned region when using the multiboot2 protocol, and
-that caused a very difficult to debug triple fault.
-
-If the alignment is not as required by the page tables print an error
-message and stop the boot. Also add a build time check that the
-calculation of symbol offsets doesn't break alignment of passed
-addresses.
-
-The check could be performed earlier, but so far the alignment is
-required by the page tables, and hence feels more natural that the
-check lives near to the piece of code that requires it.
-
-Note that when booted as an EFI application from the PE entry point
-the alignment check is already performed by
-efi_arch_load_addr_check(), and hence there's no need to add another
-check at the point where page tables get built in
-efi_arch_memory_setup().
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 0946068e7faea22868c577d7afa54ba4970ff520
-master date: 2023-05-03 13:36:25 +0200
----
- xen/arch/x86/boot/head.S | 14 ++++++++++++++
- 1 file changed, 14 insertions(+)
-
-diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
-index 245c859dd7..6bc64c9e86 100644
---- a/xen/arch/x86/boot/head.S
-+++ b/xen/arch/x86/boot/head.S
-@@ -1,3 +1,4 @@
-+#include <xen/lib.h>
- #include <xen/multiboot.h>
- #include <xen/multiboot2.h>
- #include <public/xen.h>
-@@ -121,6 +122,7 @@ multiboot2_header:
- .Lbad_ldr_nst: .asciz "ERR: EFI SystemTable is not provided by bootloader!"
- .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
- .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
-+.Lbad_alg_msg: .asciz "ERR: Xen must be loaded at a 2Mb boundary!"
-
- .section .init.data, "aw", @progbits
- .align 4
-@@ -146,6 +148,9 @@ bad_cpu:
- not_multiboot:
- mov $sym_offs(.Lbad_ldr_msg), %ecx
- jmp .Lget_vtb
-+.Lnot_aligned:
-+ mov $sym_offs(.Lbad_alg_msg), %ecx
-+ jmp .Lget_vtb
- .Lmb2_no_st:
- /*
- * Here we are on EFI platform. vga_text_buffer was zapped earlier
-@@ -673,6 +678,15 @@ trampoline_setup:
- cmp %edi, %eax
- jb 1b
-
-+ .if !IS_ALIGNED(sym_offs(0), 1 << L2_PAGETABLE_SHIFT)
-+ .error "Symbol offset calculation breaks alignment"
-+ .endif
-+
-+ /* Check that the image base is aligned. */
-+ lea sym_esi(_start), %eax
-+ test $(1 << L2_PAGETABLE_SHIFT) - 1, %eax
-+ jnz .Lnot_aligned
-+
- /* Map Xen into the higher mappings using 2M superpages. */
- lea _PAGE_PSE + PAGE_HYPERVISOR_RWX + sym_esi(_start), %eax
- mov $sym_offs(_start), %ecx /* %eax = PTE to write ^ */
---
-2.42.0
-
diff --git a/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch b/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch
deleted file mode 100644
index a4501a3..0000000
--- a/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch
+++ /dev/null
@@ -1,275 +0,0 @@
-From f04295dd802fb6cd43a02ec59a5964b2c5950fe1 Mon Sep 17 00:00:00 2001
-From: George Dunlap <george.dunlap@cloud.com>
-Date: Tue, 5 Sep 2023 08:47:14 +0200
-Subject: [PATCH 17/55] xenalyze: Handle start-of-day ->RUNNING transitions
-
-A recent xentrace highlighted an unhandled corner case in the vcpu
-"start-of-day" logic, if the trace starts after the last running ->
-non-running transition, but before the first non-running -> running
-transition. Because start-of-day wasn't handled, vcpu_next_update()
-was expecting p->current to be NULL, and tripping out with the
-following error message when it wasn't:
-
-vcpu_next_update: FATAL: p->current not NULL! (d32768dv$p, runstate RUNSTATE_INIT)
-
-where 32768 is the DEFAULT_DOMAIN, and $p is the pcpu number.
-
-Instead of calling vcpu_start() piecemeal throughout
-sched_runstate_process(), call it at the top of the function if the
-vcpu in question is still in RUNSTATE_INIT, so that we can handle all
-the cases in one place.
-
-Sketch out at the top of the function all cases which we need to
-handle, and what to do in those cases. Some transitions tell us where
-v is running; some transitions tell us about what is (or is not)
-running on p; some transitions tell us neither.
-
-If a transition tells us where v is now running, update its state;
-otherwise leave it in INIT, in order to avoid having to deal with TSC
-skew on start-up.
-
-If a transition tells us what is or is not running on p, update
-p->current (either to v or NULL). Otherwise leave it alone.
-
-If neither, do nothing.
-
-Reifying those rules:
-
-- If we're continuing to run, set v to RUNNING, and use p->first_tsc
- as the runstate time.
-
-- If we're starting to run, set v to RUNNING, and use ri->tsc as the
- runstate time.
-
-- If v is being descheduled, leave v in the INIT state to avoid dealing
- with TSC skew; but set p->current to NULL so that whatever is
- scheduled next won't trigger the assert in vcpu_next_update().
-
-- If a vcpu is waking up (switching from one non-runnable state to
- another non-runnable state), leave v in INIT, and p in whatever
- state it's in (which may be the default domain, or some other vcpu
- which has already run).
-
-While here, fix the comment above vcpu_start; it's called when the
-vcpu state is INIT, not when current is the default domain.
-
-Signed-off-by: George Dunlap <george.dunlap@cloud.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: aab4b38b5d77e3c65f44bacd56427a85b7392a11
-master date: 2023-06-30 11:25:33 +0100
----
- tools/xentrace/xenalyze.c | 159 ++++++++++++++++++++++++--------------
- 1 file changed, 101 insertions(+), 58 deletions(-)
-
-diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
-index e7ec284eea..9b4b62c82f 100644
---- a/tools/xentrace/xenalyze.c
-+++ b/tools/xentrace/xenalyze.c
-@@ -6885,39 +6885,86 @@ void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc)
- p->lost_record.seen_valid_schedule = 1;
- }
-
--/* If current is the default domain, we're fixing up from something
-- * like start-of-day. Update what we can. */
--void vcpu_start(struct pcpu_info *p, struct vcpu_data *v) {
-- /* If vcpus are created, or first show up, in a "dead zone", this will
-- * fail. */
-- if( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
-- fprintf(stderr, "Strange, p->current not default domain!\n");
-- error(ERR_FILE, NULL);
-- return;
-- }
-+/*
-+ * If the vcpu in question is in state INIT, we're fixing up from something
-+ * like start-of-day. Update what we can.
-+ */
-+void vcpu_start(struct pcpu_info *p, struct vcpu_data *v,
-+ int old_runstate, int new_runstate, tsc_t ri_tsc) {
-+ tsc_t tsc;
-+
-+ /*
-+ *
-+ * Cases:
-+ * running -> running:
-+ * v -> running, using p->first_tsc
-+ * {runnable, blocked} -> running:
-+ * v -> running, using ri->tsc
-+ * running -> {runnable, blocked}:
-+ * Leave v INIT, but clear p->current in case another vcpu is scheduled
-+ * blocked -> runnable:
-+ * Leave INIT, and also leave p->current, since we still don't know who's scheduled here
-+ */
-+
-+ /*
-+ * NB that a vcpu won't come out of INIT until it starts running somewhere.
-+ * If this event is pcpu that has already seen a scheduling event, p->current
-+ * should be null; if this is the first scheduling event on this pcpu,
-+ * p->current should be the default domain.
-+ */
-+ if( old_runstate == RUNSTATE_RUNNING ) {
-+ if ( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
-+ fprintf(stderr, "Strange, p->current not default domain!\n");
-+ error(ERR_FILE, NULL);
-+ return;
-
-- if(!p->first_tsc) {
-- fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
-- error(ERR_FILE, NULL);
-+ }
-+
-+ if(!p->first_tsc) {
-+ fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
-+ error(ERR_FILE, NULL);
-+ }
-+
-+ if(p->first_tsc <= p->current->runstate.tsc) {
-+ fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
-+ p->first_tsc,
-+ p->current->runstate.tsc);
-+ error(ERR_FILE, NULL);
-+ }
-+
-+ /* Change default domain to 'queued' */
-+ runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
-+
-+ /*
-+ * Set current to NULL, so that if another vcpu (not in INIT)
-+ * is scheduled here, we don't trip over the check in
-+ * vcpu_next_update()
-+ */
-+ p->current = NULL;
- }
-
-- if(p->first_tsc <= p->current->runstate.tsc) {
-- fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
-- p->first_tsc,
-- p->current->runstate.tsc);
-- error(ERR_FILE, NULL);
-+ /* TSC skew at start-of-day is hard to deal with. Don't
-+ * bring a vcpu out of INIT until it's seen to be actually
-+ * running somewhere. */
-+ if ( new_runstate != RUNSTATE_RUNNING ) {
-+ fprintf(warn, "First schedule for d%dv%d doesn't take us into a running state; leaving INIT\n",
-+ v->d->did, v->vid);
-+
-+ return;
- }
-
-- /* Change default domain to 'queued' */
-- runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
-+ tsc = ri_tsc;
-+ if ( old_runstate == RUNSTATE_RUNNING ) {
-+ /* FIXME: Copy over data from the default domain this interval */
-+ fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
-+ v->d->did, v->vid, p->last_tsc - p->first_tsc);
-
-- /* FIXME: Copy over data from the default domain this interval */
-- fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
-- v->d->did, v->vid, p->last_tsc - p->first_tsc);
-+ tsc = p->first_tsc;
-+ }
-
- /* Simulate the time since the first tsc */
-- runstate_update(v, RUNSTATE_RUNNING, p->first_tsc);
-- p->time.tsc = p->first_tsc;
-+ runstate_update(v, RUNSTATE_RUNNING, tsc);
-+ p->time.tsc = tsc;
- p->current = v;
- pcpu_string_draw(p);
- v->p = p;
-@@ -7021,6 +7068,13 @@ void sched_runstate_process(struct pcpu_info *p)
- last_oldstate = v->runstate.last_oldstate;
- v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
-
-+ /* Handle all "start-of-day" issues in one place. This can be
-+ * done before any of the other tracks or sanity checks. */
-+ if ( v->runstate.state == RUNSTATE_INIT ) {
-+ vcpu_start(p, v, sevt.old_runstate, sevt.new_runstate, ri->tsc);
-+ return;
-+ }
-+
- /* Close vmexits when the putative reason for blocking / &c stops.
- * This way, we don't account cpu contention to some other overhead. */
- if(sevt.new_runstate == RUNSTATE_RUNNABLE
-@@ -7190,32 +7244,27 @@ update:
- * or stopping actually running on a physical cpu. */
- if ( type == CONTINUE )
- {
-- if( v->runstate.state == RUNSTATE_INIT ) {
-- /* Start-of-day; account first tsc -> now to v */
-- vcpu_start(p, v);
-- } else {
-- /* Continue running. First, do some sanity checks */
-- if ( v->runstate.state == RUNSTATE_LOST ) {
-- fprintf(warn, "WARNING: continue with d%dv%d in RUNSTATE_LOST. Resetting current.\n",
-- v->d->did, v->vid);
-- if ( p->current )
-- vcpu_prev_update(p, p->current, ri->tsc, RUNSTATE_LOST);
-- vcpu_next_update(p, v, ri->tsc);
-- }
-- else if( v->runstate.state != RUNSTATE_RUNNING ) {
-- /* This should never happen. */
-- fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
-- v->d->did, v->vid, runstate_name[v->runstate.state]);
-- error(ERR_FILE, NULL);
-- } else if ( v->p != p ) {
-- fprintf(warn, "FATAL: continue on p%d, but d%dv%d p%d!\n",
-- p->pid, v->d->did, v->vid,
-- v->p ? v->p->pid : -1);
-- error(ERR_FILE, NULL);
-- }
--
-- runstate_update(v, RUNSTATE_RUNNING, ri->tsc);
-+ /* Continue running. First, do some sanity checks */
-+ if ( v->runstate.state == RUNSTATE_LOST ) {
-+ fprintf(warn, "WARNING: continue with d%dv%d in RUNSTATE_LOST. Resetting current.\n",
-+ v->d->did, v->vid);
-+ if ( p->current )
-+ vcpu_prev_update(p, p->current, ri->tsc, RUNSTATE_LOST);
-+ vcpu_next_update(p, v, ri->tsc);
-+ }
-+ else if( v->runstate.state != RUNSTATE_RUNNING ) {
-+ /* This should never happen. */
-+ fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
-+ v->d->did, v->vid, runstate_name[v->runstate.state]);
-+ error(ERR_FILE, NULL);
-+ } else if ( v->p != p ) {
-+ fprintf(warn, "FATAL: continue on p%d, but d%dv%d p%d!\n",
-+ p->pid, v->d->did, v->vid,
-+ v->p ? v->p->pid : -1);
-+ error(ERR_FILE, NULL);
- }
-+
-+ runstate_update(v, RUNSTATE_RUNNING, ri->tsc);
- }
- else if ( sevt.old_runstate == RUNSTATE_RUNNING
- || v->runstate.state == RUNSTATE_RUNNING )
-@@ -7232,10 +7281,7 @@ update:
- * # (should never happen)
- */
- if( sevt.old_runstate == RUNSTATE_RUNNING ) {
-- if( v->runstate.state == RUNSTATE_INIT ) {
-- /* Start-of-day; account first tsc -> now to v */
-- vcpu_start(p, v);
-- } else if( v->runstate.state != RUNSTATE_RUNNING
-+ if( v->runstate.state != RUNSTATE_RUNNING
- && v->runstate.state != RUNSTATE_LOST ) {
- /* This should never happen. */
- fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
-@@ -7264,11 +7310,8 @@ update:
-
- vcpu_next_update(p, v, ri->tsc);
- }
-- else if ( v->runstate.state != RUNSTATE_INIT )
-+ else
- {
-- /* TSC skew at start-of-day is hard to deal with. Don't
-- * bring a vcpu out of INIT until it's seen to be actually
-- * running somewhere. */
- runstate_update(v, sevt.new_runstate, ri->tsc);
- }
-
---
-2.42.0
-
diff --git a/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch b/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch
deleted file mode 100644
index a03f86e..0000000
--- a/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch
+++ /dev/null
@@ -1,113 +0,0 @@
-From d0cdd34dd815bf99c3f8a7bddfdde5ae59b0f0db Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:47:34 +0200
-Subject: [PATCH 18/55] x86/ioapic: sanitize IO-APIC pins before enabling lapic
- LVTERR/ESR
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current logic to init the local APIC and the IO-APIC does init the
-local APIC LVTERR/ESR before doing any sanitization on the IO-APIC pin
-configuration. It's already noted on enable_IO_APIC() that Xen
-shouldn't trust the IO-APIC being empty at bootup.
-
-At XenServer we have a system where the IO-APIC 0 is handed to Xen
-with pin 0 unmasked, set to Fixed delivery mode, edge triggered and
-with a vector of 0 (all fields of the RTE are zeroed). Once the local
-APIC LVTERR/ESR is enabled periodic injections from such pin cause the
-local APIC to in turn inject periodic error vectors:
-
-APIC error on CPU0: 00(40), Received illegal vector
-APIC error on CPU0: 40(40), Received illegal vector
-APIC error on CPU0: 40(40), Received illegal vector
-APIC error on CPU0: 40(40), Received illegal vector
-APIC error on CPU0: 40(40), Received illegal vector
-APIC error on CPU0: 40(40), Received illegal vector
-
-That prevents Xen from booting.
-
-Move the masking of the IO-APIC pins ahead of the setup of the local
-APIC. This has the side effect of also moving the detection of the
-pin where the i8259 is connected, as such detection must be done
-before masking any pins.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 813da5f0e73b8cbd2ac3c7922506e58c28cd736d
-master date: 2023-07-17 10:31:10 +0200
----
- xen/arch/x86/apic.c | 4 ++++
- xen/arch/x86/include/asm/irq.h | 1 +
- xen/arch/x86/io_apic.c | 4 +---
- xen/arch/x86/smpboot.c | 5 +++++
- 4 files changed, 11 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
-index 47e6e5fe41..33103d3e91 100644
---- a/xen/arch/x86/apic.c
-+++ b/xen/arch/x86/apic.c
-@@ -1491,6 +1491,10 @@ int __init APIC_init_uniprocessor (void)
- physids_clear(phys_cpu_present_map);
- physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);
-
-+ if ( !skip_ioapic_setup && nr_ioapics )
-+ /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
-+ enable_IO_APIC();
-+
- setup_local_APIC(true);
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
-diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
-index 76e6ed6d60..f6a0207a80 100644
---- a/xen/arch/x86/include/asm/irq.h
-+++ b/xen/arch/x86/include/asm/irq.h
-@@ -122,6 +122,7 @@ bool bogus_8259A_irq(unsigned int irq);
- int i8259A_suspend(void);
- int i8259A_resume(void);
-
-+void enable_IO_APIC(void);
- void setup_IO_APIC(void);
- void disable_IO_APIC(void);
- void setup_ioapic_dest(void);
-diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
-index 9b8a972cf5..25a08b1ea6 100644
---- a/xen/arch/x86/io_apic.c
-+++ b/xen/arch/x86/io_apic.c
-@@ -1273,7 +1273,7 @@ static void cf_check _print_IO_APIC_keyhandler(unsigned char key)
- __print_IO_APIC(0);
- }
-
--static void __init enable_IO_APIC(void)
-+void __init enable_IO_APIC(void)
- {
- int i8259_apic, i8259_pin;
- int i, apic;
-@@ -2067,8 +2067,6 @@ static void __init ioapic_pm_state_alloc(void)
-
- void __init setup_IO_APIC(void)
- {
-- enable_IO_APIC();
--
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index b46fd9ab18..41ec3211ac 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -1232,6 +1232,11 @@ void __init smp_prepare_cpus(void)
- verify_local_APIC();
-
- connect_bsp_APIC();
-+
-+ if ( !skip_ioapic_setup && nr_ioapics )
-+ /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
-+ enable_IO_APIC();
-+
- setup_local_APIC(true);
-
- if ( !skip_ioapic_setup && nr_ioapics )
---
-2.42.0
-
diff --git a/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch b/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch
deleted file mode 100644
index 10e5946..0000000
--- a/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch
+++ /dev/null
@@ -1,147 +0,0 @@
-From a885649098e06432939907eee84f735a644883e6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:48:43 +0200
-Subject: [PATCH 19/55] x86/ioapic: add a raw field to RTE struct
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Further changes will require access to the full RTE as a single value
-in order to pass it to IOMMU interrupt remapping handlers.
-
-No functional change intended.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: cdc48cb5a74b10c2b07a09d2f554756d730bfee3
-master date: 2023-07-28 09:39:44 +0200
----
- xen/arch/x86/include/asm/io_apic.h | 57 +++++++++++++-----------
- xen/arch/x86/io_apic.c | 2 +-
- xen/drivers/passthrough/amd/iommu_intr.c | 4 +-
- xen/drivers/passthrough/vtd/intremap.c | 4 +-
- 4 files changed, 35 insertions(+), 32 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/io_apic.h b/xen/arch/x86/include/asm/io_apic.h
-index ef0878b09e..a558bb063c 100644
---- a/xen/arch/x86/include/asm/io_apic.h
-+++ b/xen/arch/x86/include/asm/io_apic.h
-@@ -89,35 +89,38 @@ enum ioapic_irq_destination_types {
- };
-
- struct IO_APIC_route_entry {
-- unsigned int vector:8;
-- unsigned int delivery_mode:3; /*
-- * 000: FIXED
-- * 001: lowest prio
-- * 111: ExtINT
-- */
-- unsigned int dest_mode:1; /* 0: physical, 1: logical */
-- unsigned int delivery_status:1;
-- unsigned int polarity:1; /* 0: low, 1: high */
-- unsigned int irr:1;
-- unsigned int trigger:1; /* 0: edge, 1: level */
-- unsigned int mask:1; /* 0: enabled, 1: disabled */
-- unsigned int __reserved_2:15;
--
- union {
- struct {
-- unsigned int __reserved_1:24;
-- unsigned int physical_dest:4;
-- unsigned int __reserved_2:4;
-- } physical;
--
-- struct {
-- unsigned int __reserved_1:24;
-- unsigned int logical_dest:8;
-- } logical;
--
-- /* used when Interrupt Remapping with EIM is enabled */
-- unsigned int dest32;
-- } dest;
-+ unsigned int vector:8;
-+ unsigned int delivery_mode:3; /*
-+ * 000: FIXED
-+ * 001: lowest prio
-+ * 111: ExtINT
-+ */
-+ unsigned int dest_mode:1; /* 0: physical, 1: logical */
-+ unsigned int delivery_status:1;
-+ unsigned int polarity:1; /* 0: low, 1: high */
-+ unsigned int irr:1;
-+ unsigned int trigger:1; /* 0: edge, 1: level */
-+ unsigned int mask:1; /* 0: enabled, 1: disabled */
-+ unsigned int __reserved_2:15;
-+
-+ union {
-+ struct {
-+ unsigned int __reserved_1:24;
-+ unsigned int physical_dest:4;
-+ unsigned int __reserved_2:4;
-+ } physical;
-+
-+ struct {
-+ unsigned int __reserved_1:24;
-+ unsigned int logical_dest:8;
-+ } logical;
-+ unsigned int dest32;
-+ } dest;
-+ };
-+ uint64_t raw;
-+ };
- };
-
- /*
-diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
-index 25a08b1ea6..aada2ef96c 100644
---- a/xen/arch/x86/io_apic.c
-+++ b/xen/arch/x86/io_apic.c
-@@ -2360,7 +2360,7 @@ int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
- int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
- {
- int apic, pin, irq, ret, pirq;
-- struct IO_APIC_route_entry rte = { 0 };
-+ struct IO_APIC_route_entry rte = { };
- unsigned long flags;
- struct irq_desc *desc;
-
-diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
-index f4de09f431..9e6be3be35 100644
---- a/xen/drivers/passthrough/amd/iommu_intr.c
-+++ b/xen/drivers/passthrough/amd/iommu_intr.c
-@@ -352,8 +352,8 @@ static int update_intremap_entry_from_ioapic(
- void cf_check amd_iommu_ioapic_update_ire(
- unsigned int apic, unsigned int reg, unsigned int value)
- {
-- struct IO_APIC_route_entry old_rte = { 0 };
-- struct IO_APIC_route_entry new_rte = { 0 };
-+ struct IO_APIC_route_entry old_rte = { };
-+ struct IO_APIC_route_entry new_rte = { };
- unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
- unsigned int pin = (reg - 0x10) / 2;
- int seg, bdf, rc;
-diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
-index 1512e4866b..019c21c556 100644
---- a/xen/drivers/passthrough/vtd/intremap.c
-+++ b/xen/drivers/passthrough/vtd/intremap.c
-@@ -419,7 +419,7 @@ unsigned int cf_check io_apic_read_remap_rte(
- {
- unsigned int ioapic_pin = (reg - 0x10) / 2;
- int index;
-- struct IO_xAPIC_route_entry old_rte = { 0 };
-+ struct IO_xAPIC_route_entry old_rte = { };
- int rte_upper = (reg & 1) ? 1 : 0;
- struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
-
-@@ -442,7 +442,7 @@ void cf_check io_apic_write_remap_rte(
- unsigned int apic, unsigned int reg, unsigned int value)
- {
- unsigned int ioapic_pin = (reg - 0x10) / 2;
-- struct IO_xAPIC_route_entry old_rte = { 0 };
-+ struct IO_xAPIC_route_entry old_rte = { };
- struct IO_APIC_route_remap_entry *remap_rte;
- unsigned int rte_upper = (reg & 1) ? 1 : 0;
- struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
---
-2.42.0
-
diff --git a/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch b/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch
deleted file mode 100644
index 43faeeb..0000000
--- a/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch
+++ /dev/null
@@ -1,180 +0,0 @@
-From 1bd4523d696d26976f64a919df8c7a1b3ea32f6f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:49:37 +0200
-Subject: [PATCH 20/55] x86/ioapic: RTE modifications must use
- ioapic_write_entry
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Do not allow to write to RTE registers using io_apic_write and instead
-require changes to RTE to be performed using ioapic_write_entry.
-
-This is in preparation for passing the full contents of the RTE to the
-IOMMU interrupt remapping handlers, so remapping entries for IO-APIC
-RTEs can be updated atomically when possible.
-
-While immediately this commit might expand the number of MMIO accesses
-in order to update an IO-APIC RTE, further changes will benefit from
-getting the full RTE value passed to the IOMMU handlers, as the logic
-is greatly simplified when the IOMMU handlers can get the complete RTE
-value in one go.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: ef7995ed1bcd7eac37fb3c3fe56eaa54ea9baf6c
-master date: 2023-07-28 09:40:20 +0200
----
- xen/arch/x86/include/asm/io_apic.h | 8 ++---
- xen/arch/x86/io_apic.c | 43 ++++++++++++------------
- xen/drivers/passthrough/amd/iommu_intr.c | 6 ----
- 3 files changed, 25 insertions(+), 32 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/io_apic.h b/xen/arch/x86/include/asm/io_apic.h
-index a558bb063c..6b514b4e3d 100644
---- a/xen/arch/x86/include/asm/io_apic.h
-+++ b/xen/arch/x86/include/asm/io_apic.h
-@@ -161,8 +161,8 @@ static inline void __io_apic_write(unsigned int apic, unsigned int reg, unsigned
-
- static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
- {
-- if ( ioapic_reg_remapped(reg) )
-- return iommu_update_ire_from_apic(apic, reg, value);
-+ /* RTE writes must use ioapic_write_entry. */
-+ BUG_ON(reg >= 0x10);
- __io_apic_write(apic, reg, value);
- }
-
-@@ -172,8 +172,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
- */
- static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
- {
-- if ( ioapic_reg_remapped(reg) )
-- return iommu_update_ire_from_apic(apic, reg, value);
-+ /* RTE writes must use ioapic_write_entry. */
-+ BUG_ON(reg >= 0x10);
- *(IO_APIC_BASE(apic) + 4) = value;
- }
-
-diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
-index aada2ef96c..041233b9b7 100644
---- a/xen/arch/x86/io_apic.c
-+++ b/xen/arch/x86/io_apic.c
-@@ -237,15 +237,15 @@ struct IO_APIC_route_entry __ioapic_read_entry(
- {
- union entry_union eu;
-
-- if ( raw )
-+ if ( raw || !iommu_intremap )
- {
- eu.w1 = __io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = __io_apic_read(apic, 0x11 + 2 * pin);
- }
- else
- {
-- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-+ eu.w1 = iommu_read_apic_from_ire(apic, 0x10 + 2 * pin);
-+ eu.w2 = iommu_read_apic_from_ire(apic, 0x11 + 2 * pin);
- }
-
- return eu.entry;
-@@ -269,15 +269,15 @@ void __ioapic_write_entry(
- {
- union entry_union eu = { .entry = e };
-
-- if ( raw )
-+ if ( raw || !iommu_intremap )
- {
- __io_apic_write(apic, 0x11 + 2 * pin, eu.w2);
- __io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
- }
- else
- {
-- io_apic_write(apic, 0x11 + 2 * pin, eu.w2);
-- io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
-+ iommu_update_ire_from_apic(apic, 0x11 + 2 * pin, eu.w2);
-+ iommu_update_ire_from_apic(apic, 0x10 + 2 * pin, eu.w1);
- }
- }
-
-@@ -433,16 +433,17 @@ static void modify_IO_APIC_irq(unsigned int irq, unsigned int enable,
- unsigned int disable)
- {
- struct irq_pin_list *entry = irq_2_pin + irq;
-- unsigned int pin, reg;
-
- for (;;) {
-- pin = entry->pin;
-+ unsigned int pin = entry->pin;
-+ struct IO_APIC_route_entry rte;
-+
- if (pin == -1)
- break;
-- reg = io_apic_read(entry->apic, 0x10 + pin*2);
-- reg &= ~disable;
-- reg |= enable;
-- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-+ rte = __ioapic_read_entry(entry->apic, pin, false);
-+ rte.raw &= ~(uint64_t)disable;
-+ rte.raw |= enable;
-+ __ioapic_write_entry(entry->apic, pin, false, rte);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
-@@ -584,16 +585,16 @@ set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
- dest = SET_APIC_LOGICAL_ID(dest);
- entry = irq_2_pin + irq;
- for (;;) {
-- unsigned int data;
-+ struct IO_APIC_route_entry rte;
-+
- pin = entry->pin;
- if (pin == -1)
- break;
-
-- io_apic_write(entry->apic, 0x10 + 1 + pin*2, dest);
-- data = io_apic_read(entry->apic, 0x10 + pin*2);
-- data &= ~IO_APIC_REDIR_VECTOR_MASK;
-- data |= MASK_INSR(desc->arch.vector, IO_APIC_REDIR_VECTOR_MASK);
-- io_apic_modify(entry->apic, 0x10 + pin*2, data);
-+ rte = __ioapic_read_entry(entry->apic, pin, false);
-+ rte.dest.dest32 = dest;
-+ rte.vector = desc->arch.vector;
-+ __ioapic_write_entry(entry->apic, pin, false, rte);
-
- if (!entry->next)
- break;
-@@ -2127,10 +2128,8 @@ void ioapic_resume(void)
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
- __io_apic_write(apic, 0, reg_00.raw);
- }
-- for (i = 0; i < nr_ioapic_entries[apic]; i++, entry++) {
-- __io_apic_write(apic, 0x11+2*i, *(((int *)entry)+1));
-- __io_apic_write(apic, 0x10+2*i, *(((int *)entry)+0));
-- }
-+ for (i = 0; i < nr_ioapic_entries[apic]; i++, entry++)
-+ __ioapic_write_entry(apic, i, true, *entry);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
-diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
-index 9e6be3be35..f32c418a7e 100644
---- a/xen/drivers/passthrough/amd/iommu_intr.c
-+++ b/xen/drivers/passthrough/amd/iommu_intr.c
-@@ -361,12 +361,6 @@ void cf_check amd_iommu_ioapic_update_ire(
- struct amd_iommu *iommu;
- unsigned int idx;
-
-- if ( !iommu_intremap )
-- {
-- __io_apic_write(apic, reg, value);
-- return;
-- }
--
- idx = ioapic_id_to_index(IO_APIC_ID(apic));
- if ( idx == MAX_IO_APICS )
- return;
---
-2.42.0
-
diff --git a/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch b/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch
deleted file mode 100644
index 6560452..0000000
--- a/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From e08e7330c58b7ee1efb00e348521a6afc524dc38 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:50:05 +0200
-Subject: [PATCH 21/55] iommu/vtd: rename io_apic_read_remap_rte() local
- variable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Preparatory change to unify the IO-APIC pin variable name between
-io_apic_read_remap_rte() and amd_iommu_ioapic_update_ire(), so that
-the local variable can be made a function parameter with the same name
-across vendors.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: a478b38c01b65fa030303f0324a3380d872eb165
-master date: 2023-07-28 09:40:42 +0200
----
- xen/drivers/passthrough/vtd/intremap.c | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
-index 019c21c556..53c9de9a75 100644
---- a/xen/drivers/passthrough/vtd/intremap.c
-+++ b/xen/drivers/passthrough/vtd/intremap.c
-@@ -441,14 +441,14 @@ unsigned int cf_check io_apic_read_remap_rte(
- void cf_check io_apic_write_remap_rte(
- unsigned int apic, unsigned int reg, unsigned int value)
- {
-- unsigned int ioapic_pin = (reg - 0x10) / 2;
-+ unsigned int pin = (reg - 0x10) / 2;
- struct IO_xAPIC_route_entry old_rte = { };
- struct IO_APIC_route_remap_entry *remap_rte;
- unsigned int rte_upper = (reg & 1) ? 1 : 0;
- struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
- int saved_mask;
-
-- old_rte = __ioapic_read_entry(apic, ioapic_pin, true);
-+ old_rte = __ioapic_read_entry(apic, pin, true);
-
- remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
-
-@@ -458,7 +458,7 @@ void cf_check io_apic_write_remap_rte(
- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
- remap_rte->mask = saved_mask;
-
-- if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
-+ if ( ioapic_rte_to_remap_entry(iommu, apic, pin,
- &old_rte, rte_upper, value) )
- {
- __io_apic_write(apic, reg, value);
-@@ -468,7 +468,7 @@ void cf_check io_apic_write_remap_rte(
- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
- }
- else
-- __ioapic_write_entry(apic, ioapic_pin, true, old_rte);
-+ __ioapic_write_entry(apic, pin, true, old_rte);
- }
-
- static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
---
-2.42.0
-
diff --git a/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch b/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch
deleted file mode 100644
index e06714e..0000000
--- a/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch
+++ /dev/null
@@ -1,462 +0,0 @@
-From 5116fe12d8238cc7d6582ceefd3f7e944bff9a1d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:50:39 +0200
-Subject: [PATCH 22/55] x86/iommu: pass full IO-APIC RTE for remapping table
- update
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-So that the remapping entry can be updated atomically when possible.
-
-Doing such update atomically will avoid Xen having to mask the IO-APIC
-pin prior to performing any interrupt movements (ie: changing the
-destination and vector fields), as the interrupt remapping entry is
-always consistent.
-
-This also simplifies some of the logic on both VT-d and AMD-Vi
-implementations, as having the full RTE available instead of half of
-it avoids to possibly read and update the missing other half from
-hardware.
-
-While there remove the explicit zeroing of new_ire fields in
-ioapic_rte_to_remap_entry() and initialize the variable at definition
-so all fields are zeroed. Note fields could be also initialized with
-final values at definition, but I found that likely too much to be
-done at this time.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 3e033172b0250446bfe119f31c7f0f51684b0472
-master date: 2023-08-01 11:48:39 +0200
----
- xen/arch/x86/include/asm/iommu.h | 3 +-
- xen/arch/x86/io_apic.c | 5 +-
- xen/drivers/passthrough/amd/iommu.h | 2 +-
- xen/drivers/passthrough/amd/iommu_intr.c | 100 ++---------------
- xen/drivers/passthrough/vtd/extern.h | 2 +-
- xen/drivers/passthrough/vtd/intremap.c | 131 +++++++++++------------
- xen/drivers/passthrough/x86/iommu.c | 4 +-
- xen/include/xen/iommu.h | 3 +-
- 8 files changed, 82 insertions(+), 168 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/iommu.h b/xen/arch/x86/include/asm/iommu.h
-index fc0afe35bf..c0d4ad3742 100644
---- a/xen/arch/x86/include/asm/iommu.h
-+++ b/xen/arch/x86/include/asm/iommu.h
-@@ -97,7 +97,8 @@ struct iommu_init_ops {
-
- extern const struct iommu_init_ops *iommu_init_ops;
-
--void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-+void iommu_update_ire_from_apic(unsigned int apic, unsigned int pin,
-+ uint64_t rte);
- unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
- int iommu_setup_hpet_msi(struct msi_desc *);
-
-diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
-index 041233b9b7..b3afef8933 100644
---- a/xen/arch/x86/io_apic.c
-+++ b/xen/arch/x86/io_apic.c
-@@ -275,10 +275,7 @@ void __ioapic_write_entry(
- __io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
- }
- else
-- {
-- iommu_update_ire_from_apic(apic, 0x11 + 2 * pin, eu.w2);
-- iommu_update_ire_from_apic(apic, 0x10 + 2 * pin, eu.w1);
-- }
-+ iommu_update_ire_from_apic(apic, pin, e.raw);
- }
-
- static void ioapic_write_entry(
-diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
-index 8bc3c35b1b..5429ada58e 100644
---- a/xen/drivers/passthrough/amd/iommu.h
-+++ b/xen/drivers/passthrough/amd/iommu.h
-@@ -300,7 +300,7 @@ int cf_check amd_iommu_free_intremap_table(
- unsigned int amd_iommu_intremap_table_order(
- const void *irt, const struct amd_iommu *iommu);
- void cf_check amd_iommu_ioapic_update_ire(
-- unsigned int apic, unsigned int reg, unsigned int value);
-+ unsigned int apic, unsigned int pin, uint64_t rte);
- unsigned int cf_check amd_iommu_read_ioapic_from_ire(
- unsigned int apic, unsigned int reg);
- int cf_check amd_iommu_msi_msg_update_ire(
-diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
-index f32c418a7e..e83a2a932a 100644
---- a/xen/drivers/passthrough/amd/iommu_intr.c
-+++ b/xen/drivers/passthrough/amd/iommu_intr.c
-@@ -247,11 +247,6 @@ static void update_intremap_entry(const struct amd_iommu *iommu,
- }
- }
-
--static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
--{
-- return rte->vector | (rte->delivery_mode << 8);
--}
--
- static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
- {
- rte->vector = (u8)offset;
-@@ -267,7 +262,6 @@ static int update_intremap_entry_from_ioapic(
- int bdf,
- struct amd_iommu *iommu,
- struct IO_APIC_route_entry *rte,
-- bool_t lo_update,
- u16 *index)
- {
- unsigned long flags;
-@@ -315,31 +309,6 @@ static int update_intremap_entry_from_ioapic(
- spin_lock(lock);
- }
-
-- if ( fresh )
-- /* nothing */;
-- else if ( !lo_update )
-- {
-- /*
-- * Low half of incoming RTE is already in remapped format,
-- * so need to recover vector and delivery mode from IRTE.
-- */
-- ASSERT(get_rte_index(rte) == offset);
-- if ( iommu->ctrl.ga_en )
-- vector = entry.ptr128->full.vector;
-- else
-- vector = entry.ptr32->flds.vector;
-- /* The IntType fields match for both formats. */
-- delivery_mode = entry.ptr32->flds.int_type;
-- }
-- else if ( x2apic_enabled )
-- {
-- /*
-- * High half of incoming RTE was read from the I/O APIC and hence may
-- * not hold the full destination, so need to recover full destination
-- * from IRTE.
-- */
-- dest = get_full_dest(entry.ptr128);
-- }
- update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
-
- spin_unlock_irqrestore(lock, flags);
-@@ -350,14 +319,11 @@ static int update_intremap_entry_from_ioapic(
- }
-
- void cf_check amd_iommu_ioapic_update_ire(
-- unsigned int apic, unsigned int reg, unsigned int value)
-+ unsigned int apic, unsigned int pin, uint64_t rte)
- {
-- struct IO_APIC_route_entry old_rte = { };
-- struct IO_APIC_route_entry new_rte = { };
-- unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
-- unsigned int pin = (reg - 0x10) / 2;
-+ struct IO_APIC_route_entry old_rte;
-+ struct IO_APIC_route_entry new_rte = { .raw = rte };
- int seg, bdf, rc;
-- bool saved_mask, fresh = false;
- struct amd_iommu *iommu;
- unsigned int idx;
-
-@@ -373,58 +339,23 @@ void cf_check amd_iommu_ioapic_update_ire(
- {
- AMD_IOMMU_WARN("failed to find IOMMU for IO-APIC @ %04x:%04x\n",
- seg, bdf);
-- __io_apic_write(apic, reg, value);
-+ __ioapic_write_entry(apic, pin, true, new_rte);
- return;
- }
-
-- /* save io-apic rte lower 32 bits */
-- *((u32 *)&old_rte) = __io_apic_read(apic, rte_lo);
-- saved_mask = old_rte.mask;
--
-- if ( reg == rte_lo )
-- {
-- *((u32 *)&new_rte) = value;
-- /* read upper 32 bits from io-apic rte */
-- *(((u32 *)&new_rte) + 1) = __io_apic_read(apic, reg + 1);
-- }
-- else
-- {
-- *((u32 *)&new_rte) = *((u32 *)&old_rte);
-- *(((u32 *)&new_rte) + 1) = value;
-- }
--
-- if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_MAX_ENTRIES )
-- {
-- ASSERT(saved_mask);
--
-- /*
-- * There's nowhere except the IRTE to store a full 32-bit destination,
-- * so we may not bypass entry allocation and updating of the low RTE
-- * half in the (usual) case of the high RTE half getting written first.
-- */
-- if ( new_rte.mask && !x2apic_enabled )
-- {
-- __io_apic_write(apic, reg, value);
-- return;
-- }
--
-- fresh = true;
-- }
--
-+ old_rte = __ioapic_read_entry(apic, pin, true);
- /* mask the interrupt while we change the intremap table */
-- if ( !saved_mask )
-+ if ( !old_rte.mask )
- {
- old_rte.mask = 1;
-- __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
-+ __ioapic_write_entry(apic, pin, true, old_rte);
- }
-
- /* Update interrupt remapping entry */
- rc = update_intremap_entry_from_ioapic(
-- bdf, iommu, &new_rte, reg == rte_lo,
-+ bdf, iommu, &new_rte,
- &ioapic_sbdf[idx].pin_2_idx[pin]);
-
-- __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
--
- if ( rc )
- {
- /* Keep the entry masked. */
-@@ -433,20 +364,7 @@ void cf_check amd_iommu_ioapic_update_ire(
- return;
- }
-
-- /* For lower bits access, return directly to avoid double writes */
-- if ( reg == rte_lo )
-- return;
--
-- /*
-- * Unmask the interrupt after we have updated the intremap table. Also
-- * write the low half if a fresh entry was allocated for a high half
-- * update in x2APIC mode.
-- */
-- if ( !saved_mask || (x2apic_enabled && fresh) )
-- {
-- old_rte.mask = saved_mask;
-- __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
-- }
-+ __ioapic_write_entry(apic, pin, true, new_rte);
- }
-
- unsigned int cf_check amd_iommu_read_ioapic_from_ire(
-diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
-index 39602d1f88..d49e40c5ce 100644
---- a/xen/drivers/passthrough/vtd/extern.h
-+++ b/xen/drivers/passthrough/vtd/extern.h
-@@ -92,7 +92,7 @@ int cf_check intel_iommu_get_reserved_device_memory(
- unsigned int cf_check io_apic_read_remap_rte(
- unsigned int apic, unsigned int reg);
- void cf_check io_apic_write_remap_rte(
-- unsigned int apic, unsigned int reg, unsigned int value);
-+ unsigned int apic, unsigned int pin, uint64_t rte);
-
- struct msi_desc;
- struct msi_msg;
-diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
-index 53c9de9a75..78d7bc139a 100644
---- a/xen/drivers/passthrough/vtd/intremap.c
-+++ b/xen/drivers/passthrough/vtd/intremap.c
-@@ -328,15 +328,14 @@ static int remap_entry_to_ioapic_rte(
-
- static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
- int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
-- unsigned int rte_upper, unsigned int value)
-+ struct IO_xAPIC_route_entry new_rte)
- {
- struct iremap_entry *iremap_entry = NULL, *iremap_entries;
- struct iremap_entry new_ire;
- struct IO_APIC_route_remap_entry *remap_rte;
-- struct IO_xAPIC_route_entry new_rte;
- int index;
- unsigned long flags;
-- bool init = false;
-+ bool init = false, masked = old_rte->mask;
-
- remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
- spin_lock_irqsave(&iommu->intremap.lock, flags);
-@@ -364,48 +363,40 @@ static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
-
- new_ire = *iremap_entry;
-
-- if ( rte_upper )
-- {
-- if ( x2apic_enabled )
-- new_ire.remap.dst = value;
-- else
-- new_ire.remap.dst = (value >> 24) << 8;
-- }
-+ if ( x2apic_enabled )
-+ new_ire.remap.dst = new_rte.dest.dest32;
- else
-- {
-- *(((u32 *)&new_rte) + 0) = value;
-- new_ire.remap.fpd = 0;
-- new_ire.remap.dm = new_rte.dest_mode;
-- new_ire.remap.tm = new_rte.trigger;
-- new_ire.remap.dlm = new_rte.delivery_mode;
-- /* Hardware require RH = 1 for LPR delivery mode */
-- new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
-- new_ire.remap.avail = 0;
-- new_ire.remap.res_1 = 0;
-- new_ire.remap.vector = new_rte.vector;
-- new_ire.remap.res_2 = 0;
--
-- set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-- new_ire.remap.res_3 = 0;
-- new_ire.remap.res_4 = 0;
-- new_ire.remap.p = 1; /* finally, set present bit */
--
-- /* now construct new ioapic rte entry */
-- remap_rte->vector = new_rte.vector;
-- remap_rte->delivery_mode = 0; /* has to be 0 for remap format */
-- remap_rte->index_15 = (index >> 15) & 0x1;
-- remap_rte->index_0_14 = index & 0x7fff;
--
-- remap_rte->delivery_status = new_rte.delivery_status;
-- remap_rte->polarity = new_rte.polarity;
-- remap_rte->irr = new_rte.irr;
-- remap_rte->trigger = new_rte.trigger;
-- remap_rte->mask = new_rte.mask;
-- remap_rte->reserved = 0;
-- remap_rte->format = 1; /* indicate remap format */
-- }
--
-- update_irte(iommu, iremap_entry, &new_ire, !init);
-+ new_ire.remap.dst = GET_xAPIC_ID(new_rte.dest.dest32) << 8;
-+
-+ new_ire.remap.dm = new_rte.dest_mode;
-+ new_ire.remap.tm = new_rte.trigger;
-+ new_ire.remap.dlm = new_rte.delivery_mode;
-+ /* Hardware require RH = 1 for LPR delivery mode. */
-+ new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
-+ new_ire.remap.vector = new_rte.vector;
-+
-+ set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
-+ /* Finally, set present bit. */
-+ new_ire.remap.p = 1;
-+
-+ /* Now construct new ioapic rte entry. */
-+ remap_rte->vector = new_rte.vector;
-+ /* Has to be 0 for remap format. */
-+ remap_rte->delivery_mode = 0;
-+ remap_rte->index_15 = (index >> 15) & 0x1;
-+ remap_rte->index_0_14 = index & 0x7fff;
-+
-+ remap_rte->delivery_status = new_rte.delivery_status;
-+ remap_rte->polarity = new_rte.polarity;
-+ remap_rte->irr = new_rte.irr;
-+ remap_rte->trigger = new_rte.trigger;
-+ remap_rte->mask = new_rte.mask;
-+ remap_rte->reserved = 0;
-+ /* Indicate remap format. */
-+ remap_rte->format = 1;
-+
-+ /* If cmpxchg16b is not available the caller must mask the IO-APIC pin. */
-+ update_irte(iommu, iremap_entry, &new_ire, !init && !masked);
- iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
- iommu_flush_iec_index(iommu, 0, index);
-
-@@ -439,36 +430,42 @@ unsigned int cf_check io_apic_read_remap_rte(
- }
-
- void cf_check io_apic_write_remap_rte(
-- unsigned int apic, unsigned int reg, unsigned int value)
-+ unsigned int apic, unsigned int pin, uint64_t rte)
- {
-- unsigned int pin = (reg - 0x10) / 2;
-+ struct IO_xAPIC_route_entry new_rte = { .raw = rte };
- struct IO_xAPIC_route_entry old_rte = { };
-- struct IO_APIC_route_remap_entry *remap_rte;
-- unsigned int rte_upper = (reg & 1) ? 1 : 0;
- struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
-- int saved_mask;
--
-- old_rte = __ioapic_read_entry(apic, pin, true);
--
-- remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
--
-- /* mask the interrupt while we change the intremap table */
-- saved_mask = remap_rte->mask;
-- remap_rte->mask = 1;
-- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
-- remap_rte->mask = saved_mask;
-+ bool masked = true;
-+ int rc;
-
-- if ( ioapic_rte_to_remap_entry(iommu, apic, pin,
-- &old_rte, rte_upper, value) )
-+ if ( !cpu_has_cx16 )
- {
-- __io_apic_write(apic, reg, value);
-+ /*
-+ * Cannot atomically update the IRTE entry: mask the IO-APIC pin to
-+ * avoid interrupts seeing an inconsistent IRTE entry.
-+ */
-+ old_rte = __ioapic_read_entry(apic, pin, true);
-+ if ( !old_rte.mask )
-+ {
-+ masked = false;
-+ old_rte.mask = 1;
-+ __ioapic_write_entry(apic, pin, true, old_rte);
-+ }
-+ }
-
-- /* Recover the original value of 'mask' bit */
-- if ( rte_upper )
-- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
-+ rc = ioapic_rte_to_remap_entry(iommu, apic, pin, &old_rte, new_rte);
-+ if ( rc )
-+ {
-+ if ( !masked )
-+ {
-+ /* Recover the original value of 'mask' bit */
-+ old_rte.mask = 0;
-+ __ioapic_write_entry(apic, pin, true, old_rte);
-+ }
-+ return;
- }
-- else
-- __ioapic_write_entry(apic, pin, true, old_rte);
-+ /* old_rte will contain the updated IO-APIC RTE on success. */
-+ __ioapic_write_entry(apic, pin, true, old_rte);
- }
-
- static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
-diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
-index f671b0f2bb..8bd0ccb2e9 100644
---- a/xen/drivers/passthrough/x86/iommu.c
-+++ b/xen/drivers/passthrough/x86/iommu.c
-@@ -142,9 +142,9 @@ int iommu_enable_x2apic(void)
- }
-
- void iommu_update_ire_from_apic(
-- unsigned int apic, unsigned int reg, unsigned int value)
-+ unsigned int apic, unsigned int pin, uint64_t rte)
- {
-- iommu_vcall(&iommu_ops, update_ire_from_apic, apic, reg, value);
-+ iommu_vcall(&iommu_ops, update_ire_from_apic, apic, pin, rte);
- }
-
- unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
-diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
-index 4f22fc1bed..f8a52627f7 100644
---- a/xen/include/xen/iommu.h
-+++ b/xen/include/xen/iommu.h
-@@ -274,7 +274,8 @@ struct iommu_ops {
- int (*enable_x2apic)(void);
- void (*disable_x2apic)(void);
-
-- void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
-+ void (*update_ire_from_apic)(unsigned int apic, unsigned int pin,
-+ uint64_t rte);
- unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
-
- int (*setup_hpet_msi)(struct msi_desc *);
---
-2.42.0
-
diff --git a/0023-build-correct-gas-noexecstack-check.patch b/0023-build-correct-gas-noexecstack-check.patch
deleted file mode 100644
index 245d631..0000000
--- a/0023-build-correct-gas-noexecstack-check.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From ba360fbb6413231f84a7d68f5cb34858f81d4d23 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 5 Sep 2023 08:51:50 +0200
-Subject: [PATCH 23/55] build: correct gas --noexecstack check
-
-The check was missing an escape for the inner $, thus breaking things
-in the unlikely event that the underlying assembler doesn't support this
-option.
-
-Fixes: 62d22296a95d ("build: silence GNU ld warning about executable stacks")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: d1f6a58dfdc508c43a51c1865c826d519bf16493
-master date: 2023-08-14 09:58:19 +0200
----
- xen/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 7bb9de7bdc..455916c757 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -405,7 +405,7 @@ endif
-
- AFLAGS += -D__ASSEMBLY__
-
--$(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack)
-+$(call cc-option-add,AFLAGS,CC,-Wa$$(comma)--noexecstack)
-
- LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments
-
---
-2.42.0
-
diff --git a/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch b/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch
deleted file mode 100644
index 1ec7335..0000000
--- a/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 042982297802e7b746dc2fac95a453cc88d0aa83 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 5 Sep 2023 08:52:15 +0200
-Subject: [PATCH 24/55] libxl: slightly correct JSON generation of CPU policy
-
-The "cpuid_empty" label is also (in principle; maybe only for rubbish
-input) reachable in the "cpuid_only" case. Hence the label needs to live
-ahead of the check of the variable.
-
-Fixes: 5b80cecb747b ("libxl: introduce MSR data in libxl_cpuid_policy")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: ebce4e3a146c39e57bb7a890e059e89c32b6d547
-master date: 2023-08-17 16:24:17 +0200
----
- tools/libs/light/libxl_cpuid.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
-index 849722541c..5c66d094b2 100644
---- a/tools/libs/light/libxl_cpuid.c
-+++ b/tools/libs/light/libxl_cpuid.c
-@@ -710,10 +710,11 @@ parse_cpuid:
- libxl__strdup(NOGC, libxl__json_object_get_string(r));
- }
- }
-+
-+cpuid_empty:
- if (cpuid_only)
- return 0;
-
--cpuid_empty:
- co = libxl__json_map_get("msr", o, JSON_ARRAY);
- if (!libxl__json_object_is_array(co))
- return ERROR_FAIL;
---
-2.42.0
-
diff --git a/0025-tboot-Disable-CET-at-shutdown.patch b/0025-tboot-Disable-CET-at-shutdown.patch
deleted file mode 100644
index f06db61..0000000
--- a/0025-tboot-Disable-CET-at-shutdown.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 7ca58fbef489fcb17631872a2bdc929823a2a494 Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Tue, 5 Sep 2023 08:52:33 +0200
-Subject: [PATCH 25/55] tboot: Disable CET at shutdown
-
-tboot_shutdown() calls into tboot to perform the actual system shutdown.
-tboot isn't built with endbr annotations, and Xen has CET-IBT enabled on
-newer hardware. shutdown_entry isn't annotated with endbr and Xen
-faults:
-
-Panic on CPU 0:
-CONTROL-FLOW PROTECTION FAULT: #CP[0003] endbranch
-
-And Xen hangs at this point.
-
-Disabling CET-IBT let Xen and tboot power off, but reboot was
-perfoming a poweroff instead of a warm reboot. Disabling all of CET,
-i.e. shadow stacks as well, lets tboot reboot properly.
-
-Fixes: cdbe2b0a1aec ("x86: Enable CET Indirect Branch Tracking")
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Daniel P. Smith <dpsmith@apertussolutions.com>
-master commit: 0801868f550539d417d46f82c49307480947ccaa
-master date: 2023-08-17 16:24:49 +0200
----
- xen/arch/x86/tboot.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c
-index fe1abfdf08..a2e9e97ed7 100644
---- a/xen/arch/x86/tboot.c
-+++ b/xen/arch/x86/tboot.c
-@@ -398,6 +398,16 @@ void tboot_shutdown(uint32_t shutdown_type)
- tboot_gen_xenheap_integrity(g_tboot_shared->s3_key, &xenheap_mac);
- }
-
-+ /*
-+ * Disable CET - tboot may not be built with endbr, and it doesn't support
-+ * shadow stacks.
-+ */
-+ if ( read_cr4() & X86_CR4_CET )
-+ {
-+ wrmsrl(MSR_S_CET, 0);
-+ write_cr4(read_cr4() & ~X86_CR4_CET);
-+ }
-+
- /*
- * During early boot, we can be called by panic before idle_vcpu[0] is
- * setup, but in that case we don't need to change page tables.
---
-2.42.0
-
diff --git a/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch b/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch
deleted file mode 100644
index 10aa14f..0000000
--- a/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From a939e953cdd522da3d8f0efeaea84448b5b570f9 Mon Sep 17 00:00:00 2001
-From: Jinoh Kang <jinoh.kang.kr@gmail.com>
-Date: Tue, 5 Sep 2023 08:53:01 +0200
-Subject: [PATCH 26/55] x86/svm: Fix valid condition in svm_get_pending_event()
-
-Fixes: 9864841914c2 ("x86/vm_event: add support for VM_EVENT_REASON_INTERRUPT")
-Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
-master commit: b2865c2b6f164d2c379177cdd1cb200e4eaba549
-master date: 2023-08-18 20:21:44 +0100
----
- xen/arch/x86/hvm/svm/svm.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
-index 5fa945c526..e8f50e7c5e 100644
---- a/xen/arch/x86/hvm/svm/svm.c
-+++ b/xen/arch/x86/hvm/svm/svm.c
-@@ -2490,7 +2490,7 @@ static bool cf_check svm_get_pending_event(
- {
- const struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
-
-- if ( vmcb->event_inj.v )
-+ if ( !vmcb->event_inj.v )
- return false;
-
- info->vector = vmcb->event_inj.vector;
---
-2.42.0
-
diff --git a/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch b/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch
deleted file mode 100644
index a022066..0000000
--- a/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch
+++ /dev/null
@@ -1,100 +0,0 @@
-From 8be85d8c0df2445c012fac42117396b483db5db0 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 5 Sep 2023 08:53:31 +0200
-Subject: [PATCH 27/55] x86/vmx: Revert "x86/VMX: sanitize rIP before
- re-entering guest"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-At the time of XSA-170, the x86 instruction emulator was genuinely broken. It
-would load arbitrary values into %rip and putting a check here probably was
-the best stopgap security fix. It should have been reverted following c/s
-81d3a0b26c1 "x86emul: limit-check branch targets" which corrected the emulator
-behaviour.
-
-However, everyone involved in XSA-170, myself included, failed to read the SDM
-correctly. On the subject of %rip consistency checks, the SDM stated:
-
- If the processor supports N < 64 linear-address bits, bits 63:N must be
- identical
-
-A non-canonical %rip (and SSP more recently) is an explicitly legal state in
-x86, and the VMEntry consistency checks are intentionally off-by-one from a
-regular canonical check.
-
-The consequence of this bug is that Xen will currently take a legal x86 state
-which would successfully VMEnter, and corrupt it into having non-architectural
-behaviour.
-
-Furthermore, in the time this bugfix has been pending in public, I
-successfully persuaded Intel to clarify the SDM, adding the following
-clarification:
-
- The guest RIP value is not required to be canonical; the value of bit N-1
- may differ from that of bit N.
-
-Fixes: ffbbfda377 ("x86/VMX: sanitize rIP before re-entering guest")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 10c83bb0f5d158d101d983883741b76f927e54a3
-master date: 2023-08-23 18:44:59 +0100
----
- xen/arch/x86/hvm/vmx/vmx.c | 34 +---------------------------------
- 1 file changed, 1 insertion(+), 33 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index f256dc2635..072288a5ef 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -3975,7 +3975,7 @@ static void undo_nmis_unblocked_by_iret(void)
- void vmx_vmexit_handler(struct cpu_user_regs *regs)
- {
- unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
-- unsigned int vector = 0, mode;
-+ unsigned int vector = 0;
- struct vcpu *v = current;
- struct domain *currd = v->domain;
-
-@@ -4650,38 +4650,6 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- out:
- if ( nestedhvm_vcpu_in_guestmode(v) )
- nvmx_idtv_handling();
--
-- /*
-- * VM entry will fail (causing the guest to get crashed) if rIP (and
-- * rFLAGS, but we don't have an issue there) doesn't meet certain
-- * criteria. As we must not allow less than fully privileged mode to have
-- * such an effect on the domain, we correct rIP in that case (accepting
-- * this not being architecturally correct behavior, as the injected #GP
-- * fault will then not see the correct [invalid] return address).
-- * And since we know the guest will crash, we crash it right away if it
-- * already is in most privileged mode.
-- */
-- mode = vmx_guest_x86_mode(v);
-- if ( mode == 8 ? !is_canonical_address(regs->rip)
-- : regs->rip != regs->eip )
-- {
-- gprintk(XENLOG_WARNING, "Bad rIP %lx for mode %u\n", regs->rip, mode);
--
-- if ( vmx_get_cpl() )
-- {
-- __vmread(VM_ENTRY_INTR_INFO, &intr_info);
-- if ( !(intr_info & INTR_INFO_VALID_MASK) )
-- hvm_inject_hw_exception(TRAP_gp_fault, 0);
-- /* Need to fix rIP nevertheless. */
-- if ( mode == 8 )
-- regs->rip = (long)(regs->rip << (64 - VADDR_BITS)) >>
-- (64 - VADDR_BITS);
-- else
-- regs->rip = regs->eip;
-- }
-- else
-- domain_crash(v->domain);
-- }
- }
-
- static void lbr_tsx_fixup(void)
---
-2.42.0
-
diff --git a/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch b/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch
deleted file mode 100644
index 2fcfd68..0000000
--- a/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 699de512748d8e3bdcb3225b3b2a77c10cfd2408 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Sep 2023 08:53:57 +0200
-Subject: [PATCH 28/55] x86/irq: fix reporting of spurious i8259 interrupts
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The return value of bogus_8259A_irq() is wrong: the function will
-return `true` when the IRQ is real and `false` when it's a spurious
-IRQ. This causes the "No irq handler for vector ..." message in
-do_IRQ() to be printed for spurious i8259 interrupts which is not
-intended (and not helpful).
-
-Fix by inverting the return value of bogus_8259A_irq().
-
-Fixes: 132906348a14 ('x86/i8259: Handle bogus spurious interrupts more quietly')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 709f6c8ce6422475c372e67507606170a31ccb65
-master date: 2023-08-30 10:03:53 +0200
----
- xen/arch/x86/i8259.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c
-index 6b35be10f0..ed9f55abe5 100644
---- a/xen/arch/x86/i8259.c
-+++ b/xen/arch/x86/i8259.c
-@@ -37,7 +37,7 @@ static bool _mask_and_ack_8259A_irq(unsigned int irq);
-
- bool bogus_8259A_irq(unsigned int irq)
- {
-- return _mask_and_ack_8259A_irq(irq);
-+ return !_mask_and_ack_8259A_irq(irq);
- }
-
- static void cf_check mask_and_ack_8259A_irq(struct irq_desc *desc)
---
-2.42.0
-
diff --git a/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch b/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
deleted file mode 100644
index bc866d0..0000000
--- a/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-From d31e5b2a9c39816a954d1088d4cfc782f0006f39 Mon Sep 17 00:00:00 2001
-From: Stefano Stabellini <stefano.stabellini@amd.com>
-Date: Tue, 5 Sep 2023 14:33:29 +0200
-Subject: [PATCH 29/55] xen/arm: page: Handle cache flush of an element at the
- top of the address space
-
-The region that needs to be cleaned/invalidated may be at the top
-of the address space. This means that 'end' (i.e. 'p + size') will
-be 0 and therefore nothing will be cleaned/invalidated as the check
-in the loop will always be false.
-
-On Arm64, we only support up to 48-bit Virtual
-address space. So this is not a concern there. However, for 32-bit,
-the mapcache is using the last 2GB of the address space. Therefore
-we may not clean/invalidate properly some pages. This could lead
-to memory corruption or data leakage (the scrubbed value may
-still sit in the cache when the guest could read directly the memory
-and therefore read the old content).
-
-Rework invalidate_dcache_va_range(), clean_dcache_va_range(),
-clean_and_invalidate_dcache_va_range() to handle a cache flush
-with an element at the top of the address space.
-
-This is CVE-2023-34321 / XSA-437.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Acked-by: Bertrand Marquis <bertrand.marquis@arm.com>
-master commit: 9a216e92de9f9011097e4f1fb55ff67ba0a21704
-master date: 2023-09-05 14:30:08 +0200
----
- xen/arch/arm/include/asm/page.h | 33 ++++++++++++++++++++-------------
- 1 file changed, 20 insertions(+), 13 deletions(-)
-
-diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
-index e7cd62190c..d7fe770a5e 100644
---- a/xen/arch/arm/include/asm/page.h
-+++ b/xen/arch/arm/include/asm/page.h
-@@ -160,26 +160,25 @@ static inline size_t read_dcache_line_bytes(void)
-
- static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
- {
-- const void *end = p + size;
- size_t cacheline_mask = dcache_line_bytes - 1;
-
- dsb(sy); /* So the CPU issues all writes to the range */
-
- if ( (uintptr_t)p & cacheline_mask )
- {
-+ size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
- p = (void *)((uintptr_t)p & ~cacheline_mask);
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
- p += dcache_line_bytes;
- }
-- if ( (uintptr_t)end & cacheline_mask )
-- {
-- end = (void *)((uintptr_t)end & ~cacheline_mask);
-- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (end));
-- }
-
-- for ( ; p < end; p += dcache_line_bytes )
-+ for ( ; size >= dcache_line_bytes;
-+ p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__invalidate_dcache_one(0) : : "r" (p));
-
-+ if ( size > 0 )
-+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
-+
- dsb(sy); /* So we know the flushes happen before continuing */
-
- return 0;
-@@ -187,10 +186,14 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
-
- static inline int clean_dcache_va_range(const void *p, unsigned long size)
- {
-- const void *end = p + size;
-+ size_t cacheline_mask = dcache_line_bytes - 1;
-+
- dsb(sy); /* So the CPU issues all writes to the range */
-- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
-- for ( ; p < end; p += dcache_line_bytes )
-+ size += (uintptr_t)p & cacheline_mask;
-+ size = (size + cacheline_mask) & ~cacheline_mask;
-+ p = (void *)((uintptr_t)p & ~cacheline_mask);
-+ for ( ; size >= dcache_line_bytes;
-+ p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__clean_dcache_one(0) : : "r" (p));
- dsb(sy); /* So we know the flushes happen before continuing */
- /* ARM callers assume that dcache_* functions cannot fail. */
-@@ -200,10 +203,14 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
- static inline int clean_and_invalidate_dcache_va_range
- (const void *p, unsigned long size)
- {
-- const void *end = p + size;
-+ size_t cacheline_mask = dcache_line_bytes - 1;
-+
- dsb(sy); /* So the CPU issues all writes to the range */
-- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
-- for ( ; p < end; p += dcache_line_bytes )
-+ size += (uintptr_t)p & cacheline_mask;
-+ size = (size + cacheline_mask) & ~cacheline_mask;
-+ p = (void *)((uintptr_t)p & ~cacheline_mask);
-+ for ( ; size >= dcache_line_bytes;
-+ p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
- dsb(sy); /* So we know the flushes happen before continuing */
- /* ARM callers assume that dcache_* functions cannot fail. */
---
-2.42.0
-
diff --git a/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
deleted file mode 100644
index 4581d03..0000000
--- a/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Wed, 23 Aug 2023 09:26:36 +0200
-Subject: [PATCH 30/55] x86/AMD: extend Zenbleed check to models "good" ucode
- isn't known for
-
-Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and
-0x91, (quoting the respective Linux commit) is similarly affected. Put
-another instance of our Zen1 vs Zen2 distinction checks in
-amd_check_zenbleed(), forcing use of the chickenbit irrespective of
-ucode version (building upon real hardware never surfacing a version of
-0xffffffff).
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302)
----
- xen/arch/x86/cpu/amd.c | 13 ++++++++++---
- 1 file changed, 10 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index 3ea214fc2e..1bb3044be1 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -909,10 +909,17 @@ void amd_check_zenbleed(void)
- case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
- default:
- /*
-- * With the Fam17h check above, parts getting here are Zen1.
-- * They're not affected.
-+ * With the Fam17h check above, most parts getting here are
-+ * Zen1. They're not affected. Assume Zen2 ones making it
-+ * here are affected regardless of microcode version.
-+ *
-+ * Zen1 vs Zen2 isn't a simple model number comparison, so use
-+ * STIBP as a heuristic to distinguish.
- */
-- return;
-+ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+ return;
-+ good_rev = ~0U;
-+ break;
- }
-
- rdmsrl(MSR_AMD64_DE_CFG, val);
---
-2.42.0
-
diff --git a/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
deleted file mode 100644
index 10417ae..0000000
--- a/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 12 Sep 2023 15:06:49 +0100
-Subject: [PATCH 31/55] x86/spec-ctrl: Fix confusion between
- SPEC_CTRL_EXIT_TO_XEN{,_IST}
-
-c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths")
-dropped the only user, leaving behind the (incorrect) implication that Xen had
-split exit paths.
-
-Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST
-to SPEC_CTRL_EXIT_TO_XEN for consistency.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++--------
- xen/arch/x86/x86_64/entry.S | 2 +-
- 2 files changed, 3 insertions(+), 9 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index f23bb105c5..e8fd01243c 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -79,7 +79,6 @@
- * - SPEC_CTRL_ENTRY_FROM_PV
- * - SPEC_CTRL_ENTRY_FROM_INTR
- * - SPEC_CTRL_ENTRY_FROM_INTR_IST
-- * - SPEC_CTRL_EXIT_TO_XEN_IST
- * - SPEC_CTRL_EXIT_TO_XEN
- * - SPEC_CTRL_EXIT_TO_PV
- *
-@@ -268,11 +267,6 @@
- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
- X86_FEATURE_SC_MSR_PV
-
--/* Use when exiting to Xen context. */
--#define SPEC_CTRL_EXIT_TO_XEN \
-- ALTERNATIVE "", \
-- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV
--
- /* Use when exiting to PV guest context. */
- #define SPEC_CTRL_EXIT_TO_PV \
- ALTERNATIVE "", \
-@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- UNLIKELY_END(\@_serialise)
- .endm
-
--/* Use when exiting to Xen in IST context. */
--.macro SPEC_CTRL_EXIT_TO_XEN_IST
-+/* Use when exiting to Xen context. */
-+.macro SPEC_CTRL_EXIT_TO_XEN
- /*
- * Requires %rbx=stack_end
- * Clobbers %rax, %rcx, %rdx
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 7675a59ff0..b45a09823a 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3)
- UNLIKELY_END(exit_cr3)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */
-+ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
-
- RESTORE_ALL adj=8
- iretq
---
-2.42.0
-
diff --git a/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
deleted file mode 100644
index a0c83da..0000000
--- a/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 12 Sep 2023 17:03:16 +0100
-Subject: [PATCH 32/55] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into its
- single user
-
-With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that
-there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into
-SPEC_CTRL_EXIT_TO_XEN to simplify further fixes.
-
-When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own
-is going to be too generic shortly.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++--------------
- 1 file changed, 16 insertions(+), 24 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index e8fd01243c..d5f65d80ea 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -211,27 +211,6 @@
- wrmsr
- .endm
-
--.macro DO_SPEC_CTRL_EXIT_TO_XEN
--/*
-- * Requires %rbx=stack_end
-- * Clobbers %rax, %rcx, %rdx
-- *
-- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
-- * in effect, and reload the shadow value. This covers race conditions which
-- * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
-- */
-- xor %edx, %edx
--
-- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-- jz .L\@_skip
--
-- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
-- mov $MSR_SPEC_CTRL, %ecx
-- wrmsr
--
--.L\@_skip:
--.endm
--
- .macro DO_SPEC_CTRL_EXIT_TO_GUEST
- /*
- * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
-@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- * Clobbers %rax, %rcx, %rdx
- */
- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-- jz .L\@_skip
-+ jz .L\@_skip_sc_msr
-
-- DO_SPEC_CTRL_EXIT_TO_XEN
-+ /*
-+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing
-+ * is in effect, and reload the shadow value. This covers race conditions
-+ * which exist with an NMI/MCE/etc hitting late in the return-to-guest
-+ * path.
-+ */
-+ xor %edx, %edx
-
--.L\@_skip:
-+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-+ jz .L\@_skip_sc_msr
-+
-+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
-+ mov $MSR_SPEC_CTRL, %ecx
-+ wrmsr
-+
-+.L\@_skip_sc_msr:
- .endm
-
- #endif /* __ASSEMBLY__ */
---
-2.42.0
-
diff --git a/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
deleted file mode 100644
index a278c5f..0000000
--- a/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 1 Sep 2023 11:38:44 +0100
-Subject: [PATCH 33/55] x86/spec-ctrl: Turn the remaining
- SPEC_CTRL_{ENTRY,EXIT}_* into asm macros
-
-These have grown more complex over time, with some already having been
-converted.
-
-Provide full Requires/Clobbers comments, otherwise missing at this level of
-indirection.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------
- 1 file changed, 28 insertions(+), 9 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index d5f65d80ea..c6d5f2ad01 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -231,26 +231,45 @@
- .endm
-
- /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
--#define SPEC_CTRL_ENTRY_FROM_PV \
-+.macro SPEC_CTRL_ENTRY_FROM_PV
-+/*
-+ * Requires %rsp=regs/cpuinfo, %rdx=0
-+ * Clobbers %rax, %rcx, %rdx
-+ */
- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \
-- X86_FEATURE_IBPB_ENTRY_PV; \
-- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
-+ X86_FEATURE_IBPB_ENTRY_PV
-+
-+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
-+
- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \
- X86_FEATURE_SC_MSR_PV
-+.endm
-
- /* Use in interrupt/exception context. May interrupt Xen or PV context. */
--#define SPEC_CTRL_ENTRY_FROM_INTR \
-+.macro SPEC_CTRL_ENTRY_FROM_INTR
-+/*
-+ * Requires %rsp=regs, %r14=stack_end, %rdx=0
-+ * Clobbers %rax, %rcx, %rdx
-+ */
- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \
-- X86_FEATURE_IBPB_ENTRY_PV; \
-- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
-+ X86_FEATURE_IBPB_ENTRY_PV
-+
-+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
-+
- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
- X86_FEATURE_SC_MSR_PV
-+.endm
-
- /* Use when exiting to PV guest context. */
--#define SPEC_CTRL_EXIT_TO_PV \
-- ALTERNATIVE "", \
-- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \
-+.macro SPEC_CTRL_EXIT_TO_PV
-+/*
-+ * Requires %rax=spec_ctrl, %rsp=regs/info
-+ * Clobbers %rcx, %rdx
-+ */
-+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
-+
- DO_SPEC_CTRL_COND_VERW
-+.endm
-
- /*
- * Use in IST interrupt/exception context. May interrupt Xen or PV context.
---
-2.42.0
-
diff --git a/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
deleted file mode 100644
index f360cbd..0000000
--- a/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
+++ /dev/null
@@ -1,106 +0,0 @@
-From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Aug 2023 20:11:50 +0100
-Subject: [PATCH 34/55] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_*
- comments
-
-... to better explain how they're used.
-
-Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the
-corner case when e.g. an NMI hits late in an exit-to-guest path.
-
-Leave a TODO, which will be addressed in subsequent patches which arrange for
-VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++----
- 1 file changed, 31 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index c6d5f2ad01..97c4db31cd 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -230,7 +230,10 @@
- wrmsr
- .endm
-
--/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
-+/*
-+ * Used after an entry from PV context: SYSCALL, SYSENTER, INT,
-+ * etc. There is always a guest speculation state in context.
-+ */
- .macro SPEC_CTRL_ENTRY_FROM_PV
- /*
- * Requires %rsp=regs/cpuinfo, %rdx=0
-@@ -245,7 +248,11 @@
- X86_FEATURE_SC_MSR_PV
- .endm
-
--/* Use in interrupt/exception context. May interrupt Xen or PV context. */
-+/*
-+ * Used after an exception or maskable interrupt, hitting Xen or PV context.
-+ * There will either be a guest speculation context, or (barring fatal
-+ * exceptions) a well-formed Xen speculation context.
-+ */
- .macro SPEC_CTRL_ENTRY_FROM_INTR
- /*
- * Requires %rsp=regs, %r14=stack_end, %rdx=0
-@@ -260,7 +267,10 @@
- X86_FEATURE_SC_MSR_PV
- .endm
-
--/* Use when exiting to PV guest context. */
-+/*
-+ * Used when exiting from any entry context, back to PV context. This
-+ * includes from an IST entry which moved onto the primary stack.
-+ */
- .macro SPEC_CTRL_EXIT_TO_PV
- /*
- * Requires %rax=spec_ctrl, %rsp=regs/info
-@@ -272,7 +282,13 @@
- .endm
-
- /*
-- * Use in IST interrupt/exception context. May interrupt Xen or PV context.
-+ * Used after an IST entry hitting Xen or PV context. Special care is needed,
-+ * because when hitting Xen context, there may not be a well-formed
-+ * speculation context. (i.e. it can hit in the middle of
-+ * SPEC_CTRL_{ENTRY,EXIT}_* regions.)
-+ *
-+ * An IST entry which hits PV context moves onto the primary stack and leaves
-+ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN.
- */
- .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
- /*
-@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- UNLIKELY_END(\@_serialise)
- .endm
-
--/* Use when exiting to Xen context. */
-+/*
-+ * Use when exiting from any entry context, back to Xen context. This
-+ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an
-+ * incomplete speculation context.
-+ *
-+ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we
-+ * need to treat this as if it were an EXIT_TO_$GUEST case too.
-+ */
- .macro SPEC_CTRL_EXIT_TO_XEN
- /*
- * Requires %rbx=stack_end
-@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- wrmsr
-
- .L\@_skip_sc_msr:
-+
-+ /* TODO VERW */
-+
- .endm
-
- #endif /* __ASSEMBLY__ */
---
-2.42.0
-
diff --git a/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
deleted file mode 100644
index fe2acaf..0000000
--- a/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 13 Sep 2023 13:48:16 +0100
-Subject: [PATCH 35/55] x86/entry: Adjust restore_all_xen to hold stack_end in
- %r14
-
-All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it
-for consistency.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++----
- xen/arch/x86/x86_64/entry.S | 8 ++++----
- 2 files changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index 97c4db31cd..66c706496f 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- */
- .macro SPEC_CTRL_EXIT_TO_XEN
- /*
-- * Requires %rbx=stack_end
-+ * Requires %r14=stack_end
- * Clobbers %rax, %rcx, %rdx
- */
-- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
- jz .L\@_skip_sc_msr
-
- /*
-@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- */
- xor %edx, %edx
-
-- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
- jz .L\@_skip_sc_msr
-
-- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
-+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
- mov $MSR_SPEC_CTRL, %ecx
- wrmsr
-
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index b45a09823a..92279a225d 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -665,15 +665,15 @@ restore_all_xen:
- * Check whether we need to switch to the per-CPU page tables, in
- * case we return to late PV exit code (from an NMI or #MC).
- */
-- GET_STACK_END(bx)
-- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx)
-+ GET_STACK_END(14)
-+ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
- UNLIKELY_START(ne, exit_cr3)
-- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
-+ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax
- mov %rax, %cr3
- UNLIKELY_END(exit_cr3)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
-+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
-
- RESTORE_ALL adj=8
- iretq
---
-2.42.0
-
diff --git a/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
deleted file mode 100644
index ba7ea21..0000000
--- a/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 13 Sep 2023 12:20:12 +0100
-Subject: [PATCH 36/55] x86/entry: Track the IST-ness of an entry for the exit
- paths
-
-Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the
-entry/exit asm, so it only needs setting in the IST path.
-
-As this is subtle and fragile, add check_ist_exit() to be used in debugging
-builds to cross-check that the ist_exit boolean matches the entry vector.
-
-Write check_ist_exit() in C, because it's debug only and the logic is more
-complicated than I care to maintain in asm.
-
-For now, we only need to use this signal in the exit-to-Xen path, but some
-exit-to-guest paths happen in IST context too. Check the correctness in all
-exit paths to avoid the logic bit-rotting.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c)
-
-x86/entry: Partially revert IST-exit checks
-
-The patch adding check_ist_exit() didn't account for the fact that
-reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in
-%r12 doesn't survive into the next context, and is a stale value in C.
-
-This shows up in Gitlab CI for the Clang build:
-
- https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827
-
-and in OSSTest for GCC 8:
-
- http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log
-
-There's no straightforward way to reconstruct the IST-exit-ness on the
-exit-to-guest path after a context switch. For now, we only need IST-exit on
-the return-to-Xen path.
-
-Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9)
----
- xen/arch/x86/traps.c | 13 +++++++++++++
- xen/arch/x86/x86_64/entry.S | 13 ++++++++++++-
- 2 files changed, 25 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index d12004b1c6..e65cc60041 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr)
- do_softirq();
- }
-
-+#ifdef CONFIG_DEBUG
-+void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit)
-+{
-+ const unsigned int ist_mask =
-+ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) |
-+ (1U << X86_EXC_DF) | (1U << X86_EXC_MC);
-+ uint8_t ev = regs->entry_vector;
-+ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask);
-+
-+ ASSERT(is_ist == ist_exit);
-+}
-+#endif
-+
- /*
- * Local variables:
- * mode: C
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 92279a225d..4cebc4fbe3 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -659,8 +659,15 @@ ENTRY(early_page_fault)
- .section .text.entry, "ax", @progbits
-
- ALIGN
--/* No special register assumptions. */
-+/* %r12=ist_exit */
- restore_all_xen:
-+
-+#ifdef CONFIG_DEBUG
-+ mov %rsp, %rdi
-+ mov %r12, %rsi
-+ call check_ist_exit
-+#endif
-+
- /*
- * Check whether we need to switch to the per-CPU page tables, in
- * case we return to late PV exit code (from an NMI or #MC).
-@@ -1091,6 +1098,10 @@ handle_ist_exception:
- .L_ist_dispatch_done:
- mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
- mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
-+
-+ /* This is an IST exit */
-+ mov $1, %r12d
-+
- cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
- jne ret_from_intr
-
---
-2.42.0
-
diff --git a/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
deleted file mode 100644
index 6580907..0000000
--- a/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 13 Sep 2023 13:53:33 +0100
-Subject: [PATCH 37/55] x86/spec-ctrl: Issue VERW during IST exit to Xen
-
-There is a corner case where e.g. an NMI hitting an exit-to-guest path after
-SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW
-flush to scrub potentially sensitive data from uarch buffers.
-
-In order to compensate, issue VERW when exiting to Xen from an IST entry.
-
-SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack,
-and we're about to add a third. Load the field into %ebx, and list the
-register as clobbered.
-
-%r12 has been arranged to be the ist_exit signal, so add this as an input
-dependency and use it to identify when to issue a VERW.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++-----
- xen/arch/x86/x86_64/entry.S | 2 +-
- 2 files changed, 16 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index 66c706496f..28a75796e6 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- */
- .macro SPEC_CTRL_EXIT_TO_XEN
- /*
-- * Requires %r14=stack_end
-- * Clobbers %rax, %rcx, %rdx
-+ * Requires %r12=ist_exit, %r14=stack_end
-+ * Clobbers %rax, %rbx, %rcx, %rdx
- */
-- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
-+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
-+
-+ testb $SCF_ist_sc_msr, %bl
- jz .L\@_skip_sc_msr
-
- /*
-@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- */
- xor %edx, %edx
-
-- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
-+ testb $SCF_use_shadow, %bl
- jz .L\@_skip_sc_msr
-
- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
-@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
-
- .L\@_skip_sc_msr:
-
-- /* TODO VERW */
-+ test %r12, %r12
-+ jz .L\@_skip_ist_exit
-+
-+ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
-+ testb $SCF_verw, %bl
-+ jz .L\@_skip_verw
-+ verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
-+.L\@_skip_verw:
-
-+.L\@_skip_ist_exit:
- .endm
-
- #endif /* __ASSEMBLY__ */
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 4cebc4fbe3..c12e011b4d 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3)
- UNLIKELY_END(exit_cr3)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
-+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */
-
- RESTORE_ALL adj=8
- iretq
---
-2.42.0
-
diff --git a/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
deleted file mode 100644
index 6f2cdcb..0000000
--- a/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 15 Sep 2023 12:13:51 +0100
-Subject: [PATCH 38/55] x86/amd: Introduce is_zen{1,2}_uarch() predicates
-
-We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to
-introduce a 4th. Wrap the heuristic into a pair of predicates rather than
-opencoding it, and the explanation of the heuristic, at each usage site.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705)
----
- xen/arch/x86/cpu/amd.c | 18 ++++--------------
- xen/arch/x86/include/asm/amd.h | 11 +++++++++++
- 2 files changed, 15 insertions(+), 14 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index 1bb3044be1..e94ba5a0e0 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable)
- * non-branch instructions to be ignored. It is to be set unilaterally in
- * newer microcode.
- *
-- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a
-- * simple model number comparison, so use STIBP as a heuristic to separate the
-- * two uarches in Fam17h(AMD)/18h(Hygon).
-+ * This chickenbit is something unrelated on Zen1.
- */
- void amd_init_spectral_chicken(void)
- {
- uint64_t val, chickenbit = 1 << 1;
-
-- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+ if (cpu_has_hypervisor || !is_zen2_uarch())
- return;
-
- if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit))
-@@ -912,11 +910,8 @@ void amd_check_zenbleed(void)
- * With the Fam17h check above, most parts getting here are
- * Zen1. They're not affected. Assume Zen2 ones making it
- * here are affected regardless of microcode version.
-- *
-- * Zen1 vs Zen2 isn't a simple model number comparison, so use
-- * STIBP as a heuristic to distinguish.
- */
-- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+ if (is_zen1_uarch())
- return;
- good_rev = ~0U;
- break;
-@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void)
- */
- s_time_t delta;
-
-- /*
-- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as
-- * a heuristic to separate the two uarches in Fam17h.
-- */
-- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 ||
-- !boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch())
- return 0;
-
- /*
-diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h
-index a975d3de26..82324110ab 100644
---- a/xen/arch/x86/include/asm/amd.h
-+++ b/xen/arch/x86/include/asm/amd.h
-@@ -140,6 +140,17 @@
- AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
- AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
-
-+/*
-+ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and
-+ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP
-+ * as a heuristic that distinguishes the two.
-+ *
-+ * The caller is required to perform the appropriate vendor/family checks
-+ * first.
-+ */
-+#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP)
-+
- struct cpuinfo_x86;
- int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
-
---
-2.42.0
-
diff --git a/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
deleted file mode 100644
index 4b23d12..0000000
--- a/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
+++ /dev/null
@@ -1,228 +0,0 @@
-From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Aug 2023 20:24:25 +0100
-Subject: [PATCH 39/55] x86/spec-ctrl: Mitigate the Zen1 DIV leakage
-
-In the Zen1 microarchitecure, there is one divider in the pipeline which
-services uops from both threads. In the case of #DE, the latched result from
-the previous DIV to execute will be forwarded speculatively.
-
-This is an interesting covert channel that allows two threads to communicate
-without any system calls. In also allows userspace to obtain the result of
-the most recent DIV instruction executed (even speculatively) in the core,
-which can be from a higher privilege context.
-
-Scrub the result from the divider by executing a non-faulting divide. This
-needs performing on the exit-to-guest paths, and ist_exit-to-Xen.
-
-Alternatives in IST context is believed safe now that it's done in NMI
-context.
-
-This is XSA-439 / CVE-2023-20588.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315)
----
- docs/misc/xen-command-line.pandoc | 6 ++-
- xen/arch/x86/hvm/svm/entry.S | 1 +
- xen/arch/x86/include/asm/cpufeatures.h | 2 +-
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++
- xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++-
- 5 files changed, 71 insertions(+), 3 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index d9dae740cc..b92c8f969c 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
- > {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
--> unpriv-mmio,gds-mit}=<bool> ]`
-+> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
-
- Controls for speculative execution sidechannel mitigations. By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
-@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate
- GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use
- of the AVX2 Gather instructions.
-
-+On all hardware, the `div-scrub=` option can be used to force or prevent Xen
-+from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
-+DIV-leakage on hardware believed to be vulnerable.
-+
- ### sync_console
- > `= <boolean>`
-
-diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
-index 981cd82e7c..934f12cf5c 100644
---- a/xen/arch/x86/hvm/svm/entry.S
-+++ b/xen/arch/x86/hvm/svm/entry.S
-@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap)
- 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */
- .endm
- ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM
-+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
-
- pop %r15
- pop %r14
-diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
-index da0593de85..c3aad21c3b 100644
---- a/xen/arch/x86/include/asm/cpufeatures.h
-+++ b/xen/arch/x86/include/asm/cpufeatures.h
-@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM
- XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
- XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
- XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
--/* Bits 23 unused. */
-+XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */
- XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */
- XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */
- XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index 28a75796e6..f4b8b9d956 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -177,6 +177,19 @@
- .L\@_verw_skip:
- .endm
-
-+.macro DO_SPEC_CTRL_DIV
-+/*
-+ * Requires nothing
-+ * Clobbers %rax
-+ *
-+ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any
-+ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber
-+ * %rdx.
-+ */
-+ mov $1, %eax
-+ div %al
-+.endm
-+
- .macro DO_SPEC_CTRL_ENTRY maybexen:req
- /*
- * Requires %rsp=regs (also cpuinfo if !maybexen)
-@@ -279,6 +292,8 @@
- ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
-
- DO_SPEC_CTRL_COND_VERW
-+
-+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
- .endm
-
- /*
-@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
- .L\@_skip_verw:
-
-+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
-+
- .L\@_skip_ist_exit:
- .endm
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 79b98f0fe7..0ff3c895ac 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1;
- static bool __initdata opt_unpriv_mmio;
- static bool __ro_after_init opt_fb_clear_mmio;
- static int8_t __initdata opt_gds_mit = -1;
-+static int8_t __initdata opt_div_scrub = -1;
-
- static int __init cf_check parse_spec_ctrl(const char *s)
- {
-@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- opt_srb_lock = 0;
- opt_unpriv_mmio = false;
- opt_gds_mit = 0;
-+ opt_div_scrub = 0;
- }
- else if ( val > 0 )
- rc = -EINVAL;
-@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- opt_unpriv_mmio = val;
- else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 )
- opt_gds_mit = val;
-+ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 )
-+ opt_div_scrub = val;
- else
- rc = -EINVAL;
-
-@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk)
- "\n");
-
- /* Settings for Xen's protection, irrespective of guests. */
-- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n",
-+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
- thunk == THUNK_NONE ? "N/A" :
- thunk == THUNK_RETPOLINE ? "RETPOLINE" :
- thunk == THUNK_LFENCE ? "LFENCE" :
-@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk)
- opt_l1d_flush ? " L1D_FLUSH" : "",
- opt_md_clear_pv || opt_md_clear_hvm ||
- opt_fb_clear_mmio ? " VERW" : "",
-+ opt_div_scrub ? " DIV" : "",
- opt_branch_harden ? " BRANCH_HARDEN" : "");
-
- /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
-@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled)
- setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
- }
-
-+/*
-+ * The Div leakage issue is specific to the AMD Zen1 microarchitecure.
-+ *
-+ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no
-+ * hope of spotting the case where we might move to vulnerable hardware. We
-+ * also can't make any useful conclusion about SMT-ness.
-+ *
-+ * Don't check the hypervisor bit, so at least we do the safe thing when
-+ * booting on something that looks like a Zen1 CPU.
-+ */
-+static bool __init has_div_vuln(void)
-+{
-+ if ( !(boot_cpu_data.x86_vendor &
-+ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
-+ return false;
-+
-+ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 )
-+ return false;
-+
-+ return is_zen1_uarch();
-+}
-+
-+static void __init div_calculations(bool hw_smt_enabled)
-+{
-+ bool cpu_bug_div = has_div_vuln();
-+
-+ if ( opt_div_scrub == -1 )
-+ opt_div_scrub = cpu_bug_div;
-+
-+ if ( opt_div_scrub )
-+ setup_force_cpu_cap(X86_FEATURE_SC_DIV);
-+
-+ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled )
-+ warning_add(
-+ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n"
-+ "enabled. Please assess your configuration and choose an\n"
-+ "explicit 'smt=<bool>' setting. See XSA-439.\n");
-+}
-+
- static void __init ibpb_calculations(void)
- {
- bool def_ibpb_entry = false;
-@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void)
-
- ibpb_calculations();
-
-+ div_calculations(hw_smt_enabled);
-+
- /* Check whether Eager FPU should be enabled by default. */
- if ( opt_eager_fpu == -1 )
- opt_eager_fpu = should_use_eager_fpu();
---
-2.42.0
-
diff --git a/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch b/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
deleted file mode 100644
index 21fb16f..0000000
--- a/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
+++ /dev/null
@@ -1,455 +0,0 @@
-From 90c540c58985dc774cf0a1d2dc423473d3f37267 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <JBeulich@suse.com>
-Date: Wed, 20 Sep 2023 10:33:26 +0100
-Subject: [PATCH 40/55] x86/shadow: defer releasing of PV's top-level shadow
- reference
-
-sh_set_toplevel_shadow() re-pinning the top-level shadow we may be
-running on is not enough (and at the same time unnecessary when the
-shadow isn't what we're running on): That shadow becomes eligible for
-blowing away (from e.g. shadow_prealloc()) immediately after the
-paging lock was dropped. Yet it needs to remain valid until the actual
-page table switch occurred.
-
-Propagate up the call chain the shadow entry that needs releasing
-eventually, and carry out the release immediately after switching page
-tables. Handle update_cr3() failures by switching to idle pagetables.
-Note that various further uses of update_cr3() are HVM-only or only act
-on paused vCPU-s, in which case sh_set_toplevel_shadow() will not defer
-releasing of the reference.
-
-While changing the update_cr3() hook, also convert the "do_locking"
-parameter to boolean.
-
-This is CVE-2023-34322 / XSA-438.
-
-Reported-by: Tim Deegan <tim@xen.org>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: George Dunlap <george.dunlap@cloud.com>
-(cherry picked from commit fb0ff49fe9f784bfee0370c2a3c5f20e39d7a1cb)
----
- xen/arch/x86/include/asm/mm.h | 2 +-
- xen/arch/x86/include/asm/paging.h | 6 ++--
- xen/arch/x86/include/asm/shadow.h | 8 +++++
- xen/arch/x86/mm.c | 27 +++++++++++----
- xen/arch/x86/mm/hap/hap.c | 6 ++--
- xen/arch/x86/mm/shadow/common.c | 55 ++++++++++++++++++++-----------
- xen/arch/x86/mm/shadow/multi.c | 33 ++++++++++++-------
- xen/arch/x86/mm/shadow/none.c | 4 ++-
- xen/arch/x86/mm/shadow/private.h | 14 ++++----
- xen/arch/x86/pv/domain.c | 25 ++++++++++++--
- 10 files changed, 127 insertions(+), 53 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
-index d723c7c38f..a5d7fdd32e 100644
---- a/xen/arch/x86/include/asm/mm.h
-+++ b/xen/arch/x86/include/asm/mm.h
-@@ -552,7 +552,7 @@ void audit_domains(void);
- #endif
-
- void make_cr3(struct vcpu *v, mfn_t mfn);
--void update_cr3(struct vcpu *v);
-+pagetable_t update_cr3(struct vcpu *v);
- int vcpu_destroy_pagetables(struct vcpu *);
- void *do_page_walk(struct vcpu *v, unsigned long addr);
-
-diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
-index 6f7000d5f4..94c590f31a 100644
---- a/xen/arch/x86/include/asm/paging.h
-+++ b/xen/arch/x86/include/asm/paging.h
-@@ -138,7 +138,7 @@ struct paging_mode {
- paddr_t ga, uint32_t *pfec,
- unsigned int *page_order);
- #endif
-- void (*update_cr3 )(struct vcpu *v, int do_locking,
-+ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
- bool noflush);
- void (*update_paging_modes )(struct vcpu *v);
- bool (*flush_tlb )(const unsigned long *vcpu_bitmap);
-@@ -310,9 +310,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
- /* Update all the things that are derived from the guest's CR3.
- * Called when the guest changes CR3; the caller can then use v->arch.cr3
- * as the value to load into the host CR3 to schedule this vcpu */
--static inline void paging_update_cr3(struct vcpu *v, bool noflush)
-+static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
- {
-- paging_get_hostmode(v)->update_cr3(v, 1, noflush);
-+ return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
- }
-
- /* Update all the things that are derived from the guest's CR0/CR3/CR4.
-diff --git a/xen/arch/x86/include/asm/shadow.h b/xen/arch/x86/include/asm/shadow.h
-index dad876d294..0b72c9eda8 100644
---- a/xen/arch/x86/include/asm/shadow.h
-+++ b/xen/arch/x86/include/asm/shadow.h
-@@ -99,6 +99,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages,
-
- int shadow_get_allocation_bytes(struct domain *d, uint64_t *size);
-
-+/* Helper to invoke for deferred releasing of a top-level shadow's reference. */
-+void shadow_put_top_level(struct domain *d, pagetable_t old);
-+
- #else /* !CONFIG_SHADOW_PAGING */
-
- #define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
-@@ -121,6 +124,11 @@ static inline void shadow_prepare_page_type_change(struct domain *d,
-
- static inline void shadow_blow_tables_per_domain(struct domain *d) {}
-
-+static inline void shadow_put_top_level(struct domain *d, pagetable_t old)
-+{
-+ ASSERT_UNREACHABLE();
-+}
-+
- static inline int shadow_domctl(struct domain *d,
- struct xen_domctl_shadow_op *sc,
- XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index b46eee1332..e884a6fdbd 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -567,15 +567,12 @@ void write_ptbase(struct vcpu *v)
- *
- * Update ref counts to shadow tables appropriately.
- */
--void update_cr3(struct vcpu *v)
-+pagetable_t update_cr3(struct vcpu *v)
- {
- mfn_t cr3_mfn;
-
- if ( paging_mode_enabled(v->domain) )
-- {
-- paging_update_cr3(v, false);
-- return;
-- }
-+ return paging_update_cr3(v, false);
-
- if ( !(v->arch.flags & TF_kernel_mode) )
- cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user);
-@@ -583,6 +580,8 @@ void update_cr3(struct vcpu *v)
- cr3_mfn = pagetable_get_mfn(v->arch.guest_table);
-
- make_cr3(v, cr3_mfn);
-+
-+ return pagetable_null();
- }
-
- static inline void set_tlbflush_timestamp(struct page_info *page)
-@@ -3285,6 +3284,7 @@ int new_guest_cr3(mfn_t mfn)
- struct domain *d = curr->domain;
- int rc;
- mfn_t old_base_mfn;
-+ pagetable_t old_shadow;
-
- if ( is_pv_32bit_domain(d) )
- {
-@@ -3352,9 +3352,22 @@ int new_guest_cr3(mfn_t mfn)
- if ( !VM_ASSIST(d, m2p_strict) )
- fill_ro_mpt(mfn);
- curr->arch.guest_table = pagetable_from_mfn(mfn);
-- update_cr3(curr);
-+ old_shadow = update_cr3(curr);
-+
-+ /*
-+ * In shadow mode update_cr3() can fail, in which case here we're still
-+ * running on the prior top-level shadow (which we're about to release).
-+ * Switch to the idle page tables in such an event; the guest will have
-+ * been crashed already.
-+ */
-+ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow),
-+ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) )
-+ write_ptbase(curr);
-+ else
-+ write_ptbase(idle_vcpu[curr->processor]);
-
-- write_ptbase(curr);
-+ if ( !pagetable_is_null(old_shadow) )
-+ shadow_put_top_level(d, old_shadow);
-
- if ( likely(mfn_x(old_base_mfn) != 0) )
- {
-diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
-index 0fc1b1d9ac..57a19c3d59 100644
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -739,11 +739,13 @@ static bool cf_check hap_invlpg(struct vcpu *v, unsigned long linear)
- return 1;
- }
-
--static void cf_check hap_update_cr3(
-- struct vcpu *v, int do_locking, bool noflush)
-+static pagetable_t cf_check hap_update_cr3(
-+ struct vcpu *v, bool do_locking, bool noflush)
- {
- v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
- hvm_update_guest_cr3(v, noflush);
-+
-+ return pagetable_null();
- }
-
- static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap)
-diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
-index cf5e181f74..c0940f939e 100644
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -2590,13 +2590,13 @@ void cf_check shadow_update_paging_modes(struct vcpu *v)
- }
-
- /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
--void sh_set_toplevel_shadow(struct vcpu *v,
-- unsigned int slot,
-- mfn_t gmfn,
-- unsigned int root_type,
-- mfn_t (*make_shadow)(struct vcpu *v,
-- mfn_t gmfn,
-- uint32_t shadow_type))
-+pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
-+ unsigned int slot,
-+ mfn_t gmfn,
-+ unsigned int root_type,
-+ mfn_t (*make_shadow)(struct vcpu *v,
-+ mfn_t gmfn,
-+ uint32_t shadow_type))
- {
- mfn_t smfn;
- pagetable_t old_entry, new_entry;
-@@ -2653,20 +2653,37 @@ void sh_set_toplevel_shadow(struct vcpu *v,
- mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
- v->arch.paging.shadow.shadow_table[slot] = new_entry;
-
-- /* Decrement the refcount of the old contents of this slot */
-- if ( !pagetable_is_null(old_entry) )
-+ /*
-+ * Decrement the refcount of the old contents of this slot, unless
-+ * we're still running on that shadow - in that case it'll need holding
-+ * on to until the actual page table switch did occur.
-+ */
-+ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) )
- {
-- mfn_t old_smfn = pagetable_get_mfn(old_entry);
-- /* Need to repin the old toplevel shadow if it's been unpinned
-- * by shadow_prealloc(): in PV mode we're still running on this
-- * shadow and it's not safe to free it yet. */
-- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) )
-- {
-- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn));
-- domain_crash(d);
-- }
-- sh_put_ref(d, old_smfn, 0);
-+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
-+ old_entry = pagetable_null();
- }
-+
-+ /*
-+ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run
-+ * on such a shadow, so only call sites requesting an L4 shadow need to pay
-+ * attention to the returned value.
-+ */
-+ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow);
-+
-+ return old_entry;
-+}
-+
-+/*
-+ * Helper invoked when releasing of a top-level shadow's reference was
-+ * deferred in sh_set_toplevel_shadow() above.
-+ */
-+void shadow_put_top_level(struct domain *d, pagetable_t old_entry)
-+{
-+ ASSERT(!pagetable_is_null(old_entry));
-+ paging_lock(d);
-+ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
-+ paging_unlock(d);
- }
-
- /**************************************************************************/
-diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
-index 671bf8c228..c92b354a78 100644
---- a/xen/arch/x86/mm/shadow/multi.c
-+++ b/xen/arch/x86/mm/shadow/multi.c
-@@ -3224,7 +3224,8 @@ static void cf_check sh_detach_old_tables(struct vcpu *v)
- }
- }
-
--static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
-+static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
-+ bool noflush)
- /* Updates vcpu->arch.cr3 after the guest has changed CR3.
- * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
- * if appropriate).
-@@ -3238,6 +3239,7 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
- {
- struct domain *d = v->domain;
- mfn_t gmfn;
-+ pagetable_t old_entry = pagetable_null();
- #if GUEST_PAGING_LEVELS == 3
- const guest_l3e_t *gl3e;
- unsigned int i, guest_idx;
-@@ -3247,7 +3249,7 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
- if ( !is_hvm_domain(d) && !v->is_initialised )
- {
- ASSERT(v->arch.cr3 == 0);
-- return;
-+ return old_entry;
- }
-
- if ( do_locking ) paging_lock(v->domain);
-@@ -3320,11 +3322,12 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
- #if GUEST_PAGING_LEVELS == 4
- if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
- guest_flush_tlb_mask(d, d->dirty_cpumask);
-- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
-+ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow,
-+ sh_make_shadow);
- if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
- {
- ASSERT(d->is_dying || d->is_shutting_down);
-- return;
-+ return old_entry;
- }
- if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
- {
-@@ -3368,24 +3371,30 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
- gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
- if ( p2m_is_ram(p2mt) )
-- sh_set_toplevel_shadow(v, i, gl2mfn, SH_type_l2_shadow,
-- sh_make_shadow);
-+ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn,
-+ SH_type_l2_shadow,
-+ sh_make_shadow);
- else
-- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
-- sh_make_shadow);
-+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
-+ sh_make_shadow);
- }
- else
-- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow);
-+ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
-+ sh_make_shadow);
-+
-+ ASSERT(pagetable_is_null(old_entry));
- }
- }
- #elif GUEST_PAGING_LEVELS == 2
- if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
- guest_flush_tlb_mask(d, d->dirty_cpumask);
-- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
-+ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow,
-+ sh_make_shadow);
-+ ASSERT(pagetable_is_null(old_entry));
- if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
- {
- ASSERT(d->is_dying || d->is_shutting_down);
-- return;
-+ return old_entry;
- }
- #else
- #error This should never happen
-@@ -3473,6 +3482,8 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
-
- /* Release the lock, if we took it (otherwise it's the caller's problem) */
- if ( do_locking ) paging_unlock(v->domain);
-+
-+ return old_entry;
- }
-
-
-diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
-index eaaa874b11..743c0ffb85 100644
---- a/xen/arch/x86/mm/shadow/none.c
-+++ b/xen/arch/x86/mm/shadow/none.c
-@@ -52,9 +52,11 @@ static unsigned long cf_check _gva_to_gfn(
- }
- #endif
-
--static void cf_check _update_cr3(struct vcpu *v, int do_locking, bool noflush)
-+static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking,
-+ bool noflush)
- {
- ASSERT_UNREACHABLE();
-+ return pagetable_null();
- }
-
- static void cf_check _update_paging_modes(struct vcpu *v)
-diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
-index c2bb1ed3c3..91f798c5aa 100644
---- a/xen/arch/x86/mm/shadow/private.h
-+++ b/xen/arch/x86/mm/shadow/private.h
-@@ -391,13 +391,13 @@ mfn_t shadow_alloc(struct domain *d,
- void shadow_free(struct domain *d, mfn_t smfn);
-
- /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
--void sh_set_toplevel_shadow(struct vcpu *v,
-- unsigned int slot,
-- mfn_t gmfn,
-- unsigned int root_type,
-- mfn_t (*make_shadow)(struct vcpu *v,
-- mfn_t gmfn,
-- uint32_t shadow_type));
-+pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
-+ unsigned int slot,
-+ mfn_t gmfn,
-+ unsigned int root_type,
-+ mfn_t (*make_shadow)(struct vcpu *v,
-+ mfn_t gmfn,
-+ uint32_t shadow_type));
-
- /* Update the shadows in response to a pagetable write from Xen */
- int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size);
-diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
-index 5c92812dc6..2a445bb17b 100644
---- a/xen/arch/x86/pv/domain.c
-+++ b/xen/arch/x86/pv/domain.c
-@@ -424,10 +424,13 @@ bool __init xpti_pcid_enabled(void)
-
- static void _toggle_guest_pt(struct vcpu *v)
- {
-+ bool guest_update;
-+ pagetable_t old_shadow;
- unsigned long cr3;
-
- v->arch.flags ^= TF_kernel_mode;
-- update_cr3(v);
-+ guest_update = v->arch.flags & TF_kernel_mode;
-+ old_shadow = update_cr3(v);
-
- /*
- * Don't flush user global mappings from the TLB. Don't tick TLB clock.
-@@ -436,13 +439,31 @@ static void _toggle_guest_pt(struct vcpu *v)
- * TLB flush (for just the incoming PCID), as the top level page table may
- * have changed behind our backs. To be on the safe side, suppress the
- * no-flush unconditionally in this case.
-+ *
-+ * Furthermore in shadow mode update_cr3() can fail, in which case here
-+ * we're still running on the prior top-level shadow (which we're about
-+ * to release). Switch to the idle page tables in such an event; the
-+ * guest will have been crashed already.
- */
- cr3 = v->arch.cr3;
- if ( shadow_mode_enabled(v->domain) )
-+ {
- cr3 &= ~X86_CR3_NOFLUSH;
-+
-+ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow),
-+ maddr_to_mfn(cr3))) )
-+ {
-+ cr3 = idle_vcpu[v->processor]->arch.cr3;
-+ /* Also suppress runstate/time area updates below. */
-+ guest_update = false;
-+ }
-+ }
- write_cr3(cr3);
-
-- if ( !(v->arch.flags & TF_kernel_mode) )
-+ if ( !pagetable_is_null(old_shadow) )
-+ shadow_put_top_level(v->domain, old_shadow);
-+
-+ if ( !guest_update )
- return;
-
- if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) )
---
-2.42.0
-
diff --git a/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch b/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
deleted file mode 100644
index 1edecc8..0000000
--- a/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From c4e05c97f57d236040d1da5c1fbf6e3699dc86ea Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Fri, 22 Sep 2023 11:32:16 +0100
-Subject: [PATCH 41/55] tools/xenstored: domain_entry_fix(): Handle conflicting
- transaction
-
-The function domain_entry_fix() will be initially called to check if the
-quota is correct before attempt to commit any nodes. So it would be
-possible that accounting is temporarily negative. This is the case
-in the following sequence:
-
- 1) Create 50 nodes
- 2) Start two transactions
- 3) Delete all the nodes in each transaction
- 4) Commit the two transactions
-
-Because the first transaction will have succeed and updated the
-accounting, there is no guarantee that 'd->nbentry + num' will still
-be above 0. So the assert() would be triggered.
-The assert() was introduced in dbef1f748289 ("tools/xenstore: simplify
-and fix per domain node accounting") with the assumption that the
-value can't be negative. As this is not true revert to the original
-check but restricted to the path where we don't update. Take the
-opportunity to explain the rationale behind the check.
-
-This CVE-2023-34323 / XSA-440.
-
-Fixes: dbef1f748289 ("tools/xenstore: simplify and fix per domain node accounting")
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
----
- tools/xenstore/xenstored_domain.c | 14 ++++++++++++--
- 1 file changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
-index aa86892fed..6074df210c 100644
---- a/tools/xenstore/xenstored_domain.c
-+++ b/tools/xenstore/xenstored_domain.c
-@@ -1094,10 +1094,20 @@ int domain_entry_fix(unsigned int domid, int num, bool update)
- }
-
- cnt = d->nbentry + num;
-- assert(cnt >= 0);
-
-- if (update)
-+ if (update) {
-+ assert(cnt >= 0);
- d->nbentry = cnt;
-+ } else if (cnt < 0) {
-+ /*
-+ * In a transaction when a node is being added/removed AND
-+ * the same node has been added/removed outside the
-+ * transaction in parallel, the result value may be negative.
-+ * This is no problem, as the transaction will fail due to
-+ * the resulting conflict. So override 'cnt'.
-+ */
-+ cnt = 0;
-+ }
-
- return domid_is_unprivileged(domid) ? cnt : 0;
- }
---
-2.42.0
-
diff --git a/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch b/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
deleted file mode 100644
index 66597c2..0000000
--- a/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
+++ /dev/null
@@ -1,186 +0,0 @@
-From 0d8f9f7f2706e8ad8dfff203173693b631339b86 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Tue, 13 Jun 2023 15:01:05 +0200
-Subject: [PATCH 42/55] iommu/amd-vi: flush IOMMU TLB when flushing the DTE
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The caching invalidation guidelines from the AMD-Vi specification (48882—Rev
-3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will
-malfunction (see stale DMA mappings) if some fields of the DTE are updated but
-the IOMMU TLB is not flushed. This has been observed in practice on AMD
-systems. Due to the lack of guidance from the currently published
-specification this patch aims to increase the flushing done in order to prevent
-device malfunction.
-
-In order to fix, issue an INVALIDATE_IOMMU_PAGES command from
-amd_iommu_flush_device(), flushing all the address space. Note this requires
-callers to be adjusted in order to pass the DomID on the DTE previous to the
-modification.
-
-Some call sites don't provide a valid DomID to amd_iommu_flush_device() in
-order to avoid the flush. That's because the device had address translations
-disabled and hence the previous DomID on the DTE is not valid. Note the
-current logic relies on the entity disabling address translations to also flush
-the TLB of the in use DomID.
-
-Device I/O TLB flushing when ATS are enabled is not covered by the current
-change, as ATS usage is not security supported.
-
-This is XSA-442 / CVE-2023-34326
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 5fc98b97084a46884acef9320e643faf40d42212)
----
- xen/drivers/passthrough/amd/iommu.h | 3 ++-
- xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++-
- xen/drivers/passthrough/amd/iommu_guest.c | 5 +++--
- xen/drivers/passthrough/amd/iommu_init.c | 6 +++++-
- xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++----
- 5 files changed, 29 insertions(+), 9 deletions(-)
-
-diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
-index 5429ada58e..a58be28bf9 100644
---- a/xen/drivers/passthrough/amd/iommu.h
-+++ b/xen/drivers/passthrough/amd/iommu.h
-@@ -283,7 +283,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn,
- unsigned int order);
- void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
- uint64_t gaddr, unsigned int order);
--void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
-+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
-+ domid_t domid);
- void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
- void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
-
-diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
-index 40ddf366bb..cb28b36abc 100644
---- a/xen/drivers/passthrough/amd/iommu_cmd.c
-+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
-@@ -363,10 +363,18 @@ void amd_iommu_flush_pages(struct domain *d,
- _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order);
- }
-
--void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf)
-+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
-+ domid_t domid)
- {
- invalidate_dev_table_entry(iommu, bdf);
- flush_command_buffer(iommu, 0);
-+
-+ /* Also invalidate IOMMU TLB entries when flushing the DTE. */
-+ if ( domid != DOMID_INVALID )
-+ {
-+ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0);
-+ flush_command_buffer(iommu, 0);
-+ }
- }
-
- void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
-diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c
-index 80a331f546..be86bce6fb 100644
---- a/xen/drivers/passthrough/amd/iommu_guest.c
-+++ b/xen/drivers/passthrough/amd/iommu_guest.c
-@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd)
-
- static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
- {
-- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id;
-+ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid;
- struct amd_iommu_dte *gdte, *mdte, *dte_base;
- struct amd_iommu *iommu = NULL;
- struct guest_iommu *g_iommu;
-@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
- req_id = get_dma_requestor_id(iommu->seg, mbdf);
- dte_base = iommu->dev_table.buffer;
- mdte = &dte_base[req_id];
-+ prev_domid = mdte->domain_id;
-
- spin_lock_irqsave(&iommu->lock, flags);
- dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
-- amd_iommu_flush_device(iommu, req_id);
-+ amd_iommu_flush_device(iommu, req_id, prev_domid);
-
- return 0;
- }
-diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
-index 166570648d..101a60ce17 100644
---- a/xen/drivers/passthrough/amd/iommu_init.c
-+++ b/xen/drivers/passthrough/amd/iommu_init.c
-@@ -1547,7 +1547,11 @@ static int cf_check _invalidate_all_devices(
- req_id = ivrs_mappings[bdf].dte_requestor_id;
- if ( iommu )
- {
-- amd_iommu_flush_device(iommu, req_id);
-+ /*
-+ * IOMMU TLB flush performed separately (see
-+ * invalidate_all_domain_pages()).
-+ */
-+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
- amd_iommu_flush_intremap(iommu, req_id);
- }
- }
-diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-index 94e3775506..8641b84712 100644
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device(
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
-- amd_iommu_flush_device(iommu, req_id);
-+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
-+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
- }
- else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) )
- {
-+ domid_t prev_domid = dte->domain_id;
-+
- /*
- * Strictly speaking if the device is the only one with this requestor
- * ID, it could be allowed to be re-assigned regardless of unity map
-@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device(
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
-- amd_iommu_flush_device(iommu, req_id);
-+ amd_iommu_flush_device(iommu, req_id, prev_domid);
- }
- else
- spin_unlock_irqrestore(&iommu->lock, flags);
-@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
- spin_lock_irqsave(&iommu->lock, flags);
- if ( dte->tv || dte->v )
- {
-+ domid_t prev_domid = dte->domain_id;
-+
- /* See the comment in amd_iommu_setup_device_table(). */
- dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
- smp_wmb();
-@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
-- amd_iommu_flush_device(iommu, req_id);
-+ amd_iommu_flush_device(iommu, req_id, prev_domid);
-
- AMD_IOMMU_DEBUG("Disable: device id = %#x, "
- "domain = %d, paging mode = %d\n",
-@@ -610,7 +615,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
-- amd_iommu_flush_device(iommu, bdf);
-+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
-+ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID);
- }
-
- if ( amd_iommu_reserve_domain_unity_map(
---
-2.42.0
-
diff --git a/0043-libfsimage-xfs-Remove-dead-code.patch b/0043-libfsimage-xfs-Remove-dead-code.patch
deleted file mode 100644
index cbb9ad4..0000000
--- a/0043-libfsimage-xfs-Remove-dead-code.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From d665c6690eb3c2c86cb2c7dac09804211481f926 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Thu, 14 Sep 2023 13:22:50 +0100
-Subject: [PATCH 43/55] libfsimage/xfs: Remove dead code
-
-xfs_info.agnolog (and related code) and XFS_INO_AGBNO_BITS are dead code
-that serve no purpose.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 37fc1e6c1c5c63aafd9cfd76a37728d5baea7d71)
----
- tools/libfsimage/xfs/fsys_xfs.c | 18 ------------------
- 1 file changed, 18 deletions(-)
-
-diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
-index d735a88e55..2800699f59 100644
---- a/tools/libfsimage/xfs/fsys_xfs.c
-+++ b/tools/libfsimage/xfs/fsys_xfs.c
-@@ -37,7 +37,6 @@ struct xfs_info {
- int blklog;
- int inopblog;
- int agblklog;
-- int agnolog;
- unsigned int nextents;
- xfs_daddr_t next;
- xfs_daddr_t daddr;
-@@ -65,9 +64,7 @@ static struct xfs_info xfs;
-
- #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
- #define XFS_INO_OFFSET_BITS xfs.inopblog
--#define XFS_INO_AGBNO_BITS xfs.agblklog
- #define XFS_INO_AGINO_BITS (xfs.agblklog + xfs.inopblog)
--#define XFS_INO_AGNO_BITS xfs.agnolog
-
- static inline xfs_agblock_t
- agino2agbno (xfs_agino_t agino)
-@@ -149,20 +146,6 @@ xt_len (xfs_bmbt_rec_32_t *r)
- return le32(r->l3) & mask32lo(21);
- }
-
--static inline int
--xfs_highbit32(xfs_uint32_t v)
--{
-- int i;
--
-- if (--v) {
-- for (i = 0; i < 31; i++, v >>= 1) {
-- if (v == 0)
-- return i;
-- }
-- }
-- return 0;
--}
--
- static int
- isinxt (xfs_fileoff_t key, xfs_fileoff_t offset, xfs_filblks_t len)
- {
-@@ -472,7 +455,6 @@ xfs_mount (fsi_file_t *ffi, const char *options)
-
- xfs.inopblog = super.sb_inopblog;
- xfs.agblklog = super.sb_agblklog;
-- xfs.agnolog = xfs_highbit32 (le32(super.sb_agcount));
-
- xfs.btnode_ptr0_off =
- ((xfs.bsize - sizeof(xfs_btree_block_t)) /
---
-2.42.0
-
diff --git a/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch b/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
deleted file mode 100644
index 880ff83..0000000
--- a/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From f1cd620cc3572c858e276463e05f695d949362c5 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Thu, 14 Sep 2023 13:22:51 +0100
-Subject: [PATCH 44/55] libfsimage/xfs: Amend mask32lo() to allow the value 32
-
-agblklog could plausibly be 32, but that would overflow this shift.
-Perform the shift as ULL and cast to u32 at the end instead.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit ddc45e4eea946bb373a4b4a60c84bf9339cf413b)
----
- tools/libfsimage/xfs/fsys_xfs.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
-index 2800699f59..4720bb4505 100644
---- a/tools/libfsimage/xfs/fsys_xfs.c
-+++ b/tools/libfsimage/xfs/fsys_xfs.c
-@@ -60,7 +60,7 @@ static struct xfs_info xfs;
- #define inode ((xfs_dinode_t *)((char *)FSYS_BUF + 8192))
- #define icore (inode->di_core)
-
--#define mask32lo(n) (((xfs_uint32_t)1 << (n)) - 1)
-+#define mask32lo(n) ((xfs_uint32_t)((1ull << (n)) - 1))
-
- #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
- #define XFS_INO_OFFSET_BITS xfs.inopblog
---
-2.42.0
-
diff --git a/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch b/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
deleted file mode 100644
index 01ae52a..0000000
--- a/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
+++ /dev/null
@@ -1,137 +0,0 @@
-From 78143c5336c8316bcc648e964d65a07f216cf77f Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Thu, 14 Sep 2023 13:22:52 +0100
-Subject: [PATCH 45/55] libfsimage/xfs: Sanity-check the superblock during
- mounts
-
-Sanity-check the XFS superblock for wellformedness at the mount handler.
-This forces pygrub to abort parsing a potentially malformed filesystem and
-ensures the invariants assumed throughout the rest of the code hold.
-
-Also, derive parameters from previously sanitized parameters where possible
-(rather than reading them off the superblock)
-
-The code doesn't try to avoid overflowing the end of the disk, because
-that's an unlikely and benign error. Parameters used in calculations of
-xfs_daddr_t (like the root inode index) aren't in critical need of being
-sanitized.
-
-The sanitization of agblklog is basically checking that no obvious
-overflows happen on agblklog, and then ensuring agblocks is contained in
-the range (2^(sb_agblklog-1), 2^sb_agblklog].
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 620500dd1baf33347dfde5e7fde7cf7fe347da5c)
----
- tools/libfsimage/xfs/fsys_xfs.c | 48 ++++++++++++++++++++++++++-------
- tools/libfsimage/xfs/xfs.h | 12 +++++++++
- 2 files changed, 50 insertions(+), 10 deletions(-)
-
-diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
-index 4720bb4505..e4eb7e1ee2 100644
---- a/tools/libfsimage/xfs/fsys_xfs.c
-+++ b/tools/libfsimage/xfs/fsys_xfs.c
-@@ -17,6 +17,7 @@
- * along with this program; If not, see <http://www.gnu.org/licenses/>.
- */
-
-+#include <stdbool.h>
- #include <xenfsimage_grub.h>
- #include "xfs.h"
-
-@@ -433,29 +434,56 @@ first_dentry (fsi_file_t *ffi, xfs_ino_t *ino)
- return next_dentry (ffi, ino);
- }
-
-+static bool
-+xfs_sb_is_invalid (const xfs_sb_t *super)
-+{
-+ return (le32(super->sb_magicnum) != XFS_SB_MAGIC)
-+ || ((le16(super->sb_versionnum) & XFS_SB_VERSION_NUMBITS) !=
-+ XFS_SB_VERSION_4)
-+ || (super->sb_inodelog < XFS_SB_INODELOG_MIN)
-+ || (super->sb_inodelog > XFS_SB_INODELOG_MAX)
-+ || (super->sb_blocklog < XFS_SB_BLOCKLOG_MIN)
-+ || (super->sb_blocklog > XFS_SB_BLOCKLOG_MAX)
-+ || (super->sb_blocklog < super->sb_inodelog)
-+ || (super->sb_agblklog > XFS_SB_AGBLKLOG_MAX)
-+ || ((1ull << super->sb_agblklog) < le32(super->sb_agblocks))
-+ || (((1ull << super->sb_agblklog) >> 1) >=
-+ le32(super->sb_agblocks))
-+ || ((super->sb_blocklog + super->sb_dirblklog) >=
-+ XFS_SB_DIRBLK_NUMBITS);
-+}
-+
- static int
- xfs_mount (fsi_file_t *ffi, const char *options)
- {
- xfs_sb_t super;
-
- if (!devread (ffi, 0, 0, sizeof(super), (char *)&super)
-- || (le32(super.sb_magicnum) != XFS_SB_MAGIC)
-- || ((le16(super.sb_versionnum)
-- & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4) ) {
-+ || xfs_sb_is_invalid(&super)) {
- return 0;
- }
-
-- xfs.bsize = le32 (super.sb_blocksize);
-- xfs.blklog = super.sb_blocklog;
-- xfs.bdlog = xfs.blklog - SECTOR_BITS;
-+ /*
-+ * Not sanitized. It's exclusively used to generate disk addresses,
-+ * so it's not important from a security standpoint.
-+ */
- xfs.rootino = le64 (super.sb_rootino);
-- xfs.isize = le16 (super.sb_inodesize);
-- xfs.agblocks = le32 (super.sb_agblocks);
-- xfs.dirbsize = xfs.bsize << super.sb_dirblklog;
-
-- xfs.inopblog = super.sb_inopblog;
-+ /*
-+ * Sanitized to be consistent with each other, only used to
-+ * generate disk addresses, so it's safe
-+ */
-+ xfs.agblocks = le32 (super.sb_agblocks);
- xfs.agblklog = super.sb_agblklog;
-
-+ /* Derived from sanitized parameters */
-+ xfs.bsize = 1 << super.sb_blocklog;
-+ xfs.blklog = super.sb_blocklog;
-+ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
-+ xfs.isize = 1 << super.sb_inodelog;
-+ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
-+ xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
-+
- xfs.btnode_ptr0_off =
- ((xfs.bsize - sizeof(xfs_btree_block_t)) /
- (sizeof (xfs_bmbt_key_t) + sizeof (xfs_bmbt_ptr_t)))
-diff --git a/tools/libfsimage/xfs/xfs.h b/tools/libfsimage/xfs/xfs.h
-index 40699281e4..b87e37d3d7 100644
---- a/tools/libfsimage/xfs/xfs.h
-+++ b/tools/libfsimage/xfs/xfs.h
-@@ -134,6 +134,18 @@ typedef struct xfs_sb
- xfs_uint8_t sb_dummy[7]; /* padding */
- } xfs_sb_t;
-
-+/* Bound taken from xfs.c in GRUB2. It doesn't exist in the spec */
-+#define XFS_SB_DIRBLK_NUMBITS 27
-+/* Implied by the XFS specification. The minimum block size is 512 octets */
-+#define XFS_SB_BLOCKLOG_MIN 9
-+/* Implied by the XFS specification. The maximum block size is 65536 octets */
-+#define XFS_SB_BLOCKLOG_MAX 16
-+/* Implied by the XFS specification. The minimum inode size is 256 octets */
-+#define XFS_SB_INODELOG_MIN 8
-+/* Implied by the XFS specification. The maximum inode size is 2048 octets */
-+#define XFS_SB_INODELOG_MAX 11
-+/* High bound for sb_agblklog */
-+#define XFS_SB_AGBLKLOG_MAX 32
-
- /* those are from xfs_btree.h */
-
---
-2.42.0
-
diff --git a/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch b/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
deleted file mode 100644
index 0c32745..0000000
--- a/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From eb4efdac4cc7121f832ee156f39761312878f3a5 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Thu, 14 Sep 2023 13:22:53 +0100
-Subject: [PATCH 46/55] libfsimage/xfs: Add compile-time check to libfsimage
-
-Adds the common tools include folder to the -I compile flags
-of libfsimage. This allows us to use:
- xen-tools/common-macros.h:BUILD_BUG_ON()
-
-With it, statically assert a sanitized "blocklog - SECTOR_BITS" cannot
-underflow.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 7d85c70431593550e32022e3a19a37f306f49e00)
----
- tools/libfsimage/common.mk | 2 +-
- tools/libfsimage/xfs/fsys_xfs.c | 4 +++-
- 2 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libfsimage/common.mk b/tools/libfsimage/common.mk
-index 4fc8c66795..e4336837d0 100644
---- a/tools/libfsimage/common.mk
-+++ b/tools/libfsimage/common.mk
-@@ -1,7 +1,7 @@
- include $(XEN_ROOT)/tools/Rules.mk
-
- FSDIR := $(libdir)/xenfsimage
--CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\"
-+CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ $(CFLAGS_xeninclude) -DFSIMAGE_FSDIR=\"$(FSDIR)\"
- CFLAGS += -D_GNU_SOURCE
- LDFLAGS += -L../common/
-
-diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
-index e4eb7e1ee2..4a8dd6f239 100644
---- a/tools/libfsimage/xfs/fsys_xfs.c
-+++ b/tools/libfsimage/xfs/fsys_xfs.c
-@@ -19,6 +19,7 @@
-
- #include <stdbool.h>
- #include <xenfsimage_grub.h>
-+#include <xen-tools/libs.h>
- #include "xfs.h"
-
- #define MAX_LINK_COUNT 8
-@@ -477,9 +478,10 @@ xfs_mount (fsi_file_t *ffi, const char *options)
- xfs.agblklog = super.sb_agblklog;
-
- /* Derived from sanitized parameters */
-+ BUILD_BUG_ON(XFS_SB_BLOCKLOG_MIN < SECTOR_BITS);
-+ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
- xfs.bsize = 1 << super.sb_blocklog;
- xfs.blklog = super.sb_blocklog;
-- xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
- xfs.isize = 1 << super.sb_inodelog;
- xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
- xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
---
-2.42.0
-
diff --git a/0047-tools-pygrub-Remove-unnecessary-hypercall.patch b/0047-tools-pygrub-Remove-unnecessary-hypercall.patch
deleted file mode 100644
index 6bdd9bb..0000000
--- a/0047-tools-pygrub-Remove-unnecessary-hypercall.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 8a584126eae53a44cefb0acdbca201233a557fa5 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Mon, 25 Sep 2023 18:32:21 +0100
-Subject: [PATCH 47/55] tools/pygrub: Remove unnecessary hypercall
-
-There's a hypercall being issued in order to determine whether PV64 is
-supported, but since Xen 4.3 that's strictly true so it's not required.
-
-Plus, this way we can avoid mapping the privcmd interface altogether in the
-depriv pygrub.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit f4b504c6170c446e61055cbd388ae4e832a9deca)
----
- tools/pygrub/src/pygrub | 12 +-----------
- 1 file changed, 1 insertion(+), 11 deletions(-)
-
-diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
-index ce7ab0eb8c..ce4e07d3e8 100755
---- a/tools/pygrub/src/pygrub
-+++ b/tools/pygrub/src/pygrub
-@@ -18,7 +18,6 @@ import os, sys, string, struct, tempfile, re, traceback, stat, errno
- import copy
- import logging
- import platform
--import xen.lowlevel.xc
-
- import curses, _curses, curses.textpad, curses.ascii
- import getopt
-@@ -668,14 +667,6 @@ def run_grub(file, entry, fs, cfg_args):
-
- return grubcfg
-
--def supports64bitPVguest():
-- xc = xen.lowlevel.xc.xc()
-- caps = xc.xeninfo()['xen_caps'].split(" ")
-- for cap in caps:
-- if cap == "xen-3.0-x86_64":
-- return True
-- return False
--
- # If nothing has been specified, look for a Solaris domU. If found, perform the
- # necessary tweaks.
- def sniff_solaris(fs, cfg):
-@@ -684,8 +675,7 @@ def sniff_solaris(fs, cfg):
- return cfg
-
- if not cfg["kernel"]:
-- if supports64bitPVguest() and \
-- fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
-+ if fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
- cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix"
- cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
- elif fs.file_exists("/platform/i86xpv/kernel/unix"):
---
-2.42.0
-
diff --git a/0048-tools-pygrub-Small-refactors.patch b/0048-tools-pygrub-Small-refactors.patch
deleted file mode 100644
index 55b238c..0000000
--- a/0048-tools-pygrub-Small-refactors.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From e7059f16f7c2b99fea30b9671fec74c0375eee8f Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Mon, 25 Sep 2023 18:32:22 +0100
-Subject: [PATCH 48/55] tools/pygrub: Small refactors
-
-Small tidy up to ensure output_directory always has a trailing '/' to ease
-concatenating paths and that `output` can only be a filename or None.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-(cherry picked from commit 9f2ff9a7c9b3ac734ae99f17f0134ed0343dcccf)
----
- tools/pygrub/src/pygrub | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
-index ce4e07d3e8..1042c05b86 100755
---- a/tools/pygrub/src/pygrub
-+++ b/tools/pygrub/src/pygrub
-@@ -793,7 +793,7 @@ if __name__ == "__main__":
- debug = False
- not_really = False
- output_format = "sxp"
-- output_directory = "/var/run/xen/pygrub"
-+ output_directory = "/var/run/xen/pygrub/"
-
- # what was passed in
- incfg = { "kernel": None, "ramdisk": None, "args": "" }
-@@ -815,7 +815,8 @@ if __name__ == "__main__":
- usage()
- sys.exit()
- elif o in ("--output",):
-- output = a
-+ if a != "-":
-+ output = a
- elif o in ("--kernel",):
- incfg["kernel"] = a
- elif o in ("--ramdisk",):
-@@ -847,12 +848,11 @@ if __name__ == "__main__":
- if not os.path.isdir(a):
- print("%s is not an existing directory" % a)
- sys.exit(1)
-- output_directory = a
-+ output_directory = a + '/'
-
- if debug:
- logging.basicConfig(level=logging.DEBUG)
-
--
- try:
- os.makedirs(output_directory, 0o700)
- except OSError as e:
-@@ -861,7 +861,7 @@ if __name__ == "__main__":
- else:
- raise
-
-- if output is None or output == "-":
-+ if output is None:
- fd = sys.stdout.fileno()
- else:
- fd = os.open(output, os.O_WRONLY)
---
-2.42.0
-
diff --git a/0049-tools-pygrub-Open-the-output-files-earlier.patch b/0049-tools-pygrub-Open-the-output-files-earlier.patch
deleted file mode 100644
index c3b00b1..0000000
--- a/0049-tools-pygrub-Open-the-output-files-earlier.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-From 37977420670c65db220349510599d3fe47600ad8 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Mon, 25 Sep 2023 18:32:23 +0100
-Subject: [PATCH 49/55] tools/pygrub: Open the output files earlier
-
-This patch allows pygrub to get ahold of every RW file descriptor it needs
-early on. A later patch will clamp the filesystem it can access so it can't
-obtain any others.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-(cherry picked from commit 0710d7d44586251bfca9758890616dc3d6de8a74)
----
- tools/pygrub/src/pygrub | 37 ++++++++++++++++++++++---------------
- 1 file changed, 22 insertions(+), 15 deletions(-)
-
-diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
-index 1042c05b86..91e2ec2ab1 100755
---- a/tools/pygrub/src/pygrub
-+++ b/tools/pygrub/src/pygrub
-@@ -738,8 +738,7 @@ if __name__ == "__main__":
- def usage():
- print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
-
-- def copy_from_image(fs, file_to_read, file_type, output_directory,
-- not_really):
-+ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
- if not_really:
- if fs.file_exists(file_to_read):
- return "<%s:%s>" % (file_type, file_to_read)
-@@ -750,21 +749,18 @@ if __name__ == "__main__":
- except Exception as e:
- print(e, file=sys.stderr)
- sys.exit("Error opening %s in guest" % file_to_read)
-- (tfd, ret) = tempfile.mkstemp(prefix="boot_"+file_type+".",
-- dir=output_directory)
- dataoff = 0
- while True:
- data = datafile.read(FS_READ_MAX, dataoff)
- if len(data) == 0:
-- os.close(tfd)
-+ os.close(fd_dst)
- del datafile
-- return ret
-+ return
- try:
-- os.write(tfd, data)
-+ os.write(fd_dst, data)
- except Exception as e:
- print(e, file=sys.stderr)
-- os.close(tfd)
-- os.unlink(ret)
-+ os.unlink(path_dst)
- del datafile
- sys.exit("Error writing temporary copy of "+file_type)
- dataoff += len(data)
-@@ -861,6 +857,14 @@ if __name__ == "__main__":
- else:
- raise
-
-+ if not_really:
-+ fd_kernel = path_kernel = fd_ramdisk = path_ramdisk = None
-+ else:
-+ (fd_kernel, path_kernel) = tempfile.mkstemp(prefix="boot_kernel.",
-+ dir=output_directory)
-+ (fd_ramdisk, path_ramdisk) = tempfile.mkstemp(prefix="boot_ramdisk.",
-+ dir=output_directory)
-+
- if output is None:
- fd = sys.stdout.fileno()
- else:
-@@ -920,20 +924,23 @@ if __name__ == "__main__":
- if fs is None:
- raise RuntimeError("Unable to find partition containing kernel")
-
-- bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel",
-- output_directory, not_really)
-+ copy_from_image(fs, chosencfg["kernel"], "kernel",
-+ fd_kernel, path_kernel, not_really)
-+ bootcfg["kernel"] = path_kernel
-
- if chosencfg["ramdisk"]:
- try:
-- bootcfg["ramdisk"] = copy_from_image(fs, chosencfg["ramdisk"],
-- "ramdisk", output_directory,
-- not_really)
-+ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
-+ fd_ramdisk, path_ramdisk, not_really)
- except:
- if not not_really:
-- os.unlink(bootcfg["kernel"])
-+ os.unlink(path_kernel)
- raise
-+ bootcfg["ramdisk"] = path_ramdisk
- else:
- initrd = None
-+ if not not_really:
-+ os.unlink(path_ramdisk)
-
- args = None
- if chosencfg["args"]:
---
-2.42.0
-
diff --git a/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch b/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch
deleted file mode 100644
index 949528d..0000000
--- a/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From 8ee19246ad2c1d0ce241a52683f56b144a4f0b0e Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Mon, 25 Sep 2023 18:32:24 +0100
-Subject: [PATCH 50/55] tools/libfsimage: Export a new function to preload all
- plugins
-
-This is work required in order to let pygrub operate in highly deprivileged
-chroot mode. This patch adds a function that preloads every plugin, hence
-ensuring that on function exit, every shared library is loaded in memory.
-
-The new "init" function is supposed to be used before depriv, but that's
-fine because it's not acting on untrusted data.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-(cherry picked from commit 990e65c3ad9ac08642ce62a92852c80be6c83e96)
----
- tools/libfsimage/common/fsimage_plugin.c | 4 ++--
- tools/libfsimage/common/mapfile-GNU | 1 +
- tools/libfsimage/common/mapfile-SunOS | 1 +
- tools/libfsimage/common/xenfsimage.h | 8 ++++++++
- tools/pygrub/src/fsimage/fsimage.c | 15 +++++++++++++++
- 5 files changed, 27 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c
-index de1412b423..d0cb9e96a6 100644
---- a/tools/libfsimage/common/fsimage_plugin.c
-+++ b/tools/libfsimage/common/fsimage_plugin.c
-@@ -119,7 +119,7 @@ fail:
- return (-1);
- }
-
--static int load_plugins(void)
-+int fsi_init(void)
- {
- const char *fsdir = getenv("XEN_FSIMAGE_FSDIR");
- struct dirent *dp = NULL;
-@@ -180,7 +180,7 @@ int find_plugin(fsi_t *fsi, const char *path, const char *options)
- fsi_plugin_t *fp;
- int ret = 0;
-
-- if (plugins == NULL && (ret = load_plugins()) != 0)
-+ if (plugins == NULL && (ret = fsi_init()) != 0)
- goto out;
-
- for (fp = plugins; fp != NULL; fp = fp->fp_next) {
-diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU
-index 26d4d7a69e..2d54d527d7 100644
---- a/tools/libfsimage/common/mapfile-GNU
-+++ b/tools/libfsimage/common/mapfile-GNU
-@@ -1,6 +1,7 @@
- VERSION {
- libfsimage.so.1.0 {
- global:
-+ fsi_init;
- fsi_open_fsimage;
- fsi_close_fsimage;
- fsi_file_exists;
-diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS
-index e99b90b650..48deedb425 100644
---- a/tools/libfsimage/common/mapfile-SunOS
-+++ b/tools/libfsimage/common/mapfile-SunOS
-@@ -1,5 +1,6 @@
- libfsimage.so.1.0 {
- global:
-+ fsi_init;
- fsi_open_fsimage;
- fsi_close_fsimage;
- fsi_file_exists;
-diff --git a/tools/libfsimage/common/xenfsimage.h b/tools/libfsimage/common/xenfsimage.h
-index 201abd54f2..341883b2d7 100644
---- a/tools/libfsimage/common/xenfsimage.h
-+++ b/tools/libfsimage/common/xenfsimage.h
-@@ -35,6 +35,14 @@ extern C {
- typedef struct fsi fsi_t;
- typedef struct fsi_file fsi_file_t;
-
-+/*
-+ * Optional initialization function. If invoked it loads the associated
-+ * dynamic libraries for the backends ahead of time. This is required if
-+ * the library is to run as part of a highly deprivileged executable, as
-+ * the libraries may not be reachable after depriv.
-+ */
-+int fsi_init(void);
-+
- fsi_t *fsi_open_fsimage(const char *, uint64_t, const char *);
- void fsi_close_fsimage(fsi_t *);
-
-diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c
-index 2ebbbe35df..92fbf2851f 100644
---- a/tools/pygrub/src/fsimage/fsimage.c
-+++ b/tools/pygrub/src/fsimage/fsimage.c
-@@ -286,6 +286,15 @@ fsimage_getbootstring(PyObject *o, PyObject *args)
- return Py_BuildValue("s", bootstring);
- }
-
-+static PyObject *
-+fsimage_init(PyObject *o, PyObject *args)
-+{
-+ if (!PyArg_ParseTuple(args, ""))
-+ return (NULL);
-+
-+ return Py_BuildValue("i", fsi_init());
-+}
-+
- PyDoc_STRVAR(fsimage_open__doc__,
- "open(name, [offset=off]) - Open the given file as a filesystem image.\n"
- "\n"
-@@ -297,7 +306,13 @@ PyDoc_STRVAR(fsimage_getbootstring__doc__,
- "getbootstring(fs) - Return the boot string needed for this file system "
- "or NULL if none is needed.\n");
-
-+PyDoc_STRVAR(fsimage_init__doc__,
-+ "init() - Loads every dynamic library contained in xenfsimage "
-+ "into memory so that it can be used in chrooted environments.\n");
-+
- static struct PyMethodDef fsimage_module_methods[] = {
-+ { "init", (PyCFunction)fsimage_init,
-+ METH_VARARGS, fsimage_init__doc__ },
- { "open", (PyCFunction)fsimage_open,
- METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ },
- { "getbootstring", (PyCFunction)fsimage_getbootstring,
---
-2.42.0
-
diff --git a/0051-tools-pygrub-Deprivilege-pygrub.patch b/0051-tools-pygrub-Deprivilege-pygrub.patch
deleted file mode 100644
index 1d89191..0000000
--- a/0051-tools-pygrub-Deprivilege-pygrub.patch
+++ /dev/null
@@ -1,307 +0,0 @@
-From f5e211654e5fbb7f1fc5cfea7f9c7ab525edb9e7 Mon Sep 17 00:00:00 2001
-From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-Date: Mon, 25 Sep 2023 18:32:25 +0100
-Subject: [PATCH 51/55] tools/pygrub: Deprivilege pygrub
-
-Introduce a --runas=<uid> flag to deprivilege pygrub on Linux and *BSDs. It
-also implicitly creates a chroot env where it drops a deprivileged forked
-process. The chroot itself is cleaned up at the end.
-
-If the --runas arg is present, then pygrub forks, leaving the child to
-deprivilege itself, and waiting for it to complete. When the child exits,
-the parent performs cleanup and exits with the same error code.
-
-This is roughly what the child does:
- 1. Initialize libfsimage (this loads every .so in memory so the chroot
- can avoid bind-mounting /{,usr}/lib*)
- 2. Create a temporary empty chroot directory
- 3. Mount tmpfs in it
- 4. Bind mount the disk inside, because libfsimage expects a path, not a
- file descriptor.
- 5. Remount the root tmpfs to be stricter (ro,nosuid,nodev)
- 6. Set RLIMIT_FSIZE to a sensibly high amount (128 MiB)
- 7. Depriv gid, groups and uid
-
-With this scheme in place, the "output" files are writable (up to
-RLIMIT_FSIZE octets) and the exposed filesystem is immutable and contains
-the only file we can't easily get rid of (the disk).
-
-If running on Linux, the child process also unshares mount, IPC, and
-network namespaces before dropping its privileges.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
-(cherry picked from commit e0342ae5556f2b6e2db50701b8a0679a45822ca6)
----
- tools/pygrub/setup.py | 2 +-
- tools/pygrub/src/pygrub | 162 +++++++++++++++++++++++++++++++++++++---
- 2 files changed, 154 insertions(+), 10 deletions(-)
-
-diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py
-index 0e4e3d02d3..06b96733d0 100644
---- a/tools/pygrub/setup.py
-+++ b/tools/pygrub/setup.py
-@@ -17,7 +17,7 @@ xenfsimage = Extension("xenfsimage",
- pkgs = [ 'grub' ]
-
- setup(name='pygrub',
-- version='0.6',
-+ version='0.7',
- description='Boot loader that looks a lot like grub for Xen',
- author='Jeremy Katz',
- author_email='katzj@redhat.com',
-diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
-index 91e2ec2ab1..7cea496ade 100755
---- a/tools/pygrub/src/pygrub
-+++ b/tools/pygrub/src/pygrub
-@@ -16,8 +16,11 @@ from __future__ import print_function
-
- import os, sys, string, struct, tempfile, re, traceback, stat, errno
- import copy
-+import ctypes, ctypes.util
- import logging
- import platform
-+import resource
-+import subprocess
-
- import curses, _curses, curses.textpad, curses.ascii
- import getopt
-@@ -27,10 +30,135 @@ import grub.GrubConf
- import grub.LiloConf
- import grub.ExtLinuxConf
-
--PYGRUB_VER = 0.6
-+PYGRUB_VER = 0.7
- FS_READ_MAX = 1024 * 1024
- SECTOR_SIZE = 512
-
-+# Unless provided through the env variable PYGRUB_MAX_FILE_SIZE_MB, then
-+# this is the maximum filesize allowed for files written by the depriv
-+# pygrub
-+LIMIT_FSIZE = 128 << 20
-+
-+CLONE_NEWNS = 0x00020000 # mount namespace
-+CLONE_NEWNET = 0x40000000 # network namespace
-+CLONE_NEWIPC = 0x08000000 # IPC namespace
-+
-+def unshare(flags):
-+ if not sys.platform.startswith("linux"):
-+ print("skip_unshare reason=not_linux platform=%s", sys.platform, file=sys.stderr)
-+ return
-+
-+ libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
-+ unshare_prototype = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, use_errno=True)
-+ unshare = unshare_prototype(('unshare', libc))
-+
-+ if unshare(flags) < 0:
-+ raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno()))
-+
-+def bind_mount(src, dst, options):
-+ open(dst, "a").close() # touch
-+
-+ rc = subprocess.call(["mount", "--bind", "-o", options, src, dst])
-+ if rc != 0:
-+ raise RuntimeError("bad_mount: src=%s dst=%s opts=%s" %
-+ (src, dst, options))
-+
-+def downgrade_rlimits():
-+ # Wipe the authority to use unrequired resources
-+ resource.setrlimit(resource.RLIMIT_NPROC, (0, 0))
-+ resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
-+ resource.setrlimit(resource.RLIMIT_MEMLOCK, (0, 0))
-+
-+ # py2's resource module doesn't know about resource.RLIMIT_MSGQUEUE
-+ #
-+ # TODO: Use resource.RLIMIT_MSGQUEUE after python2 is deprecated
-+ if sys.platform.startswith('linux'):
-+ RLIMIT_MSGQUEUE = 12
-+ resource.setrlimit(RLIMIT_MSGQUEUE, (0, 0))
-+
-+ # The final look of the filesystem for this process is fully RO, but
-+ # note we have some file descriptor already open (notably, kernel and
-+ # ramdisk). In order to avoid a compromised pygrub from filling up the
-+ # filesystem we set RLIMIT_FSIZE to a high bound, so that the file
-+ # write permissions are bound.
-+ fsize = LIMIT_FSIZE
-+ if "PYGRUB_MAX_FILE_SIZE_MB" in os.environ.keys():
-+ fsize = os.environ["PYGRUB_MAX_FILE_SIZE_MB"] << 20
-+
-+ resource.setrlimit(resource.RLIMIT_FSIZE, (fsize, fsize))
-+
-+def depriv(output_directory, output, device, uid, path_kernel, path_ramdisk):
-+ # The only point of this call is to force the loading of libfsimage.
-+ # That way, we don't need to bind-mount it into the chroot
-+ rc = xenfsimage.init()
-+ if rc != 0:
-+ os.unlink(path_ramdisk)
-+ os.unlink(path_kernel)
-+ raise RuntimeError("bad_xenfsimage: rc=%d" % rc)
-+
-+ # Create a temporary directory for the chroot
-+ chroot = tempfile.mkdtemp(prefix=str(uid)+'-', dir=output_directory) + '/'
-+ device_path = '/device'
-+
-+ pid = os.fork()
-+ if pid:
-+ # parent
-+ _, rc = os.waitpid(pid, 0)
-+
-+ for path in [path_kernel, path_ramdisk]:
-+ # If the child didn't write anything, just get rid of it,
-+ # otherwise we end up consuming a 0-size file when parsing
-+ # systems without a ramdisk that the ultimate caller of pygrub
-+ # may just be unaware of
-+ if rc != 0 or os.path.getsize(path) == 0:
-+ os.unlink(path)
-+
-+ # Normally, unshare(CLONE_NEWNS) will ensure this is not required.
-+ # However, this syscall doesn't exist in *BSD systems and doesn't
-+ # auto-unmount everything on older Linux kernels (At least as of
-+ # Linux 4.19, but it seems fixed in 5.15). Either way,
-+ # recursively unmount everything if needed. Quietly.
-+ with open('/dev/null', 'w') as devnull:
-+ subprocess.call(["umount", "-f", chroot + device_path],
-+ stdout=devnull, stderr=devnull)
-+ subprocess.call(["umount", "-f", chroot],
-+ stdout=devnull, stderr=devnull)
-+ os.rmdir(chroot)
-+
-+ sys.exit(rc)
-+
-+ # By unsharing the namespace we're making sure it's all bulk-released
-+ # at the end, when the namespaces disappear. This means the kernel does
-+ # (almost) all the cleanup for us and the parent just has to remove the
-+ # temporary directory.
-+ unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWNET)
-+
-+ # Set sensible limits using the setrlimit interface
-+ downgrade_rlimits()
-+
-+ # We'll mount tmpfs on the chroot to ensure the deprivileged child
-+ # cannot affect the persistent state. It's RW now in order to
-+ # bind-mount the device, but note it's remounted RO after that.
-+ rc = subprocess.call(["mount", "-t", "tmpfs", "none", chroot])
-+ if rc != 0:
-+ raise RuntimeError("mount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
-+
-+ # Bind the untrusted device RO
-+ bind_mount(device, chroot + device_path, "ro,nosuid,noexec")
-+
-+ rc = subprocess.call(["mount", "-t", "tmpfs", "-o", "remount,ro,nosuid,noexec,nodev", "none", chroot])
-+ if rc != 0:
-+ raise RuntimeError("remount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
-+
-+ # Drop superpowers!
-+ os.chroot(chroot)
-+ os.chdir('/')
-+ os.setgid(uid)
-+ os.setgroups([uid])
-+ os.setuid(uid)
-+
-+ return device_path
-+
- def read_size_roundup(fd, size):
- if platform.system() != 'FreeBSD':
- return size
-@@ -736,7 +864,7 @@ if __name__ == "__main__":
- sel = None
-
- def usage():
-- print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
-+ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--runas=] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
-
- def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
- if not_really:
-@@ -760,7 +888,8 @@ if __name__ == "__main__":
- os.write(fd_dst, data)
- except Exception as e:
- print(e, file=sys.stderr)
-- os.unlink(path_dst)
-+ if path_dst:
-+ os.unlink(path_dst)
- del datafile
- sys.exit("Error writing temporary copy of "+file_type)
- dataoff += len(data)
-@@ -769,7 +898,7 @@ if __name__ == "__main__":
- opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::',
- ["quiet", "interactive", "list-entries", "not-really", "help",
- "output=", "output-format=", "output-directory=", "offset=",
-- "entry=", "kernel=",
-+ "runas=", "entry=", "kernel=",
- "ramdisk=", "args=", "isconfig", "debug"])
- except getopt.GetoptError:
- usage()
-@@ -790,6 +919,7 @@ if __name__ == "__main__":
- not_really = False
- output_format = "sxp"
- output_directory = "/var/run/xen/pygrub/"
-+ uid = None
-
- # what was passed in
- incfg = { "kernel": None, "ramdisk": None, "args": "" }
-@@ -813,6 +943,13 @@ if __name__ == "__main__":
- elif o in ("--output",):
- if a != "-":
- output = a
-+ elif o in ("--runas",):
-+ try:
-+ uid = int(a)
-+ except ValueError:
-+ print("runas value must be an integer user id")
-+ usage()
-+ sys.exit(1)
- elif o in ("--kernel",):
- incfg["kernel"] = a
- elif o in ("--ramdisk",):
-@@ -849,6 +986,10 @@ if __name__ == "__main__":
- if debug:
- logging.basicConfig(level=logging.DEBUG)
-
-+ if interactive and uid:
-+ print("In order to use --runas, you must also set --entry or -q", file=sys.stderr)
-+ sys.exit(1)
-+
- try:
- os.makedirs(output_directory, 0o700)
- except OSError as e:
-@@ -870,6 +1011,9 @@ if __name__ == "__main__":
- else:
- fd = os.open(output, os.O_WRONLY)
-
-+ if uid:
-+ file = depriv(output_directory, output, file, uid, path_kernel, path_ramdisk)
-+
- # debug
- if isconfig:
- chosencfg = run_grub(file, entry, fs, incfg["args"])
-@@ -925,21 +1069,21 @@ if __name__ == "__main__":
- raise RuntimeError("Unable to find partition containing kernel")
-
- copy_from_image(fs, chosencfg["kernel"], "kernel",
-- fd_kernel, path_kernel, not_really)
-+ fd_kernel, None if uid else path_kernel, not_really)
- bootcfg["kernel"] = path_kernel
-
- if chosencfg["ramdisk"]:
- try:
- copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
-- fd_ramdisk, path_ramdisk, not_really)
-+ fd_ramdisk, None if uid else path_ramdisk, not_really)
- except:
-- if not not_really:
-- os.unlink(path_kernel)
-+ if not uid and not not_really:
-+ os.unlink(path_kernel)
- raise
- bootcfg["ramdisk"] = path_ramdisk
- else:
- initrd = None
-- if not not_really:
-+ if not uid and not not_really:
- os.unlink(path_ramdisk)
-
- args = None
---
-2.42.0
-
diff --git a/0052-libxl-add-support-for-running-bootloader-in-restrict.patch b/0052-libxl-add-support-for-running-bootloader-in-restrict.patch
deleted file mode 100644
index 08691b9..0000000
--- a/0052-libxl-add-support-for-running-bootloader-in-restrict.patch
+++ /dev/null
@@ -1,251 +0,0 @@
-From 42bf49d74b711ca7fef37bcde12928220c8e9700 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Mon, 25 Sep 2023 14:30:20 +0200
-Subject: [PATCH 52/55] libxl: add support for running bootloader in restricted
- mode
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Much like the device model depriv mode, add the same kind of support for the
-bootloader. Such feature allows passing a UID as a parameter for the
-bootloader to run as, together with the bootloader itself taking the necessary
-actions to isolate.
-
-Note that the user to run the bootloader as must have the right permissions to
-access the guest disk image (in read mode only), and that the bootloader will
-be run in non-interactive mode when restricted.
-
-If enabled bootloader restrict mode will attempt to re-use the user(s) from the
-QEMU depriv implementation if no user is provided on the configuration file or
-the environment. See docs/features/qemu-deprivilege.pandoc for more
-information about how to setup those users.
-
-Bootloader restrict mode is not enabled by default as it requires certain
-setup to be done first (setup of the user(s) to use in restrict mode).
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-(cherry picked from commit 1f762642d2cad1a40634e3280361928109d902f1)
----
- docs/man/xl.1.pod.in | 33 +++++++++++
- tools/libs/light/libxl_bootloader.c | 89 ++++++++++++++++++++++++++++-
- tools/libs/light/libxl_dm.c | 8 +--
- tools/libs/light/libxl_internal.h | 8 +++
- 4 files changed, 131 insertions(+), 7 deletions(-)
-
-diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
-index 101e14241d..4831e12242 100644
---- a/docs/man/xl.1.pod.in
-+++ b/docs/man/xl.1.pod.in
-@@ -1957,6 +1957,39 @@ ignored:
-
- =back
-
-+=head1 ENVIRONMENT VARIABLES
-+
-+The following environment variables shall affect the execution of xl:
-+
-+=over 4
-+
-+=item LIBXL_BOOTLOADER_RESTRICT
-+
-+Attempt to restrict the bootloader after startup, to limit the
-+consequences of security vulnerabilities due to parsing guest
-+owned image files.
-+
-+See docs/features/qemu-deprivilege.pandoc for more information
-+on how to setup the unprivileged users.
-+
-+Note that running the bootloader in restricted mode also implies using
-+non-interactive mode, and the disk image must be readable by the
-+restricted user.
-+
-+Having this variable set is equivalent to enabling the option, even if the
-+value is 0.
-+
-+=item LIBXL_BOOTLOADER_USER
-+
-+When using bootloader_restrict, run the bootloader as this user. If
-+not set the default QEMU restrict users will be used.
-+
-+NOTE: Each domain MUST have a SEPARATE username.
-+
-+See docs/features/qemu-deprivilege.pandoc for more information.
-+
-+=back
-+
- =head1 SEE ALSO
-
- The following man pages:
-diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
-index 108329b4a5..23c0ef3e89 100644
---- a/tools/libs/light/libxl_bootloader.c
-+++ b/tools/libs/light/libxl_bootloader.c
-@@ -14,6 +14,7 @@
-
- #include "libxl_osdeps.h" /* must come before any other headers */
-
-+#include <pwd.h>
- #include <termios.h>
- #ifdef HAVE_UTMP_H
- #include <utmp.h>
-@@ -42,8 +43,71 @@ static void bootloader_arg(libxl__bootloader_state *bl, const char *arg)
- bl->args[bl->nargs++] = arg;
- }
-
--static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
-- const char *bootloader_path)
-+static int bootloader_uid(libxl__gc *gc, domid_t guest_domid,
-+ const char *user, uid_t *intended_uid)
-+{
-+ struct passwd *user_base, user_pwbuf;
-+ int rc;
-+
-+ if (user) {
-+ rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base);
-+ if (rc) return rc;
-+
-+ if (!user_base) {
-+ LOGD(ERROR, guest_domid, "Couldn't find user %s", user);
-+ return ERROR_INVAL;
-+ }
-+
-+ *intended_uid = user_base->pw_uid;
-+ return 0;
-+ }
-+
-+ /* Re-use QEMU user range for the bootloader. */
-+ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_RANGE_BASE,
-+ &user_pwbuf, &user_base);
-+ if (rc) return rc;
-+
-+ if (user_base) {
-+ struct passwd *user_clash, user_clash_pwbuf;
-+ uid_t temp_uid = user_base->pw_uid + guest_domid;
-+
-+ rc = userlookup_helper_getpwuid(gc, temp_uid, &user_clash_pwbuf,
-+ &user_clash);
-+ if (rc) return rc;
-+
-+ if (user_clash) {
-+ LOGD(ERROR, guest_domid,
-+ "wanted to use uid %ld (%s + %d) but that is user %s !",
-+ (long)temp_uid, LIBXL_QEMU_USER_RANGE_BASE,
-+ guest_domid, user_clash->pw_name);
-+ return ERROR_INVAL;
-+ }
-+
-+ *intended_uid = temp_uid;
-+ return 0;
-+ }
-+
-+ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_SHARED, &user_pwbuf,
-+ &user_base);
-+ if (rc) return rc;
-+
-+ if (user_base) {
-+ LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s",
-+ LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED);
-+ *intended_uid = user_base->pw_uid;
-+
-+ return 0;
-+ }
-+
-+ LOGD(ERROR, guest_domid,
-+ "Could not find user %s or range base pseudo-user %s, cannot restrict",
-+ LIBXL_QEMU_USER_SHARED, LIBXL_QEMU_USER_RANGE_BASE);
-+
-+ return ERROR_INVAL;
-+}
-+
-+static int make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
-+ const char *bootloader_path)
- {
- const libxl_domain_build_info *info = bl->info;
-
-@@ -61,6 +125,23 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
- ARG(GCSPRINTF("--ramdisk=%s", info->ramdisk));
- if (info->cmdline && *info->cmdline != '\0')
- ARG(GCSPRINTF("--args=%s", info->cmdline));
-+ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
-+ getenv("LIBXL_BOOTLOADER_USER")) {
-+ uid_t uid = -1;
-+ int rc = bootloader_uid(gc, bl->domid, getenv("LIBXL_BOOTLOADER_USER"),
-+ &uid);
-+
-+ if (rc) return rc;
-+
-+ assert(uid != -1);
-+ if (!uid) {
-+ LOGD(ERROR, bl->domid, "bootloader restrict UID is 0 (root)!");
-+ return ERROR_INVAL;
-+ }
-+ LOGD(DEBUG, bl->domid, "using uid %ld", (long)uid);
-+ ARG(GCSPRINTF("--runas=%ld", (long)uid));
-+ ARG("--quiet");
-+ }
-
- ARG(GCSPRINTF("--output=%s", bl->outputpath));
- ARG("--output-format=simple0");
-@@ -79,6 +160,7 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
- /* Sentinel for execv */
- ARG(NULL);
-
-+ return 0;
- #undef ARG
- }
-
-@@ -443,7 +525,8 @@ static void bootloader_disk_attached_cb(libxl__egc *egc,
- bootloader = bltmp;
- }
-
-- make_bootloader_args(gc, bl, bootloader);
-+ rc = make_bootloader_args(gc, bl, bootloader);
-+ if (rc) goto out;
-
- bl->openpty.ao = ao;
- bl->openpty.callback = bootloader_gotptys;
-diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
-index fc264a3a13..14b593110f 100644
---- a/tools/libs/light/libxl_dm.c
-+++ b/tools/libs/light/libxl_dm.c
-@@ -80,10 +80,10 @@ static int libxl__create_qemu_logfile(libxl__gc *gc, char *name)
- * On error, return a libxl-style error code.
- */
- #define DEFINE_USERLOOKUP_HELPER(NAME,SPEC_TYPE,STRUCTNAME,SYSCONF) \
-- static int userlookup_helper_##NAME(libxl__gc *gc, \
-- SPEC_TYPE spec, \
-- struct STRUCTNAME *resultbuf, \
-- struct STRUCTNAME **out) \
-+ int userlookup_helper_##NAME(libxl__gc *gc, \
-+ SPEC_TYPE spec, \
-+ struct STRUCTNAME *resultbuf, \
-+ struct STRUCTNAME **out) \
- { \
- struct STRUCTNAME *resultp = NULL; \
- char *buf = NULL; \
-diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
-index 7ad38de30e..f1e3a9a15b 100644
---- a/tools/libs/light/libxl_internal.h
-+++ b/tools/libs/light/libxl_internal.h
-@@ -4873,6 +4873,14 @@ struct libxl__cpu_policy {
- struct xc_msr *msr;
- };
-
-+struct passwd;
-+_hidden int userlookup_helper_getpwnam(libxl__gc*, const char *user,
-+ struct passwd *res,
-+ struct passwd **out);
-+_hidden int userlookup_helper_getpwuid(libxl__gc*, uid_t uid,
-+ struct passwd *res,
-+ struct passwd **out);
-+
- #endif
-
- /*
---
-2.42.0
-
diff --git a/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch b/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch
deleted file mode 100644
index 8c790d3..0000000
--- a/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From 46d00dbf4c22b28910f73f66a03e5cabe50b5395 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Thu, 28 Sep 2023 12:22:35 +0200
-Subject: [PATCH 53/55] libxl: limit bootloader execution in restricted mode
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Introduce a timeout for bootloader execution when running in restricted mode.
-
-Allow overriding the default timeout with an environment provided value.
-
-This is part of XSA-443 / CVE-2023-34325
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-(cherry picked from commit 9c114178ffd700112e91f5ec66cf5151b9c9a8cc)
----
- docs/man/xl.1.pod.in | 8 ++++++
- tools/libs/light/libxl_bootloader.c | 40 +++++++++++++++++++++++++++++
- tools/libs/light/libxl_internal.h | 2 ++
- 3 files changed, 50 insertions(+)
-
-diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
-index 4831e12242..c3eb6570ab 100644
---- a/docs/man/xl.1.pod.in
-+++ b/docs/man/xl.1.pod.in
-@@ -1988,6 +1988,14 @@ NOTE: Each domain MUST have a SEPARATE username.
-
- See docs/features/qemu-deprivilege.pandoc for more information.
-
-+=item LIBXL_BOOTLOADER_TIMEOUT
-+
-+Timeout in seconds for bootloader execution when running in restricted mode.
-+Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used.
-+
-+If defined the value must be an unsigned integer between 0 and INT_MAX,
-+otherwise behavior is undefined. Setting to 0 disables the timeout.
-+
- =back
-
- =head1 SEE ALSO
-diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
-index 23c0ef3e89..ee26d08f37 100644
---- a/tools/libs/light/libxl_bootloader.c
-+++ b/tools/libs/light/libxl_bootloader.c
-@@ -30,6 +30,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
- libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
- static void bootloader_display_copyfail(libxl__egc *egc,
- libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
-+static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
-+ const struct timeval *requested_abs, int rc);
- static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc,
- int rc);
- static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
-@@ -297,6 +299,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
- bl->ptys[0].master = bl->ptys[0].slave = 0;
- bl->ptys[1].master = bl->ptys[1].slave = 0;
- libxl__ev_child_init(&bl->child);
-+ libxl__ev_time_init(&bl->time);
- libxl__domaindeathcheck_init(&bl->deathcheck);
- bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes);
- bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display);
-@@ -314,6 +317,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
- libxl__domaindeathcheck_stop(gc,&bl->deathcheck);
- libxl__datacopier_kill(&bl->keystrokes);
- libxl__datacopier_kill(&bl->display);
-+ libxl__ev_time_deregister(gc, &bl->time);
- for (i=0; i<2; i++) {
- libxl__carefd_close(bl->ptys[i].master);
- libxl__carefd_close(bl->ptys[i].slave);
-@@ -375,6 +379,7 @@ static void bootloader_stop(libxl__egc *egc,
-
- libxl__datacopier_kill(&bl->keystrokes);
- libxl__datacopier_kill(&bl->display);
-+ libxl__ev_time_deregister(gc, &bl->time);
- if (libxl__ev_child_inuse(&bl->child)) {
- r = kill(bl->child.pid, SIGTERM);
- if (r) LOGED(WARN, bl->domid, "%sfailed to kill bootloader [%lu]",
-@@ -637,6 +642,25 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
-
- struct termios termattr;
-
-+ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
-+ getenv("LIBXL_BOOTLOADER_USER")) {
-+ const char *timeout_env = getenv("LIBXL_BOOTLOADER_TIMEOUT");
-+ int timeout = timeout_env ? atoi(timeout_env)
-+ : LIBXL_BOOTLOADER_TIMEOUT;
-+
-+ if (timeout) {
-+ /* Set execution timeout */
-+ rc = libxl__ev_time_register_rel(ao, &bl->time,
-+ bootloader_timeout,
-+ timeout * 1000);
-+ if (rc) {
-+ LOGED(ERROR, bl->domid,
-+ "unable to register timeout for bootloader execution");
-+ goto out;
-+ }
-+ }
-+ }
-+
- pid_t pid = libxl__ev_child_fork(gc, &bl->child, bootloader_finished);
- if (pid == -1) {
- rc = ERROR_FAIL;
-@@ -702,6 +726,21 @@ static void bootloader_display_copyfail(libxl__egc *egc,
- libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
- bootloader_copyfail(egc, "bootloader output", bl, 1, rc,onwrite,errnoval);
- }
-+static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
-+ const struct timeval *requested_abs, int rc)
-+{
-+ libxl__bootloader_state *bl = CONTAINER_OF(ev, *bl, time);
-+ STATE_AO_GC(bl->ao);
-+
-+ libxl__ev_time_deregister(gc, &bl->time);
-+
-+ assert(libxl__ev_child_inuse(&bl->child));
-+ LOGD(ERROR, bl->domid, "killing bootloader because of timeout");
-+
-+ libxl__ev_child_kill_deregister(ao, &bl->child, SIGKILL);
-+
-+ bootloader_callback(egc, bl, rc);
-+}
-
- static void bootloader_domaindeath(libxl__egc *egc,
- libxl__domaindeathcheck *dc,
-@@ -718,6 +757,7 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
- STATE_AO_GC(bl->ao);
- int rc;
-
-+ libxl__ev_time_deregister(gc, &bl->time);
- libxl__datacopier_kill(&bl->keystrokes);
- libxl__datacopier_kill(&bl->display);
-
-diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
-index f1e3a9a15b..d05783617f 100644
---- a/tools/libs/light/libxl_internal.h
-+++ b/tools/libs/light/libxl_internal.h
-@@ -102,6 +102,7 @@
- #define LIBXL_QMP_CMD_TIMEOUT 10
- #define LIBXL_STUBDOM_START_TIMEOUT 30
- #define LIBXL_QEMU_BODGE_TIMEOUT 2
-+#define LIBXL_BOOTLOADER_TIMEOUT 120
- #define LIBXL_XENCONSOLE_LIMIT 1048576
- #define LIBXL_XENCONSOLE_PROTOCOL "vt100"
- #define LIBXL_MAXMEM_CONSTANT 1024
-@@ -3744,6 +3745,7 @@ struct libxl__bootloader_state {
- libxl__openpty_state openpty;
- libxl__openpty_result ptys[2]; /* [0] is for bootloader */
- libxl__ev_child child;
-+ libxl__ev_time time;
- libxl__domaindeathcheck deathcheck;
- int nargs, argsspace;
- const char **args;
---
-2.42.0
-
diff --git a/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch b/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
deleted file mode 100644
index af72c9a..0000000
--- a/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
+++ /dev/null
@@ -1,104 +0,0 @@
-From 3f8b444072fd8615288d9d11e53fbf0b6a8a7750 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 26 Sep 2023 20:03:36 +0100
-Subject: [PATCH 54/55] x86/svm: Fix asymmetry with AMD DR MASK context
- switching
-
-The handling of MSR_DR{0..3}_MASK is asymmetric between PV and HVM guests.
-
-HVM guests context switch in based on the guest view of DBEXT, whereas PV
-guests switch in based on the host capability.  Both guest types leave the
-context dirty for the next vCPU.
-
-This leads to the following issue:
-
- * PV or HVM vCPU has debugging active (%dr7 + mask)
- * Switch out deactivates %dr7 but leaves other state stale in hardware
- * HVM vCPU with debugging active but can't see DBEXT is switched in
- * Switch in loads %dr7 but leaves the mask MSRs alone
-
-Now, the HVM vCPU is operating in the context of the prior vCPU's mask MSR,
-and furthermore in a case where it genuinely expects there to be no masking
-MSRs.
-
-As a stopgap, adjust the HVM path to switch in/out the masks based on host
-capabilities rather than guest visibility (i.e. like the PV path). Adjustment
-of the intercepts still needs to be dependent on the guest visibility
-of DBEXT.
-
-This is part of XSA-444 / CVE-2023-34327
-
-Fixes: c097f54912d3 ("x86/SVM: support data breakpoint extension registers")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit 5d54282f984bb9a7a65b3d12208584f9fdf1c8e1)
----
- xen/arch/x86/hvm/svm/svm.c | 24 ++++++++++++++++++------
- xen/arch/x86/traps.c | 5 +++++
- 2 files changed, 23 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
-index e8f50e7c5e..fd32600ae3 100644
---- a/xen/arch/x86/hvm/svm/svm.c
-+++ b/xen/arch/x86/hvm/svm/svm.c
-@@ -339,6 +339,10 @@ static void svm_save_dr(struct vcpu *v)
- v->arch.hvm.flag_dr_dirty = 0;
- vmcb_set_dr_intercepts(vmcb, ~0u);
-
-+ /*
-+ * The guest can only have changed the mask MSRs if we previously dropped
-+ * intercepts. Re-read them from hardware.
-+ */
- if ( v->domain->arch.cpuid->extd.dbext )
- {
- svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW);
-@@ -370,17 +374,25 @@ static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v)
-
- ASSERT(v == current);
-
-- if ( v->domain->arch.cpuid->extd.dbext )
-+ /*
-+ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
-+ * context-switch-out. If we're activating %dr7 for the guest, we must
-+ * sync the DR_MASKs too, whether or not the guest can see them.
-+ */
-+ if ( boot_cpu_has(X86_FEATURE_DBEXT) )
- {
-- svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-- svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-- svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-- svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
--
- wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.msrs->dr_mask[0]);
- wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.msrs->dr_mask[1]);
- wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.msrs->dr_mask[2]);
- wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.msrs->dr_mask[3]);
-+
-+ if ( v->domain->arch.cpuid->extd.dbext )
-+ {
-+ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-+ svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-+ svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-+ svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
-+ }
- }
-
- write_debugreg(0, v->arch.dr[0]);
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index e65cc60041..06c4f3868b 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -2281,6 +2281,11 @@ void activate_debugregs(const struct vcpu *curr)
- if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
- write_debugreg(7, curr->arch.dr7);
-
-+ /*
-+ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
-+ * context-switch-out. If we're activating %dr7 for the guest, we must
-+ * sync the DR_MASKs too, whether or not the guest can see them.
-+ */
- if ( boot_cpu_has(X86_FEATURE_DBEXT) )
- {
- wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.msrs->dr_mask[0]);
---
-2.42.0
-
diff --git a/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch b/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
deleted file mode 100644
index 5838e7f..0000000
--- a/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From 0b56bed864ca9b572473957f0254aefa797216f2 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 26 Sep 2023 20:03:36 +0100
-Subject: [PATCH 55/55] x86/pv: Correct the auditing of guest breakpoint
- addresses
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The use of access_ok() is buggy, because it permits access to the compat
-translation area. 64bit PV guests don't use the XLAT area, but on AMD
-hardware, the DBEXT feature allows a breakpoint to match up to a 4G aligned
-region, allowing the breakpoint to reach outside of the XLAT area.
-
-Prior to c/s cda16c1bb223 ("x86: mirror compat argument translation area for
-32-bit PV"), the live GDT was within 4G of the XLAT area.
-
-All together, this allowed a malicious 64bit PV guest on AMD hardware to place
-a breakpoint over the live GDT, and trigger a #DB livelock (CVE-2015-8104).
-
-Introduce breakpoint_addr_ok() and explain why __addr_ok() happens to be an
-appropriate check in this case.
-
-For Xen 4.14 and later, this is a latent bug because the XLAT area has moved
-to be on its own with nothing interesting adjacent. For Xen 4.13 and older on
-AMD hardware, this fixes a PV-trigger-able DoS.
-
-This is part of XSA-444 / CVE-2023-34328.
-
-Fixes: 65e355490817 ("x86/PV: support data breakpoint extension registers")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit dc9d9aa62ddeb14abd5672690d30789829f58f7e)
----
- xen/arch/x86/include/asm/debugreg.h | 20 ++++++++++++++++++++
- xen/arch/x86/pv/misc-hypercalls.c | 2 +-
- 2 files changed, 21 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/include/asm/debugreg.h b/xen/arch/x86/include/asm/debugreg.h
-index c57914efc6..cc29826524 100644
---- a/xen/arch/x86/include/asm/debugreg.h
-+++ b/xen/arch/x86/include/asm/debugreg.h
-@@ -77,6 +77,26 @@
- asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) ); \
- __val; \
- })
-+
-+/*
-+ * Architecturally, %dr{0..3} can have any arbitrary value. However, Xen
-+ * can't allow the guest to breakpoint the Xen address range, so we limit the
-+ * guest to the lower canonical half, or above the Xen range in the higher
-+ * canonical half.
-+ *
-+ * Breakpoint lengths are specified to mask the low order address bits,
-+ * meaning all breakpoints are naturally aligned. With %dr7, the widest
-+ * breakpoint is 8 bytes. With DBEXT, the widest breakpoint is 4G. Both of
-+ * the Xen boundaries have >4G alignment.
-+ *
-+ * In principle we should account for HYPERVISOR_COMPAT_VIRT_START(d), but
-+ * 64bit Xen has never enforced this for compat guests, and there's no problem
-+ * (to Xen) if the guest breakpoints its alias of the M2P. Skipping this
-+ * aspect simplifies the logic, and causes us not to reject a migrating guest
-+ * which operated fine on prior versions of Xen.
-+ */
-+#define breakpoint_addr_ok(a) __addr_ok(a)
-+
- long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
- void activate_debugregs(const struct vcpu *);
-
-diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
-index aaaf70eb63..f8636de907 100644
---- a/xen/arch/x86/pv/misc-hypercalls.c
-+++ b/xen/arch/x86/pv/misc-hypercalls.c
-@@ -72,7 +72,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- switch ( reg )
- {
- case 0 ... 3:
-- if ( !access_ok(value, sizeof(long)) )
-+ if ( !breakpoint_addr_ok(value) )
- return -EPERM;
-
- v->arch.dr[reg] = value;
---
-2.42.0
-
diff --git a/info.txt b/info.txt
index 26a1905..0a99509 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #0 for 4.17.3-pre
+Xen upstream patchset #0 for 4.17.4-pre
Containing patches from
-RELEASE-4.17.2 (b86c313a4a9c3ec4c9f825d9b99131753296485f)
+RELEASE-4.17.3 (07f413d7ffb06eab36045bd19f53555de1cacf62)
to
-staging-4.17 (0b56bed864ca9b572473957f0254aefa797216f2)
+staging-4.17 (091466ba55d1e2e75738f751818ace2e3ed08ccf)