diff options
Diffstat (limited to '0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch')
-rw-r--r-- | 0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch | 279 |
1 files changed, 279 insertions, 0 deletions
diff --git a/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch new file mode 100644 index 0000000..62be72a --- /dev/null +++ b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch @@ -0,0 +1,279 @@ +From 1e26afa846fb9a00b9155280eeae3b8cb8375dd6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 15:08:14 +0200 +Subject: [PATCH 46/67] x86/shadow: tolerate failure in shadow_prealloc() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Prevent _shadow_prealloc() from calling BUG() when unable to fulfill +the pre-allocation and instead return true/false. Modify +shadow_prealloc() to crash the domain on allocation failure (if the +domain is not already dying), as shadow cannot operate normally after +that. Modify callers to also gracefully handle {_,}shadow_prealloc() +failing to fulfill the request. + +Note this in turn requires adjusting the callers of +sh_make_monitor_table() also to handle it returning INVALID_MFN. +sh_update_paging_modes() is also modified to add additional error +paths in case of allocation failure, some of those will return with +null monitor page tables (and the domain likely crashed). This is no +different that current error paths, but the newly introduced ones are +more likely to trigger. + +The now added failure points in sh_update_paging_modes() also require +that on some error return paths the previous structures are cleared, +and thus monitor table is null. + +While there adjust the 'type' parameter type of shadow_prealloc() to +unsigned int rather than u32. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: b7f93c6afb12b6061e2d19de2f39ea09b569ac68 +master date: 2022-10-11 14:22:53 +0200 +--- + xen/arch/x86/mm/shadow/common.c | 69 ++++++++++++++++++++++++-------- + xen/arch/x86/mm/shadow/hvm.c | 4 +- + xen/arch/x86/mm/shadow/multi.c | 11 +++-- + xen/arch/x86/mm/shadow/private.h | 3 +- + 4 files changed, 66 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 0ab2ac6b7a3c..fc4f7f78ce43 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -36,6 +36,7 @@ + #include <asm/flushtlb.h> + #include <asm/shadow.h> + #include <xen/numa.h> ++#include <public/sched.h> + #include "private.h" + + DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags); +@@ -927,14 +928,15 @@ static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn) + + /* Make sure there are at least count order-sized pages + * available in the shadow page pool. */ +-static void _shadow_prealloc(struct domain *d, unsigned int pages) ++static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + { + struct vcpu *v; + struct page_info *sp, *t; + mfn_t smfn; + int i; + +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + + /* Shouldn't have enabled shadows if we've no vcpus. */ + ASSERT(d->vcpu && d->vcpu[0]); +@@ -950,7 +952,8 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + sh_unpin(d, smfn); + + /* See if that freed up enough space */ +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + } + + /* Stage two: all shadow pages are in use in hierarchies that are +@@ -973,7 +976,7 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + if ( d->arch.paging.shadow.free_pages >= pages ) + { + guest_flush_tlb_mask(d, d->dirty_cpumask); +- return; ++ return true; + } + } + } +@@ -986,7 +989,12 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); +- BUG(); ++ ++ ASSERT(d->is_dying); ++ ++ guest_flush_tlb_mask(d, d->dirty_cpumask); ++ ++ return false; + } + + /* Make sure there are at least count pages of the order according to +@@ -994,9 +1002,19 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + * This must be called before any calls to shadow_alloc(). Since this + * will free existing shadows to make room, it must be called early enough + * to avoid freeing shadows that the caller is currently working on. */ +-void shadow_prealloc(struct domain *d, u32 type, unsigned int count) ++bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count) + { +- return _shadow_prealloc(d, shadow_size(type) * count); ++ bool ret = _shadow_prealloc(d, shadow_size(type) * count); ++ ++ if ( !ret && !d->is_dying && ++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ /* ++ * Failing to allocate memory required for shadow usage can only result in ++ * a domain crash, do it here rather that relying on every caller to do it. ++ */ ++ domain_crash(d); ++ ++ return ret; + } + + /* Deliberately free all the memory we can: this will tear down all of +@@ -1215,7 +1233,7 @@ void shadow_free(struct domain *d, mfn_t smfn) + static struct page_info * + shadow_alloc_p2m_page(struct domain *d) + { +- struct page_info *pg; ++ struct page_info *pg = NULL; + + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ +@@ -1233,16 +1251,18 @@ shadow_alloc_p2m_page(struct domain *d) + d->arch.paging.shadow.p2m_pages, + shadow_min_acceptable_pages(d)); + } +- paging_unlock(d); +- return NULL; ++ goto out; + } + +- shadow_prealloc(d, SH_type_p2m_table, 1); ++ if ( !shadow_prealloc(d, SH_type_p2m_table, 1) ) ++ goto out; ++ + pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); + d->arch.paging.shadow.p2m_pages++; + d->arch.paging.shadow.total_pages--; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + ++ out: + paging_unlock(d); + + return pg; +@@ -1333,7 +1353,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages, bool *preempted) + else if ( d->arch.paging.shadow.total_pages > pages ) + { + /* Need to return memory to domheap */ +- _shadow_prealloc(d, 1); ++ if ( !_shadow_prealloc(d, 1) ) ++ return -ENOMEM; ++ + sp = page_list_remove_head(&d->arch.paging.shadow.freelist); + ASSERT(sp); + /* +@@ -2401,12 +2423,13 @@ static void sh_update_paging_modes(struct vcpu *v) + if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) ) + { + int i; ++ ++ if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) ) ++ return; ++ + for(i = 0; i < SHADOW_OOS_PAGES; i++) +- { +- shadow_prealloc(d, SH_type_oos_snapshot, 1); + v->arch.paging.shadow.oos_snapshot[i] = + shadow_alloc(d, SH_type_oos_snapshot, 0); +- } + } + #endif /* OOS */ + +@@ -2470,6 +2493,9 @@ static void sh_update_paging_modes(struct vcpu *v) + mfn_t mmfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); + ++ if ( mfn_eq(mmfn, INVALID_MFN) ) ++ return; ++ + v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn); + make_cr3(v, mmfn); + hvm_update_host_cr3(v); +@@ -2508,6 +2534,12 @@ static void sh_update_paging_modes(struct vcpu *v) + v->arch.hvm.monitor_table = pagetable_null(); + new_mfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); ++ if ( mfn_eq(new_mfn, INVALID_MFN) ) ++ { ++ sh_destroy_monitor_table(v, old_mfn, ++ old_mode->shadow.shadow_levels); ++ return; ++ } + v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn); + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", + mfn_x(new_mfn)); +@@ -2593,7 +2625,12 @@ void sh_set_toplevel_shadow(struct vcpu *v, + if ( !mfn_valid(smfn) ) + { + /* Make sure there's enough free shadow memory. */ +- shadow_prealloc(d, root_type, 1); ++ if ( !shadow_prealloc(d, root_type, 1) ) ++ { ++ new_entry = pagetable_null(); ++ goto install_new_entry; ++ } ++ + /* Shadow the page. */ + smfn = make_shadow(v, gmfn, root_type); + } +diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c +index 87fc57704f25..d68796c495b7 100644 +--- a/xen/arch/x86/mm/shadow/hvm.c ++++ b/xen/arch/x86/mm/shadow/hvm.c +@@ -700,7 +700,9 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels) + ASSERT(!pagetable_get_pfn(v->arch.hvm.monitor_table)); + + /* Guarantee we can get the memory we need */ +- shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS); ++ if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) ) ++ return INVALID_MFN; ++ + m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); + mfn_to_page(m4mfn)->shadow_flags = 4; + +diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c +index 7e0494cf7faa..6a9f82d39ce6 100644 +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -2825,9 +2825,14 @@ static int sh_page_fault(struct vcpu *v, + * Preallocate shadow pages *before* removing writable accesses + * otherwhise an OOS L1 might be demoted and promoted again with + * writable mappings. */ +- shadow_prealloc(d, +- SH_type_l1_shadow, +- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1); ++ if ( !shadow_prealloc(d, SH_type_l1_shadow, ++ GUEST_PAGING_LEVELS < 4 ++ ? 1 : GUEST_PAGING_LEVELS - 1) ) ++ { ++ paging_unlock(d); ++ put_gfn(d, gfn_x(gfn)); ++ return 0; ++ } + + rc = gw_remove_write_accesses(v, va, &gw); + +diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h +index 911db46e7399..3fe0388e7c4f 100644 +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -351,7 +351,8 @@ void shadow_promote(struct domain *d, mfn_t gmfn, u32 type); + void shadow_demote(struct domain *d, mfn_t gmfn, u32 type); + + /* Shadow page allocation functions */ +-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count); ++bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type, ++ unsigned int count); + mfn_t shadow_alloc(struct domain *d, + u32 shadow_type, + unsigned long backpointer); +-- +2.37.3 + |