Diffstat (limited to '0060-xen-sched-fix-cpu-hotplug.patch')
-rw-r--r-- | 0060-xen-sched-fix-cpu-hotplug.patch | 307 |
1 file changed, 307 insertions, 0 deletions
diff --git a/0060-xen-sched-fix-cpu-hotplug.patch b/0060-xen-sched-fix-cpu-hotplug.patch
new file mode 100644
index 0000000..3e158f4
--- /dev/null
+++ b/0060-xen-sched-fix-cpu-hotplug.patch
@@ -0,0 +1,307 @@
+From d17680808b4c8015e31070c971e1ee548170ae34 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:15:41 +0200
+Subject: [PATCH 60/67] xen/sched: fix cpu hotplug
+
+Cpu unplugging is calling schedule_cpu_rm() via stop_machine_run() with
+interrupts disabled, thus any memory allocation or freeing must be
+avoided.
+
+Since commit 5047cd1d5dea ("xen/common: Use enhanced
+ASSERT_ALLOC_CONTEXT in xmalloc()") this restriction is being enforced
+via an assertion, which will now fail.
+
+Fix this by allocating needed memory before entering stop_machine_run()
+and freeing any memory only after having finished stop_machine_run().
+
+Fixes: 1ec410112cdd ("xen/sched: support differing granularity in schedule_cpu_[add/rm]()")
+Reported-by: Gao Ruifeng <ruifeng.gao@intel.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d84473689611eed32fd90b27e614f28af767fa3f
+master date: 2022-09-05 11:42:30 +0100
+---
+ xen/common/sched/core.c    | 25 +++++++++++---
+ xen/common/sched/cpupool.c | 69 +++++++++++++++++++++++++++++---------
+ xen/common/sched/private.h |  5 +--
+ 3 files changed, 77 insertions(+), 22 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 2decb1161a63..900aab8f66a7 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -3231,7 +3231,7 @@ out:
+  * by alloc_cpu_rm_data() is modified only in case the cpu in question is
+  * being moved from or to a cpupool.
+  */
+-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc)
+ {
+     struct cpu_rm_data *data;
+     const struct sched_resource *sr;
+@@ -3244,6 +3244,17 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
+     if ( !data )
+         goto out;
+ 
++    if ( aff_alloc )
++    {
++        if ( !alloc_affinity_masks(&data->affinity) )
++        {
++            XFREE(data);
++            goto out;
++        }
++    }
++    else
++        memset(&data->affinity, 0, sizeof(data->affinity));
++
+     data->old_ops = sr->scheduler;
+     data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
+     data->ppriv_old = sr->sched_priv;
+@@ -3264,6 +3275,7 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
+         {
+             while ( idx > 0 )
+                 sched_res_free(&data->sr[--idx]->rcu);
++            free_affinity_masks(&data->affinity);
+             XFREE(data);
+             goto out;
+         }
+@@ -3286,6 +3298,7 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
+ {
+     sched_free_udata(mem->old_ops, mem->vpriv_old);
+     sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
++    free_affinity_masks(&mem->affinity);
+ 
+     xfree(mem);
+ }
+@@ -3296,17 +3309,18 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
+  * The cpu is already marked as "free" and not valid any longer for its
+  * cpupool.
+  */
+-int schedule_cpu_rm(unsigned int cpu)
++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data)
+ {
+     struct sched_resource *sr;
+-    struct cpu_rm_data *data;
+     struct sched_unit *unit;
+     spinlock_t *old_lock;
+     unsigned long flags;
+     int idx = 0;
+     unsigned int cpu_iter;
++    bool free_data = !data;
+ 
+-    data = alloc_cpu_rm_data(cpu);
++    if ( !data )
++        data = alloc_cpu_rm_data(cpu, false);
+     if ( !data )
+         return -ENOMEM;
+ 
+@@ -3374,7 +3388,8 @@ int schedule_cpu_rm(unsigned int cpu)
+     sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu);
+ 
+     rcu_read_unlock(&sched_res_rculock);
+-    free_cpu_rm_data(data, cpu);
++    if ( free_data )
++        free_cpu_rm_data(data, cpu);
+ 
+     return 0;
+ }
+diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
+index 45b6ff99561a..b5a948639aad 100644
+--- a/xen/common/sched/cpupool.c
++++ b/xen/common/sched/cpupool.c
+@@ -402,22 +402,28 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
+ }
+ 
+ /* Update affinities of all domains in a cpupool. */
+-static void cpupool_update_node_affinity(const struct cpupool *c)
++static void cpupool_update_node_affinity(const struct cpupool *c,
++                                         struct affinity_masks *masks)
+ {
+-    struct affinity_masks masks;
++    struct affinity_masks local_masks;
+     struct domain *d;
+ 
+-    if ( !alloc_affinity_masks(&masks) )
+-        return;
++    if ( !masks )
++    {
++        if ( !alloc_affinity_masks(&local_masks) )
++            return;
++        masks = &local_masks;
++    }
+ 
+     rcu_read_lock(&domlist_read_lock);
+ 
+     for_each_domain_in_cpupool(d, c)
+-        domain_update_node_aff(d, &masks);
++        domain_update_node_aff(d, masks);
+ 
+     rcu_read_unlock(&domlist_read_lock);
+ 
+-    free_affinity_masks(&masks);
++    if ( masks == &local_masks )
++        free_affinity_masks(masks);
+ }
+ 
+ /*
+@@ -451,15 +457,17 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+ 
+     rcu_read_unlock(&sched_res_rculock);
+ 
+-    cpupool_update_node_affinity(c);
++    cpupool_update_node_affinity(c, NULL);
+ 
+     return 0;
+ }
+ 
+-static int cpupool_unassign_cpu_finish(struct cpupool *c)
++static int cpupool_unassign_cpu_finish(struct cpupool *c,
++                                       struct cpu_rm_data *mem)
+ {
+     int cpu = cpupool_moving_cpu;
+     const cpumask_t *cpus;
++    struct affinity_masks *masks = mem ? &mem->affinity : NULL;
+     int ret;
+ 
+     if ( c != cpupool_cpu_moving )
+@@ -482,7 +490,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+      */
+     if ( !ret )
+     {
+-        ret = schedule_cpu_rm(cpu);
++        ret = schedule_cpu_rm(cpu, mem);
+         if ( ret )
+             cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
+         else
+@@ -494,7 +502,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+     }
+     rcu_read_unlock(&sched_res_rculock);
+ 
+-    cpupool_update_node_affinity(c);
++    cpupool_update_node_affinity(c, masks);
+ 
+     return ret;
+ }
+@@ -558,7 +566,7 @@ static long cpupool_unassign_cpu_helper(void *info)
+                       cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu);
+     spin_lock(&cpupool_lock);
+ 
+-    ret = cpupool_unassign_cpu_finish(c);
++    ret = cpupool_unassign_cpu_finish(c, NULL);
+ 
+     spin_unlock(&cpupool_lock);
+     debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret);
+@@ -701,7 +709,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+  * This function is called in stop_machine context, so we can be sure no
+  * non-idle vcpu is active on the system.
+  */
+-static void cpupool_cpu_remove(unsigned int cpu)
++static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem)
+ {
+     int ret;
+ 
+@@ -709,7 +717,7 @@ static void cpupool_cpu_remove(unsigned int cpu)
+ 
+     if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
+     {
+-        ret = cpupool_unassign_cpu_finish(cpupool0);
++        ret = cpupool_unassign_cpu_finish(cpupool0, mem);
+         BUG_ON(ret);
+     }
+     cpumask_clear_cpu(cpu, &cpupool_free_cpus);
+@@ -775,7 +783,7 @@ static void cpupool_cpu_remove_forced(unsigned int cpu)
+         {
+             ret = cpupool_unassign_cpu_start(c, master_cpu);
+             BUG_ON(ret);
+-            ret = cpupool_unassign_cpu_finish(c);
++            ret = cpupool_unassign_cpu_finish(c, NULL);
+             BUG_ON(ret);
+         }
+     }
+@@ -993,12 +1001,24 @@ void dump_runq(unsigned char key)
+ static int cpu_callback(
+     struct notifier_block *nfb, unsigned long action, void *hcpu)
+ {
++    static struct cpu_rm_data *mem;
++
+     unsigned int cpu = (unsigned long)hcpu;
+     int rc = 0;
+ 
+     switch ( action )
+     {
+     case CPU_DOWN_FAILED:
++        if ( system_state <= SYS_STATE_active )
++        {
++            if ( mem )
++            {
++                free_cpu_rm_data(mem, cpu);
++                mem = NULL;
++            }
++            rc = cpupool_cpu_add(cpu);
++        }
++        break;
+     case CPU_ONLINE:
+         if ( system_state <= SYS_STATE_active )
+             rc = cpupool_cpu_add(cpu);
+@@ -1006,12 +1026,31 @@ static int cpu_callback(
+     case CPU_DOWN_PREPARE:
+         /* Suspend/Resume don't change assignments of cpus to cpupools. */
+         if ( system_state <= SYS_STATE_active )
++        {
+             rc = cpupool_cpu_remove_prologue(cpu);
++            if ( !rc )
++            {
++                ASSERT(!mem);
++                mem = alloc_cpu_rm_data(cpu, true);
++                rc = mem ? 0 : -ENOMEM;
++            }
++        }
+         break;
+     case CPU_DYING:
+         /* Suspend/Resume don't change assignments of cpus to cpupools. */
+         if ( system_state <= SYS_STATE_active )
+-            cpupool_cpu_remove(cpu);
++        {
++            ASSERT(mem);
++            cpupool_cpu_remove(cpu, mem);
++        }
++        break;
++    case CPU_DEAD:
++        if ( system_state <= SYS_STATE_active )
++        {
++            ASSERT(mem);
++            free_cpu_rm_data(mem, cpu);
++            mem = NULL;
++        }
+         break;
+     case CPU_RESUME_FAILED:
+         cpupool_cpu_remove_forced(cpu);
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index ff3185425219..3bab78ccb240 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -603,6 +603,7 @@ void free_affinity_masks(struct affinity_masks *affinity);
+ 
+ /* Memory allocation related data for schedule_cpu_rm(). */
+ struct cpu_rm_data {
++    struct affinity_masks affinity;
+     const struct scheduler *old_ops;
+     void *ppriv_old;
+     void *vpriv_old;
+@@ -617,9 +618,9 @@ struct scheduler *scheduler_alloc(unsigned int sched_id);
+ void scheduler_free(struct scheduler *sched);
+ int cpu_disable_scheduler(unsigned int cpu);
+ int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu);
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc);
+ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
+-int schedule_cpu_rm(unsigned int cpu);
++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *mem);
+ int sched_move_domain(struct domain *d, struct cpupool *c);
+ struct cpupool *cpupool_get_by_id(unsigned int poolid);
+ void cpupool_put(struct cpupool *pool);
+-- 
+2.37.3
+
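Note on the approach taken by the patch above: the commit message describes a common hotplug pattern, namely allocate everything while still in normal context (CPU_DOWN_PREPARE), only consume the pre-allocated memory while interrupts are off in stop_machine context (CPU_DYING), and free it once back in normal context (CPU_DEAD, or CPU_DOWN_FAILED on error). The following is a minimal standalone C sketch of that pattern, for illustration only; the struct and function names are simplified stand-ins, not the real Xen interfaces, and it is not part of the patch.

/*
 * Minimal sketch of the allocate-before / free-after hotplug pattern.
 * All names except the notifier action names are hypothetical.
 */
#include <stdlib.h>

struct rm_data { int placeholder; };   /* stands in for struct cpu_rm_data */

static struct rm_data *mem;            /* allocated outside the IRQ-off path */

/* CPU_DOWN_PREPARE: normal context, allocation is still allowed. */
static int on_down_prepare(void)
{
    mem = malloc(sizeof(*mem));
    return mem ? 0 : -1;
}

/*
 * CPU_DYING: would run via stop_machine with interrupts disabled;
 * only the pre-allocated buffer may be touched, no malloc()/free() here.
 */
static void on_dying(void)
{
    mem->placeholder = 1;              /* consume the pre-allocated memory */
}

/* CPU_DEAD or CPU_DOWN_FAILED: back in normal context, freeing is safe. */
static void on_dead_or_failed(void)
{
    free(mem);
    mem = NULL;
}

int main(void)
{
    if ( on_down_prepare() == 0 )
    {
        on_dying();
        on_dead_or_failed();
    }
    return 0;
}

In the patch itself this role is played by the static mem pointer added to cpu_callback() together with alloc_cpu_rm_data() and free_cpu_rm_data(), so that schedule_cpu_rm() and cpupool_cpu_remove() no longer allocate or free anything while running under stop_machine_run().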