diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD index 8845418d5cb0c8e79f1a40b9c4cf0a6eda57a7a4..0945e70d92a0eb051d6a565d2f736f00f4c4cf1e 100644 --- a/main/xen/APKBUILD +++ b/main/xen/APKBUILD @@ -1,8 +1,8 @@ # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu> # Maintainer: Natanael Copa <ncopa@alpinelinux.org> pkgname=xen -pkgver=4.18.0 -pkgrel=5 +pkgver=4.18.2 +pkgrel=0 pkgdesc="Xen hypervisor" url="https://www.xenproject.org/" arch="x86_64 armv7 aarch64" @@ -367,6 +367,10 @@ options="!strip" # 4.18.0-r5: # - CVE-2023-28746 XSA-452 # - CVE-2024-2193 XSA-453 +# 4.18.2-r0: +# - CVE-2023-46842 XSA-454 +# - CVE-2024-31142 XSA-455 +# - CVE-2024-2201 XSA-456 case "$CARCH" in x86*) @@ -412,8 +416,6 @@ source="https://downloads.xenproject.org/release/xen/$pkgver/xen-$pkgver.tar.gz https://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz https://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz - xen-stable-4.18-20240312.patch - mini-os-__divmoddi4.patch qemu-xen_paths.patch @@ -700,8 +702,7 @@ qemu_openrc() { } sha512sums=" -4cc9fd155144045a173c5f8ecc45f149817f1034eec618cb6f8b0494ef2fb5b95c4c60cf0bf4bec4bef8a622c35b6a3cb7dedc38e6d95e726f1611c73ddb3273 xen-4.18.0.tar.gz -8df958195290a39b54493766e7555d71c68083d75edd13a2f77ad237d6b6fb52bce816b9e975c0c14024a01042e599415360dcf475f7d2e0c6bee8f9fd2ed6ef xen-stable-4.18-20240312.patch +c5feb450155883b5d2e7f43b05a64e7215b661b7d2f438d8f5a0896bd57283379ee11ca8e2e7a1d8787813cc6f1a260253fcb8688ed7d61a2bfb636db1626941 xen-4.18.2.tar.gz 2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf gmp-4.3.2.tar.bz2 c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb grub-0.97.tar.gz 1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d lwip-1.3.0.tar.gz diff --git a/main/xen/xen-stable-4.18-20240312.patch b/main/xen/xen-stable-4.18-20240312.patch deleted file mode 100644 index 78d40c444599019de20dfbd739a99b80d92d4ea8..0000000000000000000000000000000000000000 --- a/main/xen/xen-stable-4.18-20240312.patch +++ /dev/null @@ -1,8490 +0,0 @@ -From 52be29df793f282822436c8c13e0948a01aee1ad Mon Sep 17 00:00:00 2001 -From: Tamas K Lengyel <tamas@tklengyel.com> -Date: Thu, 23 Nov 2023 12:10:46 +0100 -Subject: [PATCH 01/70] x86/mem_sharing: add missing m2p entry when mapping - shared_info page - -When mapping in the shared_info page to a fork the m2p entry wasn't set -resulting in the shared_info being reset even when the fork reset was called -with only reset_state and not reset_memory. This results in an extra -unnecessary TLB flush. 
- -Fixes: 1a0000ac775 ("mem_sharing: map shared_info page to same gfn during fork") -Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 23eb39acf011ef9bbe02ed4619c55f208fbcd39b -master date: 2023-10-31 16:10:14 +0000 ---- - xen/arch/x86/mm/mem_sharing.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c -index 94b6b782ef..142258f16a 100644 ---- a/xen/arch/x86/mm/mem_sharing.c -+++ b/xen/arch/x86/mm/mem_sharing.c -@@ -1847,6 +1847,8 @@ static int copy_special_pages(struct domain *cd, struct domain *d) - p2m_ram_rw, p2m->default_access, -1); - if ( rc ) - return rc; -+ -+ set_gpfn_from_mfn(mfn_x(new_mfn), gfn_x(old_gfn)); - } - } - --- -2.44.0 - - -From 880e06fdea401493a3f408deb0f411f7aeccee27 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 23 Nov 2023 12:11:21 +0100 -Subject: [PATCH 02/70] x86/pv-shim: fix grant table operations for 32-bit - guests - -When switching to call the shim functions from the normal handlers, the -compat_grant_table_op() function was omitted, leaving it calling the -real grant table operations in !PV_SHIM_EXCLUSIVE builds. This leaves a -32-bit shim guest failing to set up its real grant table with the parent -hypervisor. - -Fixes: e7db635f4428 ("x86/pv-shim: Don't modify the hypercall table") -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 93ec30bc545f15760039c23ee4b97b80c0b3b3b3 -master date: 2023-10-31 16:10:14 +0000 ---- - xen/common/compat/grant_table.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c -index e00bc24a34..af98eade17 100644 ---- a/xen/common/compat/grant_table.c -+++ b/xen/common/compat/grant_table.c -@@ -63,6 +63,11 @@ int compat_grant_table_op( - unsigned int i, cmd_op; - XEN_GUEST_HANDLE_PARAM(void) cnt_uop; - -+#ifdef CONFIG_PV_SHIM -+ if ( unlikely(pv_shim) ) -+ return pv_shim_grant_table_op(cmd, uop, count); -+#endif -+ - set_xen_guest_handle(cnt_uop, NULL); - cmd_op = cmd & GNTTABOP_CMD_MASK; - if ( cmd_op != GNTTABOP_cache_flush ) --- -2.44.0 - - -From 9e8edd4c75564530a6fb98f5abba267edb906313 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 23 Nov 2023 12:12:18 +0100 -Subject: [PATCH 03/70] x86/x2apic: remove usage of ACPI_FADT_APIC_CLUSTER -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The ACPI FADT APIC_CLUSTER flag mandates that when the interrupt delivery is -Logical mode APIC must be configured for Cluster destination model. However in -apic_x2apic_probe() such flag is incorrectly used to gate whether Physical mode -can be used. - -Since Xen when in x2APIC mode only uses Logical mode together with Cluster -model completely remove checking for ACPI_FADT_APIC_CLUSTER, as Xen always -fulfills the requirement signaled by the flag. 
- -Fixes: eb40ae41b658 ('x86/Kconfig: add option for default x2APIC destination mode') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 26a449ce32cef33f2cb50602be19fcc0c4223ba9 -master date: 2023-11-02 10:50:26 +0100 ---- - xen/arch/x86/genapic/x2apic.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c -index ca1db27157..707deef98c 100644 ---- a/xen/arch/x86/genapic/x2apic.c -+++ b/xen/arch/x86/genapic/x2apic.c -@@ -231,8 +231,7 @@ const struct genapic *__init apic_x2apic_probe(void) - */ - x2apic_phys = iommu_intremap != iommu_intremap_full || - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || -- (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) && -- !(acpi_gbl_FADT.flags & ACPI_FADT_APIC_CLUSTER)); -+ IS_ENABLED(CONFIG_X2APIC_PHYSICAL); - } - else if ( !x2apic_phys ) - switch ( iommu_intremap ) --- -2.44.0 - - -From fcb1016bbd476e17c72b1837ae2a3eaac517fa52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 23 Nov 2023 12:12:47 +0100 -Subject: [PATCH 04/70] x86/i8259: do not assume interrupts always target CPU0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Sporadically we have seen the following during AP bringup on AMD platforms -only: - -microcode: CPU59 updated from revision 0x830107a to 0x830107a, date = 2023-05-17 -microcode: CPU60 updated from revision 0x830104d to 0x830107a, date = 2023-05-17 -CPU60: No irq handler for vector 27 (IRQ -2147483648) -microcode: CPU61 updated from revision 0x830107a to 0x830107a, date = 2023-05-17 - -This is similar to the issue raised on Linux commit 36e9e1eab777e, where they -observed i8259 (active) vectors getting delivered to CPUs different than 0. - -On AMD or Hygon platforms adjust the target CPU mask of i8259 interrupt -descriptors to contain all possible CPUs, so that APs will reserve the vector -at startup if any legacy IRQ is still delivered through the i8259. Note that -if the IO-APIC takes over those interrupt descriptors the CPU mask will be -reset. - -Spurious i8259 interrupt vectors however (IRQ7 and IRQ15) can be injected even -when all i8259 pins are masked, and hence would need to be handled on all CPUs. - -Continue to reserve PIC vectors on CPU0 only, but do check for such spurious -interrupts on all CPUs if the vendor is AMD or Hygon. Note that once the -vectors get used by devices detecting PIC spurious interrupts will no longer be -possible, however the device driver should be able to cope with spurious -interrupts. Such PIC spurious interrupts occurring when the vector is in use -by a local APIC routed source will lead to an extra EOI, which might -unintentionally clear a different vector from ISR. Note this is already the -current behavior, so assume it's infrequent enough to not cause real issues. 
- -Finally, adjust the printed message to display the CPU where the spurious -interrupt has been received, so it looks like: - -microcode: CPU1 updated from revision 0x830107a to 0x830107a, date = 2023-05-17 -cpu1: spurious 8259A interrupt: IRQ7 -microcode: CPU2 updated from revision 0x830104d to 0x830107a, date = 2023-05-17 - -Amends: 3fba06ba9f8b ('x86/IRQ: re-use legacy vector ranges on APs') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 87f37449d586b4d407b75235bb0a171e018e25ec -master date: 2023-11-02 10:50:59 +0100 ---- - xen/arch/x86/i8259.c | 21 +++++++++++++++++++-- - xen/arch/x86/irq.c | 11 ++++++++++- - 2 files changed, 29 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c -index ed9f55abe5..e0fa1f96b4 100644 ---- a/xen/arch/x86/i8259.c -+++ b/xen/arch/x86/i8259.c -@@ -222,7 +222,8 @@ static bool _mask_and_ack_8259A_irq(unsigned int irq) - is_real_irq = false; - /* Report spurious IRQ, once per IRQ line. */ - if (!(spurious_irq_mask & irqmask)) { -- printk("spurious 8259A interrupt: IRQ%d.\n", irq); -+ printk("cpu%u: spurious 8259A interrupt: IRQ%u\n", -+ smp_processor_id(), irq); - spurious_irq_mask |= irqmask; - } - /* -@@ -349,7 +350,23 @@ void __init init_IRQ(void) - continue; - desc->handler = &i8259A_irq_type; - per_cpu(vector_irq, cpu)[LEGACY_VECTOR(irq)] = irq; -- cpumask_copy(desc->arch.cpu_mask, cpumask_of(cpu)); -+ -+ /* -+ * The interrupt affinity logic never targets interrupts to offline -+ * CPUs, hence it's safe to use cpumask_all here. -+ * -+ * Legacy PIC interrupts are only targeted to CPU0, but depending on -+ * the platform they can be distributed to any online CPU in hardware. -+ * Note this behavior has only been observed on AMD hardware. In order -+ * to cope install all active legacy vectors on all CPUs. -+ * -+ * IO-APIC will change the destination mask if/when taking ownership of -+ * the interrupt. -+ */ -+ cpumask_copy(desc->arch.cpu_mask, -+ (boot_cpu_data.x86_vendor & -+ (X86_VENDOR_AMD | X86_VENDOR_HYGON) ? &cpumask_all -+ : cpumask_of(cpu))); - desc->arch.vector = LEGACY_VECTOR(irq); - } - -diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c -index f42ad539dc..16d9fceba1 100644 ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1920,7 +1920,16 @@ void do_IRQ(struct cpu_user_regs *regs) - kind = ""; - if ( !(vector >= FIRST_LEGACY_VECTOR && - vector <= LAST_LEGACY_VECTOR && -- !smp_processor_id() && -+ (!smp_processor_id() || -+ /* -+ * For AMD/Hygon do spurious PIC interrupt -+ * detection on all CPUs, as it has been observed -+ * that during unknown circumstances spurious PIC -+ * interrupts have been delivered to CPUs -+ * different than the BSP. -+ */ -+ (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | -+ X86_VENDOR_HYGON))) && - bogus_8259A_irq(vector - FIRST_LEGACY_VECTOR)) ) - { - printk("CPU%u: No irq handler for vector %02x (IRQ %d%s)\n", --- -2.44.0 - - -From 40bfa9dd57f1efdd0f0dc974e80a438d9db90874 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 23 Nov 2023 12:13:31 +0100 -Subject: [PATCH 05/70] x86/spec-ctrl: Add SRSO whitepaper URL - -... now that it exists in public. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 78a86b26868c12ae1cc3dd2a8bb9aa5eebaa41fd -master date: 2023-11-07 17:47:34 +0000 ---- - xen/arch/x86/spec_ctrl.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 6fd7d44ce4..a8d8af22f6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -903,6 +903,9 @@ static bool __init should_use_eager_fpu(void) - } - } - -+/* -+ * https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf -+ */ - static void __init srso_calculations(bool hw_smt_enabled) - { - if ( !(boot_cpu_data.x86_vendor & --- -2.44.0 - - -From 3f9390fea5c51a6d64596d295902d28931eeca4c Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Thu, 23 Nov 2023 12:13:53 +0100 -Subject: [PATCH 06/70] xen/sched: fix sched_move_domain() - -When moving a domain out of a cpupool running with the credit2 -scheduler and having multiple run-queues, the following ASSERT() can -be observed: - -(XEN) Xen call trace: -(XEN) [<ffff82d04023a700>] R credit2.c#csched2_unit_remove+0xe3/0xe7 -(XEN) [<ffff82d040246adb>] S sched_move_domain+0x2f3/0x5b1 -(XEN) [<ffff82d040234cf7>] S cpupool.c#cpupool_move_domain_locked+0x1d/0x3b -(XEN) [<ffff82d040236025>] S cpupool_move_domain+0x24/0x35 -(XEN) [<ffff82d040206513>] S domain_kill+0xa5/0x116 -(XEN) [<ffff82d040232b12>] S do_domctl+0xe5f/0x1951 -(XEN) [<ffff82d0402276ba>] S timer.c#timer_lock+0x69/0x143 -(XEN) [<ffff82d0402dc71b>] S pv_hypercall+0x44e/0x4a9 -(XEN) [<ffff82d0402012b7>] S lstar_enter+0x137/0x140 -(XEN) -(XEN) -(XEN) **************************************** -(XEN) Panic on CPU 1: -(XEN) Assertion 'svc->rqd == c2rqd(sched_unit_master(unit))' failed at common/sched/credit2.c:1159 -(XEN) **************************************** - -This is happening as sched_move_domain() is setting a different cpu -for a scheduling unit without telling the scheduler. When this unit is -removed from the scheduler, the ASSERT() will trigger. - -In non-debug builds the result is usually a clobbered pointer, leading -to another crash a short time later. - -Fix that by swapping the two involved actions (setting another cpu and -removing the unit from the scheduler). - -Link: https://github.com/Dasharo/dasharo-issues/issues/488 -Fixes: 70fadc41635b ("xen/cpupool: support moving domain between cpupools with different granularity") -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: George Dunlap <george.dunlap@cloud.com> -master commit: 4709ec82917668c2df958ef91b4f21c049c76bee -master date: 2023-11-20 10:49:29 +0100 ---- - xen/common/sched/core.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c -index 12deefa745..eba0cea4bb 100644 ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -732,18 +732,20 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - old_domdata = d->sched_priv; - - /* -- * Temporarily move all units to same processor to make locking -- * easier when moving the new units to the new processors. -+ * Remove all units from the old scheduler, and temporarily move them to -+ * the same processor to make locking easier when moving the new units to -+ * new processors. 
- */ - new_p = cpumask_first(d->cpupool->cpu_valid); - for_each_sched_unit ( d, unit ) - { -- spinlock_t *lock = unit_schedule_lock_irq(unit); -+ spinlock_t *lock; -+ -+ sched_remove_unit(old_ops, unit); - -+ lock = unit_schedule_lock_irq(unit); - sched_set_res(unit, get_sched_res(new_p)); - spin_unlock_irq(lock); -- -- sched_remove_unit(old_ops, unit); - } - - old_units = d->sched_unit_list; --- -2.44.0 - - -From 90a6d821757edf1202c527143b8a05b0d2a3dfaa Mon Sep 17 00:00:00 2001 -From: Frediano Ziglio <frediano.ziglio@cloud.com> -Date: Wed, 6 Dec 2023 10:37:13 +0100 -Subject: [PATCH 07/70] x86/mem_sharing: Release domain if we are not able to - enable memory sharing - -In case it's not possible to enable memory sharing (mem_sharing_control -fails) we just return the error code without releasing the domain -acquired some lines above by rcu_lock_live_remote_domain_by_id(). - -Fixes: 72f8d45d69b8 ("x86/mem_sharing: enable mem_sharing on first memop") -Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Tamas K Lengyel <tamas@tklengyel.com> -master commit: fbcec32d6d3ea0ac329301925b317478316209ed -master date: 2023-11-27 12:06:13 +0000 ---- - xen/arch/x86/mm/mem_sharing.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c -index 142258f16a..429d27ef85 100644 ---- a/xen/arch/x86/mm/mem_sharing.c -+++ b/xen/arch/x86/mm/mem_sharing.c -@@ -2013,7 +2013,7 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) - - if ( !mem_sharing_enabled(d) && - (rc = mem_sharing_control(d, true, 0)) ) -- return rc; -+ goto out; - - switch ( mso.op ) - { --- -2.44.0 - - -From 480168fcb3135f0da6e7a6b3b754c78fabc24d4f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Wed, 6 Dec 2023 10:38:03 +0100 -Subject: [PATCH 08/70] livepatch: do not use .livepatch.funcs section to store - internal state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently the livepatch logic inside of Xen will use fields of struct -livepatch_func in order to cache internal state of patched functions. Note -this is a field that is part of the payload, and is loaded as an ELF section -(.livepatch.funcs), taking into account the SHF_* flags in the section -header. - -The flags for the .livepatch.funcs section, as set by livepatch-build-tools, -are SHF_ALLOC, which leads to its contents (the array of livepatch_func -structures) being placed in read-only memory: - -Section Headers: - [Nr] Name Type Address Offset - Size EntSize Flags Link Info Align -[...] - [ 4] .livepatch.funcs PROGBITS 0000000000000000 00000080 - 0000000000000068 0000000000000000 A 0 0 8 - -This previously went unnoticed, as all writes to the fields of livepatch_func -happen in the critical region that had WP disabled in CR0. After 8676092a0f16 -however WP is no longer toggled in CR0 for patch application, and only the -hypervisor .text mappings are made write-accessible. That leads to the -following page fault when attempting to apply a livepatch: - -----[ Xen-4.19-unstable x86_64 debug=y Tainted: C ]---- -CPU: 4 -RIP: e008:[<ffff82d040221e81>] common/livepatch.c#apply_payload+0x45/0x1e1 -[...] 
-Xen call trace: - [<ffff82d040221e81>] R common/livepatch.c#apply_payload+0x45/0x1e1 - [<ffff82d0402235b2>] F check_for_livepatch_work+0x385/0xaa5 - [<ffff82d04032508f>] F arch/x86/domain.c#idle_loop+0x92/0xee - -Pagetable walk from ffff82d040625079: - L4[0x105] = 000000008c6c9063 ffffffffffffffff - L3[0x141] = 000000008c6c6063 ffffffffffffffff - L2[0x003] = 000000086a1e7063 ffffffffffffffff - L1[0x025] = 800000086ca5d121 ffffffffffffffff - -**************************************** -Panic on CPU 4: -FATAL PAGE FAULT -[error_code=0003] -Faulting linear address: ffff82d040625079 -**************************************** - -Fix this by moving the internal Xen function patching state out of -livepatch_func into an area not allocated as part of the ELF payload. While -there also constify the array of livepatch_func structures in order to prevent -further surprises. - -Note there's still one field (old_addr) that gets set during livepatch load. I -consider this fine since the field is read-only after load, and at the point -the field gets set the underlying mapping hasn't been made read-only yet. - -Fixes: 8676092a0f16 ('x86/livepatch: Fix livepatch application when CET is active') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> - -xen/livepatch: fix livepatch tests - -The current set of in-tree livepatch tests in xen/test/livepatch started -failing after the constify of the payload funcs array, and the movement of the -status data into a separate array. - -Fix the tests so they respect the constness of the funcs array and also make -use of the new location of the per-func state data. - -Fixes: 82182ad7b46e ('livepatch: do not use .livepatch.funcs section to store internal state') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: 82182ad7b46e0f7a3856bb12c7a9bf2e2a4570bc -master date: 2023-11-27 15:16:01 +0100 -master commit: 902377b690f42ddf44ae91c4b0751d597f1cd694 -master date: 2023-11-29 10:46:42 +0000 ---- - xen/arch/arm/arm32/livepatch.c | 9 +++-- - xen/arch/arm/arm64/livepatch.c | 9 +++-- - xen/arch/arm/livepatch.c | 9 +++-- - xen/arch/x86/livepatch.c | 26 +++++++------ - xen/common/livepatch.c | 25 ++++++++---- - xen/include/public/sysctl.h | 5 +-- - xen/include/xen/livepatch.h | 38 +++++++++++++------ - xen/include/xen/livepatch_payload.h | 3 +- - xen/test/livepatch/xen_action_hooks.c | 12 +++--- - xen/test/livepatch/xen_action_hooks_marker.c | 20 ++++++---- - xen/test/livepatch/xen_action_hooks_noapply.c | 22 ++++++----- - xen/test/livepatch/xen_action_hooks_nofunc.c | 6 +-- - .../livepatch/xen_action_hooks_norevert.c | 24 +++++++----- - xen/test/livepatch/xen_prepost_hooks.c | 8 ++-- - xen/test/livepatch/xen_prepost_hooks_fail.c | 2 +- - 15 files changed, 130 insertions(+), 88 deletions(-) - -diff --git a/xen/arch/arm/arm32/livepatch.c b/xen/arch/arm/arm32/livepatch.c -index 3c50283b2a..80d2659b78 100644 ---- a/xen/arch/arm/arm32/livepatch.c -+++ b/xen/arch/arm/arm32/livepatch.c -@@ -11,23 +11,24 @@ - #include <asm/page.h> - #include <asm/livepatch.h> - --void arch_livepatch_apply(struct livepatch_func *func) -+void arch_livepatch_apply(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - uint32_t insn; - uint32_t *new_ptr; - unsigned int i, len; - -- BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE > sizeof(func->opaque)); -+ BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE > 
sizeof(state->insn_buffer)); - BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE != sizeof(insn)); - - ASSERT(vmap_of_xen_text); - -- len = livepatch_insn_len(func); -+ len = livepatch_insn_len(func, state); - if ( !len ) - return; - - /* Save old ones. */ -- memcpy(func->opaque, func->old_addr, len); -+ memcpy(state->insn_buffer, func->old_addr, len); - - if ( func->new_addr ) - { -diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c -index 62d2ef373a..df2cebedde 100644 ---- a/xen/arch/arm/arm64/livepatch.c -+++ b/xen/arch/arm/arm64/livepatch.c -@@ -15,23 +15,24 @@ - #include <asm/insn.h> - #include <asm/livepatch.h> - --void arch_livepatch_apply(struct livepatch_func *func) -+void arch_livepatch_apply(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - uint32_t insn; - uint32_t *new_ptr; - unsigned int i, len; - -- BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE > sizeof(func->opaque)); -+ BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE > sizeof(state->insn_buffer)); - BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE != sizeof(insn)); - - ASSERT(vmap_of_xen_text); - -- len = livepatch_insn_len(func); -+ len = livepatch_insn_len(func, state); - if ( !len ) - return; - - /* Save old ones. */ -- memcpy(func->opaque, func->old_addr, len); -+ memcpy(state->insn_buffer, func->old_addr, len); - - if ( func->new_addr ) - insn = aarch64_insn_gen_branch_imm((unsigned long)func->old_addr, -diff --git a/xen/arch/arm/livepatch.c b/xen/arch/arm/livepatch.c -index d646379c8c..bbca1e5a5e 100644 ---- a/xen/arch/arm/livepatch.c -+++ b/xen/arch/arm/livepatch.c -@@ -69,7 +69,7 @@ void arch_livepatch_revive(void) - int arch_livepatch_verify_func(const struct livepatch_func *func) - { - /* If NOPing only do up to maximum amount we can put in the ->opaque. */ -- if ( !func->new_addr && (func->new_size > sizeof(func->opaque) || -+ if ( !func->new_addr && (func->new_size > LIVEPATCH_OPAQUE_SIZE || - func->new_size % ARCH_PATCH_INSN_SIZE) ) - return -EOPNOTSUPP; - -@@ -79,15 +79,16 @@ int arch_livepatch_verify_func(const struct livepatch_func *func) - return 0; - } - --void arch_livepatch_revert(const struct livepatch_func *func) -+void arch_livepatch_revert(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - uint32_t *new_ptr; - unsigned int len; - - new_ptr = func->old_addr - (void *)_start + vmap_of_xen_text; - -- len = livepatch_insn_len(func); -- memcpy(new_ptr, func->opaque, len); -+ len = livepatch_insn_len(func, state); -+ memcpy(new_ptr, state->insn_buffer, len); - - clean_and_invalidate_dcache_va_range(new_ptr, len); - } -diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c -index a54d991c5f..ee539f001b 100644 ---- a/xen/arch/x86/livepatch.c -+++ b/xen/arch/x86/livepatch.c -@@ -95,7 +95,7 @@ int arch_livepatch_verify_func(const struct livepatch_func *func) - if ( !func->new_addr ) - { - /* Only do up to maximum amount we can put in the ->opaque. */ -- if ( func->new_size > sizeof(func->opaque) ) -+ if ( func->new_size > LIVEPATCH_OPAQUE_SIZE ) - return -EOPNOTSUPP; - - if ( func->old_size < func->new_size ) -@@ -123,13 +123,14 @@ int arch_livepatch_verify_func(const struct livepatch_func *func) - * "noinline" to cause control flow change and thus invalidate I$ and - * cause refetch after modification. 
- */ --void noinline arch_livepatch_apply(struct livepatch_func *func) -+void noinline arch_livepatch_apply(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - uint8_t *old_ptr; -- uint8_t insn[sizeof(func->opaque)]; -+ uint8_t insn[sizeof(state->insn_buffer)]; - unsigned int len; - -- func->patch_offset = 0; -+ state->patch_offset = 0; - old_ptr = func->old_addr; - - /* -@@ -141,14 +142,14 @@ void noinline arch_livepatch_apply(struct livepatch_func *func) - * ENDBR64 or similar instructions). - */ - if ( is_endbr64(old_ptr) || is_endbr64_poison(func->old_addr) ) -- func->patch_offset += ENDBR64_LEN; -+ state->patch_offset += ENDBR64_LEN; - - /* This call must be done with ->patch_offset already set. */ -- len = livepatch_insn_len(func); -+ len = livepatch_insn_len(func, state); - if ( !len ) - return; - -- memcpy(func->opaque, old_ptr + func->patch_offset, len); -+ memcpy(state->insn_buffer, old_ptr + state->patch_offset, len); - if ( func->new_addr ) - { - int32_t val; -@@ -156,7 +157,7 @@ void noinline arch_livepatch_apply(struct livepatch_func *func) - BUILD_BUG_ON(ARCH_PATCH_INSN_SIZE != (1 + sizeof(val))); - - insn[0] = 0xe9; /* Relative jump. */ -- val = func->new_addr - (func->old_addr + func->patch_offset + -+ val = func->new_addr - (func->old_addr + state->patch_offset + - ARCH_PATCH_INSN_SIZE); - - memcpy(&insn[1], &val, sizeof(val)); -@@ -164,17 +165,18 @@ void noinline arch_livepatch_apply(struct livepatch_func *func) - else - add_nops(insn, len); - -- memcpy(old_ptr + func->patch_offset, insn, len); -+ memcpy(old_ptr + state->patch_offset, insn, len); - } - - /* - * "noinline" to cause control flow change and thus invalidate I$ and - * cause refetch after modification. - */ --void noinline arch_livepatch_revert(const struct livepatch_func *func) -+void noinline arch_livepatch_revert(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { -- memcpy(func->old_addr + func->patch_offset, func->opaque, -- livepatch_insn_len(func)); -+ memcpy(func->old_addr + state->patch_offset, state->insn_buffer, -+ livepatch_insn_len(func, state)); - } - - /* -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index d89a904bd4..e635606c10 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -260,6 +260,9 @@ static void free_payload_data(struct payload *payload) - vfree((void *)payload->text_addr); - - payload->pages = 0; -+ -+ /* fstate gets allocated strictly after move_payload. 
*/ -+ XFREE(payload->fstate); - } - - /* -@@ -656,6 +659,7 @@ static int prepare_payload(struct payload *payload, - { - const struct livepatch_elf_sec *sec; - unsigned int i; -+ struct livepatch_func *funcs; - struct livepatch_func *f; - struct virtual_region *region; - const Elf_Note *n; -@@ -666,14 +670,19 @@ static int prepare_payload(struct payload *payload, - if ( !section_ok(elf, sec, sizeof(*payload->funcs)) ) - return -EINVAL; - -- payload->funcs = sec->load_addr; -+ payload->funcs = funcs = sec->load_addr; - payload->nfuncs = sec->sec->sh_size / sizeof(*payload->funcs); - -+ payload->fstate = xzalloc_array(typeof(*payload->fstate), -+ payload->nfuncs); -+ if ( !payload->fstate ) -+ return -ENOMEM; -+ - for ( i = 0; i < payload->nfuncs; i++ ) - { - int rc; - -- f = &(payload->funcs[i]); -+ f = &(funcs[i]); - - if ( f->version != LIVEPATCH_PAYLOAD_VERSION ) - { -@@ -1361,7 +1370,7 @@ static int apply_payload(struct payload *data) - ASSERT(!local_irq_is_enabled()); - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_apply(&data->funcs[i]); -+ common_livepatch_apply(&data->funcs[i], &data->fstate[i]); - - arch_livepatch_revive(); - -@@ -1397,7 +1406,7 @@ static int revert_payload(struct payload *data) - } - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_revert(&data->funcs[i]); -+ common_livepatch_revert(&data->funcs[i], &data->fstate[i]); - - /* - * Since we are running with IRQs disabled and the hooks may call common -@@ -1438,9 +1447,10 @@ static inline bool was_action_consistent(const struct payload *data, livepatch_f - - for ( i = 0; i < data->nfuncs; i++ ) - { -- struct livepatch_func *f = &(data->funcs[i]); -+ const struct livepatch_func *f = &(data->funcs[i]); -+ const struct livepatch_fstate *s = &(data->fstate[i]); - -- if ( f->applied != expected_state ) -+ if ( s->applied != expected_state ) - { - printk(XENLOG_ERR LIVEPATCH "%s: Payload has a function: '%s' with inconsistent applied state.\n", - data->name, f->name ?: "noname"); -@@ -2157,7 +2167,8 @@ static void cf_check livepatch_printall(unsigned char key) - - for ( i = 0; i < data->nfuncs; i++ ) - { -- struct livepatch_func *f = &(data->funcs[i]); -+ const struct livepatch_func *f = &(data->funcs[i]); -+ - printk(" %s patch %p(%u) with %p (%u)\n", - f->name, f->old_addr, f->old_size, f->new_addr, f->new_size); - -diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h -index f1eba78405..9b19679cae 100644 ---- a/xen/include/public/sysctl.h -+++ b/xen/include/public/sysctl.h -@@ -991,10 +991,7 @@ struct livepatch_func { - uint32_t new_size; - uint32_t old_size; - uint8_t version; /* MUST be LIVEPATCH_PAYLOAD_VERSION. 
*/ -- uint8_t opaque[LIVEPATCH_OPAQUE_SIZE]; -- uint8_t applied; -- uint8_t patch_offset; -- uint8_t _pad[6]; -+ uint8_t _pad[39]; - livepatch_expectation_t expect; - }; - typedef struct livepatch_func livepatch_func_t; -diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h -index 9fdb29c382..537d3d58b6 100644 ---- a/xen/include/xen/livepatch.h -+++ b/xen/include/xen/livepatch.h -@@ -13,6 +13,9 @@ struct xen_sysctl_livepatch_op; - - #include <xen/elfstructs.h> - #include <xen/errno.h> /* For -ENOSYS or -EOVERFLOW */ -+ -+#include <public/sysctl.h> /* For LIVEPATCH_OPAQUE_SIZE */ -+ - #ifdef CONFIG_LIVEPATCH - - /* -@@ -51,6 +54,12 @@ struct livepatch_symbol { - bool_t new_symbol; - }; - -+struct livepatch_fstate { -+ unsigned int patch_offset; -+ enum livepatch_func_state applied; -+ uint8_t insn_buffer[LIVEPATCH_OPAQUE_SIZE]; -+}; -+ - int livepatch_op(struct xen_sysctl_livepatch_op *); - void check_for_livepatch_work(void); - unsigned long livepatch_symbols_lookup_by_name(const char *symname); -@@ -87,10 +96,11 @@ void arch_livepatch_init(void); - int arch_livepatch_verify_func(const struct livepatch_func *func); - - static inline --unsigned int livepatch_insn_len(const struct livepatch_func *func) -+unsigned int livepatch_insn_len(const struct livepatch_func *func, -+ const struct livepatch_fstate *state) - { - if ( !func->new_addr ) -- return func->new_size - func->patch_offset; -+ return func->new_size - state->patch_offset; - - return ARCH_PATCH_INSN_SIZE; - } -@@ -117,39 +127,43 @@ int arch_livepatch_safety_check(void); - int arch_livepatch_quiesce(void); - void arch_livepatch_revive(void); - --void arch_livepatch_apply(struct livepatch_func *func); --void arch_livepatch_revert(const struct livepatch_func *func); -+void arch_livepatch_apply(const struct livepatch_func *func, -+ struct livepatch_fstate *state); -+void arch_livepatch_revert(const struct livepatch_func *func, -+ struct livepatch_fstate *state); - void arch_livepatch_post_action(void); - - void arch_livepatch_mask(void); - void arch_livepatch_unmask(void); - --static inline void common_livepatch_apply(struct livepatch_func *func) -+static inline void common_livepatch_apply(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - /* If the action has been already executed on this function, do nothing. */ -- if ( func->applied == LIVEPATCH_FUNC_APPLIED ) -+ if ( state->applied == LIVEPATCH_FUNC_APPLIED ) - { - printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n", - __func__, func->name); - return; - } - -- arch_livepatch_apply(func); -- func->applied = LIVEPATCH_FUNC_APPLIED; -+ arch_livepatch_apply(func, state); -+ state->applied = LIVEPATCH_FUNC_APPLIED; - } - --static inline void common_livepatch_revert(struct livepatch_func *func) -+static inline void common_livepatch_revert(const struct livepatch_func *func, -+ struct livepatch_fstate *state) - { - /* If the apply action hasn't been executed on this function, do nothing. 
*/ -- if ( !func->old_addr || func->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -+ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) - { - printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n", - __func__, func->name); - return; - } - -- arch_livepatch_revert(func); -- func->applied = LIVEPATCH_FUNC_NOT_APPLIED; -+ arch_livepatch_revert(func, state); -+ state->applied = LIVEPATCH_FUNC_NOT_APPLIED; - } - #else - -diff --git a/xen/include/xen/livepatch_payload.h b/xen/include/xen/livepatch_payload.h -index 9f5f064205..b9cd4f2096 100644 ---- a/xen/include/xen/livepatch_payload.h -+++ b/xen/include/xen/livepatch_payload.h -@@ -52,7 +52,8 @@ struct payload { - size_t ro_size; /* .. and its size (if any). */ - unsigned int pages; /* Total pages for [text,rw,ro]_addr */ - struct list_head applied_list; /* Linked to 'applied_list'. */ -- struct livepatch_func *funcs; /* The array of functions to patch. */ -+ const struct livepatch_func *funcs; /* The array of functions to patch. */ -+ struct livepatch_fstate *fstate; /* State of patched functions. */ - unsigned int nfuncs; /* Nr of functions to patch. */ - const struct livepatch_symbol *symtab; /* All symbols. */ - const char *strtab; /* Pointer to .strtab. */ -diff --git a/xen/test/livepatch/xen_action_hooks.c b/xen/test/livepatch/xen_action_hooks.c -index 39b5313027..fa0b3ab35f 100644 ---- a/xen/test/livepatch/xen_action_hooks.c -+++ b/xen/test/livepatch/xen_action_hooks.c -@@ -26,9 +26,10 @@ static int apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- func->applied = LIVEPATCH_FUNC_APPLIED; -+ fstate->applied = LIVEPATCH_FUNC_APPLIED; - apply_cnt++; - - printk(KERN_DEBUG "%s: applying: %s\n", __func__, func->name); -@@ -47,9 +48,10 @@ static int revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- func->applied = LIVEPATCH_FUNC_NOT_APPLIED; -+ fstate->applied = LIVEPATCH_FUNC_NOT_APPLIED; - revert_cnt++; - - printk(KERN_DEBUG "%s: reverting: %s\n", __func__, func->name); -@@ -68,7 +70,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - printk(KERN_DEBUG "%s: reverted: %s\n", __func__, func->name); - } -diff --git a/xen/test/livepatch/xen_action_hooks_marker.c b/xen/test/livepatch/xen_action_hooks_marker.c -index 4f807a577f..d2e22f70d1 100644 ---- a/xen/test/livepatch/xen_action_hooks_marker.c -+++ b/xen/test/livepatch/xen_action_hooks_marker.c -@@ -23,9 +23,10 @@ static int pre_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre applied: %s\n", __func__, func->name); - } - -@@ -42,9 +43,10 @@ static void post_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) 
- { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied != LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: post applied: %s\n", __func__, func->name); - } - -@@ -59,9 +61,10 @@ static int pre_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied != LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre reverted: %s\n", __func__, func->name); - } - -@@ -78,9 +81,10 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name); - } - -diff --git a/xen/test/livepatch/xen_action_hooks_noapply.c b/xen/test/livepatch/xen_action_hooks_noapply.c -index 4c55c156a6..646a5fd2f0 100644 ---- a/xen/test/livepatch/xen_action_hooks_noapply.c -+++ b/xen/test/livepatch/xen_action_hooks_noapply.c -@@ -25,9 +25,10 @@ static int pre_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre applied: %s\n", __func__, func->name); - } - -@@ -44,7 +45,7 @@ static int apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - apply_cnt++; - printk(KERN_DEBUG "%s: applying: %s\n", __func__, func->name); -@@ -63,10 +64,11 @@ static void post_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - - BUG_ON(apply_cnt != 1); -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: post applied: %s\n", __func__, func->name); - } - -@@ -81,9 +83,10 @@ static int pre_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre reverted: %s\n", __func__, func->name); - } - -@@ -100,9 +103,10 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = 
&payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name); - } - -diff --git a/xen/test/livepatch/xen_action_hooks_nofunc.c b/xen/test/livepatch/xen_action_hooks_nofunc.c -index 2b4e90436f..077c4c1738 100644 ---- a/xen/test/livepatch/xen_action_hooks_nofunc.c -+++ b/xen/test/livepatch/xen_action_hooks_nofunc.c -@@ -23,7 +23,7 @@ static int apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - apply_cnt++; - printk(KERN_DEBUG "%s: applying: %s\n", __func__, func->name); -@@ -42,7 +42,7 @@ static int revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - revert_cnt++; - printk(KERN_DEBUG "%s: reverting: %s\n", __func__, func->name); -@@ -61,7 +61,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - printk(KERN_DEBUG "%s: reverted: %s\n", __func__, func->name); - } -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index ef77e72071..3e21ade6ab 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -25,9 +25,10 @@ static int pre_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre applied: %s\n", __func__, func->name); - } - -@@ -44,9 +45,10 @@ static void post_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied != LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: post applied: %s\n", __func__, func->name); - } - -@@ -61,9 +63,10 @@ static int pre_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - -- BUG_ON(func->applied != LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); - printk(KERN_DEBUG "%s: pre reverted: %s\n", __func__, func->name); - } - -@@ -80,7 +83,7 @@ static int revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - revert_cnt++; - printk(KERN_DEBUG "%s: reverting: %s\n", __func__, func->name); -@@ -99,16 +102,17 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; 
i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; -+ struct livepatch_fstate *fstate = &payload->fstate[i]; - - BUG_ON(revert_cnt != 1); -- BUG_ON(func->applied != LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); - - /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */ - arch_livepatch_quiesce(); - common_livepatch_revert(payload); - arch_livepatch_revive(); -- BUG_ON(func->applied == LIVEPATCH_FUNC_APPLIED); -+ BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); - - printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name); - } -diff --git a/xen/test/livepatch/xen_prepost_hooks.c b/xen/test/livepatch/xen_prepost_hooks.c -index 889377d6eb..17f5af6a19 100644 ---- a/xen/test/livepatch/xen_prepost_hooks.c -+++ b/xen/test/livepatch/xen_prepost_hooks.c -@@ -30,7 +30,7 @@ static int pre_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - pre_apply_cnt++; - printk(KERN_DEBUG "%s: applying: %s\n", __func__, func->name); -@@ -49,7 +49,7 @@ static void post_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - post_apply_cnt++; - printk(KERN_DEBUG "%s: applied: %s\n", __func__, func->name); -@@ -66,7 +66,7 @@ static int pre_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - pre_revert_cnt++; - printk(KERN_DEBUG "%s: reverting: %s\n", __func__, func->name); -@@ -86,7 +86,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - post_revert_cnt++; - printk(KERN_DEBUG "%s: reverted: %s\n", __func__, func->name); -diff --git a/xen/test/livepatch/xen_prepost_hooks_fail.c b/xen/test/livepatch/xen_prepost_hooks_fail.c -index c6feb5d32d..52fd7f642e 100644 ---- a/xen/test/livepatch/xen_prepost_hooks_fail.c -+++ b/xen/test/livepatch/xen_prepost_hooks_fail.c -@@ -24,7 +24,7 @@ static int pre_apply_hook(livepatch_payload_t *payload) - - for (i = 0; i < payload->nfuncs; i++) - { -- struct livepatch_func *func = &payload->funcs[i]; -+ const struct livepatch_func *func = &payload->funcs[i]; - - printk(KERN_DEBUG "%s: pre applying: %s\n", __func__, func->name); - } --- -2.44.0 - - -From 61d032e322b178a49983359b0dfd64a42c1f5fca Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo <alejandro.vallejo@cloud.com> -Date: Wed, 6 Dec 2023 10:39:15 +0100 -Subject: [PATCH 09/70] xen/x86: In x2APIC mode, derive LDR from APIC ID -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Both Intel and AMD manuals agree that in x2APIC mode, the APIC LDR and ID -registers are derivable from each other through a fixed formula. - -Xen uses that formula, but applies it to vCPU IDs (which are sequential) -rather than x2APIC IDs (which are not, at the moment). As I understand it, -this is an attempt to tightly pack vCPUs into clusters so each cluster has -16 vCPUs rather than 8, but this is a spec violation. 
- -This patch fixes the implementation so we follow the x2APIC spec for new -VMs, while preserving the behaviour (buggy or fixed) for migrated-in VMs. - -While touching that area, remove the existing printk statement in -vlapic_load_fixup() (as the checks it performed didn't make sense in x2APIC -mode and wouldn't affect the outcome) and put another printk as an else -branch so we get warnings trying to load nonsensical LDR values we don't -know about. - -Fixes: f9e0cccf7b35 ("x86/HVM: fix ID handling of x2APIC emulation") -Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 90309854fd2440fb08b4c808f47d7670ba0d250d -master date: 2023-11-29 10:05:55 +0100 ---- - xen/arch/x86/hvm/vlapic.c | 64 +++++++++++++++++++-------- - xen/arch/x86/include/asm/hvm/domain.h | 3 ++ - 2 files changed, 48 insertions(+), 19 deletions(-) - -diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c -index c7ce82d064..ba569043ea 100644 ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -1061,13 +1061,26 @@ static const struct hvm_mmio_ops vlapic_mmio_ops = { - .write = vlapic_mmio_write, - }; - -+static uint32_t x2apic_ldr_from_id(uint32_t id) -+{ -+ return ((id & ~0xf) << 12) | (1 << (id & 0xf)); -+} -+ - static void set_x2apic_id(struct vlapic *vlapic) - { -- u32 id = vlapic_vcpu(vlapic)->vcpu_id; -- u32 ldr = ((id & ~0xf) << 12) | (1 << (id & 0xf)); -+ const struct vcpu *v = vlapic_vcpu(vlapic); -+ uint32_t apic_id = v->vcpu_id * 2; -+ uint32_t apic_ldr = x2apic_ldr_from_id(apic_id); - -- vlapic_set_reg(vlapic, APIC_ID, id * 2); -- vlapic_set_reg(vlapic, APIC_LDR, ldr); -+ /* -+ * Workaround for migrated domains to derive LDRs as the source host -+ * would've. -+ */ -+ if ( v->domain->arch.hvm.bug_x2apic_ldr_vcpu_id ) -+ apic_ldr = x2apic_ldr_from_id(v->vcpu_id); -+ -+ vlapic_set_reg(vlapic, APIC_ID, apic_id); -+ vlapic_set_reg(vlapic, APIC_LDR, apic_ldr); - } - - int guest_wrmsr_apic_base(struct vcpu *v, uint64_t val) -@@ -1498,27 +1511,40 @@ static int cf_check lapic_save_regs(struct vcpu *v, hvm_domain_context_t *h) - */ - static void lapic_load_fixup(struct vlapic *vlapic) - { -- uint32_t id = vlapic->loaded.id; -+ const struct vcpu *v = vlapic_vcpu(vlapic); -+ uint32_t good_ldr = x2apic_ldr_from_id(vlapic->loaded.id); - -- if ( vlapic_x2apic_mode(vlapic) && id && vlapic->loaded.ldr == 1 ) -+ /* Skip fixups on xAPIC mode, or if the x2APIC LDR is already correct */ -+ if ( !vlapic_x2apic_mode(vlapic) || -+ (vlapic->loaded.ldr == good_ldr) ) -+ return; -+ -+ if ( vlapic->loaded.ldr == 1 ) - { -- /* -- * This is optional: ID != 0 contradicts LDR == 1. It's being added -- * to aid in eventual debugging of issues arising from the fixup done -- * here, but can be dropped as soon as it is found to conflict with -- * other (future) changes. -- */ -- if ( GET_xAPIC_ID(id) != vlapic_vcpu(vlapic)->vcpu_id * 2 || -- id != SET_xAPIC_ID(GET_xAPIC_ID(id)) ) -- printk(XENLOG_G_WARNING "%pv: bogus APIC ID %#x loaded\n", -- vlapic_vcpu(vlapic), id); -+ /* -+ * Xen <= 4.4 may have a bug by which all the APICs configured in -+ * x2APIC mode got LDR = 1, which is inconsistent on every vCPU -+ * except for the one with ID = 0. We'll fix the bug now and assign -+ * an LDR value consistent with the APIC ID. -+ */ - set_x2apic_id(vlapic); - } -- else /* Undo an eventual earlier fixup. 
*/ -+ else if ( vlapic->loaded.ldr == x2apic_ldr_from_id(v->vcpu_id) ) - { -- vlapic_set_reg(vlapic, APIC_ID, id); -- vlapic_set_reg(vlapic, APIC_LDR, vlapic->loaded.ldr); -+ /* -+ * Migrations from Xen 4.4 to date (4.19 dev window, Nov 2023) may -+ * have LDR drived from the vCPU ID, not the APIC ID. We must preserve -+ * LDRs so new vCPUs use consistent derivations and existing guests, -+ * which may have already read the LDR at the source host, aren't -+ * surprised when interrupts stop working the way they did at the -+ * other end. -+ */ -+ v->domain->arch.hvm.bug_x2apic_ldr_vcpu_id = true; - } -+ else -+ printk(XENLOG_G_WARNING -+ "%pv: bogus x2APIC record: ID %#x, LDR %#x, expected LDR %#x\n", -+ v, vlapic->loaded.id, vlapic->loaded.ldr, good_ldr); - } - - static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h) -diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h -index 6e53ce4449..dd9d837e84 100644 ---- a/xen/arch/x86/include/asm/hvm/domain.h -+++ b/xen/arch/x86/include/asm/hvm/domain.h -@@ -106,6 +106,9 @@ struct hvm_domain { - - bool is_s3_suspended; - -+ /* Compatibility setting for a bug in x2APIC LDR */ -+ bool bug_x2apic_ldr_vcpu_id; -+ - /* hypervisor intercepted msix table */ - struct list_head msixtbl_list; - --- -2.44.0 - - -From 3af9d1cbb602a9dcbab2e43fab74a881c2e05d81 Mon Sep 17 00:00:00 2001 -From: Alejandro Vallejo <alejandro.vallejo@cloud.com> -Date: Wed, 6 Dec 2023 10:39:55 +0100 -Subject: [PATCH 10/70] tools/xg: Fix potential memory leak in cpu policy - getters/setters - -They allocate two different hypercall buffers, but leak the first -allocation if the second one failed due to an early return that bypasses -cleanup. - -Remove the early exit and go through _post() instead. Invoking _post() is -benign even if _pre() failed. 
- -Fixes: 6b85e427098c ('x86/sysctl: Implement XEN_SYSCTL_get_cpu_policy') -Fixes: 60529dfeca14 ('x86/domctl: Implement XEN_DOMCTL_get_cpu_policy') -Fixes: 14ba07e6f816 ('x86/domctl: Implement XEN_DOMCTL_set_cpumsr_policy') -Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 1571ff7a987b88b20598a6d49910457f3b2c59f1 -master date: 2023-12-01 10:53:07 +0100 ---- - tools/libs/guest/xg_cpuid_x86.c | 86 +++++++++++++++------------------ - 1 file changed, 39 insertions(+), 47 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index f2b1e80901..3a74bb2b37 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -136,20 +136,20 @@ static int get_system_cpu_policy(xc_interface *xch, uint32_t index, - DECLARE_HYPERCALL_BOUNCE(msrs, - *nr_msrs * sizeof(*msrs), - XC_HYPERCALL_BUFFER_BOUNCE_OUT); -- int ret; -- -- if ( xc_hypercall_bounce_pre(xch, leaves) || -- xc_hypercall_bounce_pre(xch, msrs) ) -- return -1; -+ int ret = -1; - -- sysctl.cmd = XEN_SYSCTL_get_cpu_policy; -- sysctl.u.cpu_policy.index = index; -- sysctl.u.cpu_policy.nr_leaves = *nr_leaves; -- set_xen_guest_handle(sysctl.u.cpu_policy.leaves, leaves); -- sysctl.u.cpu_policy.nr_msrs = *nr_msrs; -- set_xen_guest_handle(sysctl.u.cpu_policy.msrs, msrs); -- -- ret = do_sysctl(xch, &sysctl); -+ if ( !xc_hypercall_bounce_pre(xch, leaves) && -+ !xc_hypercall_bounce_pre(xch, msrs) ) -+ { -+ sysctl.cmd = XEN_SYSCTL_get_cpu_policy; -+ sysctl.u.cpu_policy.index = index; -+ sysctl.u.cpu_policy.nr_leaves = *nr_leaves; -+ set_xen_guest_handle(sysctl.u.cpu_policy.leaves, leaves); -+ sysctl.u.cpu_policy.nr_msrs = *nr_msrs; -+ set_xen_guest_handle(sysctl.u.cpu_policy.msrs, msrs); -+ -+ ret = do_sysctl(xch, &sysctl); -+ } - - xc_hypercall_bounce_post(xch, leaves); - xc_hypercall_bounce_post(xch, msrs); -@@ -174,20 +174,20 @@ static int get_domain_cpu_policy(xc_interface *xch, uint32_t domid, - DECLARE_HYPERCALL_BOUNCE(msrs, - *nr_msrs * sizeof(*msrs), - XC_HYPERCALL_BUFFER_BOUNCE_OUT); -- int ret; -- -- if ( xc_hypercall_bounce_pre(xch, leaves) || -- xc_hypercall_bounce_pre(xch, msrs) ) -- return -1; -- -- domctl.cmd = XEN_DOMCTL_get_cpu_policy; -- domctl.domain = domid; -- domctl.u.cpu_policy.nr_leaves = *nr_leaves; -- set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); -- domctl.u.cpu_policy.nr_msrs = *nr_msrs; -- set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); -+ int ret = -1; - -- ret = do_domctl(xch, &domctl); -+ if ( !xc_hypercall_bounce_pre(xch, leaves) && -+ !xc_hypercall_bounce_pre(xch, msrs) ) -+ { -+ domctl.cmd = XEN_DOMCTL_get_cpu_policy; -+ domctl.domain = domid; -+ domctl.u.cpu_policy.nr_leaves = *nr_leaves; -+ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); -+ domctl.u.cpu_policy.nr_msrs = *nr_msrs; -+ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); -+ -+ ret = do_domctl(xch, &domctl); -+ } - - xc_hypercall_bounce_post(xch, leaves); - xc_hypercall_bounce_post(xch, msrs); -@@ -214,32 +214,24 @@ int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid, - DECLARE_HYPERCALL_BOUNCE(msrs, - nr_msrs * sizeof(*msrs), - XC_HYPERCALL_BUFFER_BOUNCE_IN); -- int ret; -- -- if ( err_leaf_p ) -- *err_leaf_p = -1; -- if ( err_subleaf_p ) -- *err_subleaf_p = -1; -- if ( err_msr_p ) -- *err_msr_p = -1; -+ int ret = -1; - -- if ( xc_hypercall_bounce_pre(xch, leaves) ) -- return -1; -- -- if ( xc_hypercall_bounce_pre(xch, msrs) ) -- return -1; -- -- domctl.cmd = 
XEN_DOMCTL_set_cpu_policy; -- domctl.domain = domid; -- domctl.u.cpu_policy.nr_leaves = nr_leaves; -- set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); -- domctl.u.cpu_policy.nr_msrs = nr_msrs; -- set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); - domctl.u.cpu_policy.err_leaf = -1; - domctl.u.cpu_policy.err_subleaf = -1; - domctl.u.cpu_policy.err_msr = -1; - -- ret = do_domctl(xch, &domctl); -+ if ( !xc_hypercall_bounce_pre(xch, leaves) && -+ !xc_hypercall_bounce_pre(xch, msrs) ) -+ { -+ domctl.cmd = XEN_DOMCTL_set_cpu_policy; -+ domctl.domain = domid; -+ domctl.u.cpu_policy.nr_leaves = nr_leaves; -+ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); -+ domctl.u.cpu_policy.nr_msrs = nr_msrs; -+ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); -+ -+ ret = do_domctl(xch, &domctl); -+ } - - xc_hypercall_bounce_post(xch, leaves); - xc_hypercall_bounce_post(xch, msrs); --- -2.44.0 - - -From 18f900b77b3a85acadc2fe152ea354a02569acab Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Wed, 6 Dec 2023 10:40:19 +0100 -Subject: [PATCH 11/70] x86emul: avoid triggering event related assertions - -The assertion at the end of x86_emulate_wrapper() as well as the ones -in x86_emul_{hw_exception,pagefault}() can trigger if we ignore -X86EMUL_EXCEPTION coming back from certain hook functions. Squash -exceptions when merely probing MSRs, plus on SWAPGS'es "best effort" -error handling path. - -In adjust_bnd() add another assertion after the read_xcr(0, ...) -invocation, paralleling the one in x86emul_get_fpu() - XCR0 reads should -never fault when XSAVE is (implicitly) known to be available. - -Also update the respective comment in x86_emulate_wrapper(). - -Fixes: 14a6be89ec04 ("x86emul: correct EFLAGS.TF handling") -Fixes: cb2626c75813 ("x86emul: conditionally clear BNDn for branches") -Fixes: 6eb43fcf8a0b ("x86emul: support SWAPGS") -Reported-by: AFL -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 787d11c5aaf4d3411d4658cff137cd49b0bd951b -master date: 2023-12-05 09:57:05 +0100 ---- - xen/arch/x86/x86_emulate/0f01.c | 6 ++++-- - xen/arch/x86/x86_emulate/0fae.c | 3 +++ - xen/arch/x86/x86_emulate/x86_emulate.c | 28 +++++++++++++++++++++----- - 3 files changed, 30 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/0f01.c b/xen/arch/x86/x86_emulate/0f01.c -index ba43fc394b..1ba99609d6 100644 ---- a/xen/arch/x86/x86_emulate/0f01.c -+++ b/xen/arch/x86/x86_emulate/0f01.c -@@ -200,8 +200,10 @@ int x86emul_0f01(struct x86_emulate_state *s, - if ( (rc = ops->write_segment(x86_seg_gs, &sreg, - ctxt)) != X86EMUL_OKAY ) - { -- /* Best effort unwind (i.e. no error checking). */ -- ops->write_msr(MSR_SHADOW_GS_BASE, msr_val, ctxt); -+ /* Best effort unwind (i.e. no real error checking). 
*/ -+ if ( ops->write_msr(MSR_SHADOW_GS_BASE, msr_val, -+ ctxt) == X86EMUL_EXCEPTION ) -+ x86_emul_reset_event(ctxt); - goto done; - } - break; -diff --git a/xen/arch/x86/x86_emulate/0fae.c b/xen/arch/x86/x86_emulate/0fae.c -index 00840b1d07..ba77af58f2 100644 ---- a/xen/arch/x86/x86_emulate/0fae.c -+++ b/xen/arch/x86/x86_emulate/0fae.c -@@ -55,7 +55,10 @@ int x86emul_0fae(struct x86_emulate_state *s, - cr4 = X86_CR4_OSFXSR; - if ( !ops->read_msr || - ops->read_msr(MSR_EFER, &msr_val, ctxt) != X86EMUL_OKAY ) -+ { -+ x86_emul_reset_event(ctxt); - msr_val = 0; -+ } - if ( !(cr4 & X86_CR4_OSFXSR) || - (mode_64bit() && mode_ring0() && (msr_val & EFER_FFXSE)) ) - s->op_bytes = offsetof(struct x86_fxsr, xmm[0]); -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 94caec1d14..cf780da501 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1143,10 +1143,18 @@ static bool is_branch_step(struct x86_emulate_ctxt *ctxt, - const struct x86_emulate_ops *ops) - { - uint64_t debugctl; -+ int rc = X86EMUL_UNHANDLEABLE; - -- return ops->read_msr && -- ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl, ctxt) == X86EMUL_OKAY && -- (debugctl & IA32_DEBUGCTLMSR_BTF); -+ if ( !ops->read_msr || -+ (rc = ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl, -+ ctxt)) != X86EMUL_OKAY ) -+ { -+ if ( rc == X86EMUL_EXCEPTION ) -+ x86_emul_reset_event(ctxt); -+ debugctl = 0; -+ } -+ -+ return debugctl & IA32_DEBUGCTLMSR_BTF; - } - - static void adjust_bnd(struct x86_emulate_ctxt *ctxt, -@@ -1160,13 +1168,21 @@ static void adjust_bnd(struct x86_emulate_ctxt *ctxt, - - if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY || - !(xcr0 & X86_XCR0_BNDREGS) || !(xcr0 & X86_XCR0_BNDCSR) ) -+ { -+ ASSERT(!ctxt->event_pending); - return; -+ } - - if ( !mode_ring0() ) - bndcfg = read_bndcfgu(); - else if ( !ops->read_msr || -- ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg, ctxt) != X86EMUL_OKAY ) -+ (rc = ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg, -+ ctxt)) != X86EMUL_OKAY ) -+ { -+ if ( rc == X86EMUL_EXCEPTION ) -+ x86_emul_reset_event(ctxt); - return; -+ } - if ( (bndcfg & IA32_BNDCFGS_ENABLE) && !(bndcfg & IA32_BNDCFGS_PRESERVE) ) - { - /* -@@ -8677,7 +8693,9 @@ int x86_emulate_wrapper( - * An event being pending should exactly match returning - * X86EMUL_EXCEPTION. (If this trips, the chances are a codepath has - * called hvm_inject_hw_exception() rather than using -- * x86_emul_hw_exception().) -+ * x86_emul_hw_exception(), or the invocation of a hook has caused an -+ * exception to be raised, while the caller was only checking for -+ * success/failure.) - */ - ASSERT(ctxt->event_pending == (rc == X86EMUL_EXCEPTION)); - --- -2.44.0 - - -From 5ac87c8afd2ae2b1a9fd46a9b80d9152d650fb26 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Wed, 6 Dec 2023 10:40:54 +0100 -Subject: [PATCH 12/70] xen/sched: fix adding offline cpu to cpupool -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Trying to add an offline cpu to a cpupool can crash the hypervisor, -as the probably non-existing percpu area of the cpu is accessed before -the availability of the cpu is being tested. This can happen in case -the cpupool's granularity is "core" or "socket". - -Fix that by testing the cpu to be online. 
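
A minimal sketch of the ordering this fix enforces — validate that the CPU is
online before touching its per-CPU state. Only cpu_online() comes from the hunk
below; use_percpu_state() is a hypothetical stand-in for the later accesses:

    #include <errno.h>
    #include <stdbool.h>

    extern bool cpu_online(unsigned int cpu);
    extern int use_percpu_state(unsigned int cpu);   /* hypothetical */

    int add_cpu_to_pool(unsigned int cpu, unsigned int nr_cpu_ids)
    {
        if ( cpu >= nr_cpu_ids )
            return -EINVAL;
        /* The added guard: fail with -ENODEV before the per-CPU area of a
         * possibly offline CPU is ever dereferenced. */
        if ( !cpu_online(cpu) )
            return -ENODEV;
        return use_percpu_state(cpu);
    }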
- -Fixes: cb563d7665f2 ("xen/sched: support core scheduling for moving cpus to/from cpupools") -Reported-by: René Winther Højgaard <renewin@proton.me> -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 06e8d65d33896aa90f5b6d9b2bce7f11433b33c9 -master date: 2023-12-05 09:57:38 +0100 ---- - xen/common/sched/cpupool.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c -index 2e094b0cfa..ad8f608462 100644 ---- a/xen/common/sched/cpupool.c -+++ b/xen/common/sched/cpupool.c -@@ -892,6 +892,8 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) - if ( cpu >= nr_cpu_ids ) - goto addcpu_out; - ret = -ENODEV; -+ if ( !cpu_online(cpu) ) -+ goto addcpu_out; - cpus = sched_get_opt_cpumask(c->gran, cpu); - if ( !cpumask_subset(cpus, &cpupool_free_cpus) || - cpumask_intersects(cpus, &cpupool_locked_cpus) ) --- -2.44.0 - - -From 25b7f9ed0f8c7e138a2cecb113bd377c613153d7 Mon Sep 17 00:00:00 2001 -From: Stewart Hildebrand <stewart.hildebrand@amd.com> -Date: Wed, 6 Dec 2023 10:41:19 +0100 -Subject: [PATCH 13/70] xen/domain: fix error path in domain_create() - -If rangeset_new() fails, err would not be set to an appropriate error -code. Set it to -ENOMEM. - -Fixes: 580c458699e3 ("xen/domain: Call arch_domain_create() as early as possible in domain_create()") -Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ff1178062094837d55ef342070e58316c43a54c9 -master date: 2023-12-05 10:00:51 +0100 ---- - xen/common/domain.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/xen/common/domain.c b/xen/common/domain.c -index 8f9ab01c0c..003f4ab125 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -703,6 +703,7 @@ struct domain *domain_create(domid_t domid, - watchdog_domain_init(d); - init_status |= INIT_watchdog; - -+ err = -ENOMEM; - d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex); - d->irq_caps = rangeset_new(d, "Interrupts", 0); - if ( !d->iomem_caps || !d->irq_caps ) --- -2.44.0 - - -From a56d598e13db413f98e149f8e10cc13e8d4c1635 Mon Sep 17 00:00:00 2001 -From: Julien Grall <jgrall@amazon.com> -Date: Tue, 12 Dec 2023 14:26:18 +0100 -Subject: [PATCH 14/70] Only compile the hypervisor with - -Wdeclaration-after-statement - -Right now, all tools and hypervisor will be complied with the option --Wdeclaration-after-statement. While most of the code in the hypervisor -is controlled by us, for tools we may import external libraries. - -The build will fail if one of them are using the construct we are -trying to prevent. 
This is the case when building against Python 3.12 -and Yocto: - -| In file included from /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/Python.h:44, -| from xen/lowlevel/xc/xc.c:8: -| /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/object.h: In function 'Py_SIZE': -| /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/object.h:233:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] -| 233 | PyVarObject *var_ob = _PyVarObject_CAST(ob); -| | ^~~~~~~~~~~ -| In file included from /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/Python.h:53: -| /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/cpython/longintrepr.h: In function '_PyLong_CompactValue': -| /srv/storage/alex/yocto/build-virt/tmp/work/core2-64-poky-linux/xen-tools/4.17+stable/recipe-sysroot/usr/include/python3.12/cpython/longintrepr.h:121:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] -| 121 | Py_ssize_t sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); -| | ^~~~~~~~~~ -| cc1: all warnings being treated as errors - -Looking at the tools directory, a fair few directory already add --Wno-declaration-after-statement to inhibit the default behavior. - -We have always build the hypervisor with the flag, so for now remove -only the flag for anything but the hypervisor. We can decide at later -time whether we want to relax. - -Also remove the -Wno-declaration-after-statement in some subdirectory -as the flag is now unnecessary. - -Part of the commit message was take from Alexander's first proposal: - -Link: https://lore.kernel.org/xen-devel/20231128174729.3880113-1-alex@linutronix.de/ -Reported-by: Alexander Kanavin <alex@linutronix.de> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Tested-by: Jason Andryuk <jandryuk@gmail.com> -Signed-off-by: Julien Grall <jgrall@amazon.com> - -xen/hypervisor: Don't use cc-option-add for -Wdeclaration-after-statement - -Per Andrew's comment in [1] all the compilers we support should -recognize the flag. - -I forgot to address the comment while committing. 
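
For reference, a minimal example of the construct -Wdeclaration-after-statement
diagnoses, compiled with e.g. gcc -std=gnu99 -Werror=declaration-after-statement:

    int f(int x)
    {
        int a = x + 1;   /* declaration before any statement: fine */

        a *= 2;          /* statement */
        int b = a - x;   /* "ISO C90 forbids mixed declarations and code" */
        return a + b;
    }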
- -[1] fcf00090-304a-49f7-8a61-a54347e90a3b@citrix.com - -Signed-off-by: Julien Grall <jgrall@amazon.com> -master commit: 40be6307ec005539635e7b8fcef67e989dc441f6 -master date: 2023-12-06 19:12:40 +0000 -master commit: d4bfd3899886d0fbe259c20660dadb1e00170f2d -master date: 2023-12-06 19:19:59 +0000 ---- - Config.mk | 2 -- - stubdom/Makefile | 2 +- - stubdom/vtpmmgr/Makefile | 2 +- - tools/libs/light/Makefile | 3 +-- - tools/libs/util/Makefile | 3 +-- - tools/tests/depriv/Makefile | 2 -- - tools/xl/Makefile | 3 +-- - xen/Makefile | 1 + - 8 files changed, 6 insertions(+), 12 deletions(-) - -diff --git a/Config.mk b/Config.mk -index 29b0d1e12a..2a3e16d0bd 100644 ---- a/Config.mk -+++ b/Config.mk -@@ -177,8 +177,6 @@ CFLAGS += -std=gnu99 - - CFLAGS += -Wall -Wstrict-prototypes - --$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement) --$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement) - $(call cc-option-add,CFLAGS,CC,-Wno-unused-but-set-variable) - $(call cc-option-add,CFLAGS,CC,-Wno-unused-local-typedefs) - -diff --git a/stubdom/Makefile b/stubdom/Makefile -index 0ddfce1ba2..888fa20d72 100644 ---- a/stubdom/Makefile -+++ b/stubdom/Makefile -@@ -245,7 +245,7 @@ tpm_emulator-$(XEN_TARGET_ARCH): tpm_emulator-$(TPMEMU_VERSION).tar.gz - patch -d $@ -p1 < vtpm-command-duration.patch - patch -d $@ -p1 < vtpm-tpm_bn_t-addr.patch - mkdir $@/build -- cd $@/build; CC=${CC} $(CMAKE) .. -DCMAKE_C_FLAGS:STRING="-std=c99 -DTPM_NO_EXTERN $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -Wno-declaration-after-statement" -+ cd $@/build; CC=${CC} $(CMAKE) .. -DCMAKE_C_FLAGS:STRING="-std=c99 -DTPM_NO_EXTERN $(TARGET_CPPFLAGS) $(TARGET_CFLAGS)" - touch $@ - - TPMEMU_STAMPFILE=$(CROSS_ROOT)/$(GNU_TARGET_ARCH)-xen-elf/lib/libtpm.a -diff --git a/stubdom/vtpmmgr/Makefile b/stubdom/vtpmmgr/Makefile -index 6dae034a07..c29bb49838 100644 ---- a/stubdom/vtpmmgr/Makefile -+++ b/stubdom/vtpmmgr/Makefile -@@ -17,7 +17,7 @@ OBJS += vtpm_disk.o disk_tpm.o disk_io.o disk_crypto.o disk_read.o disk_write.o - OBJS += mgmt_authority.o - - CFLAGS+=-Werror -Iutil -Icrypto -Itcs --CFLAGS+=-Wno-declaration-after-statement -Wno-unused-label -+CFLAGS+=-Wno-unused-label - - build: $(TARGET) - $(TARGET): $(OBJS) -diff --git a/tools/libs/light/Makefile b/tools/libs/light/Makefile -index ba4c1b7933..37e4d16709 100644 ---- a/tools/libs/light/Makefile -+++ b/tools/libs/light/Makefile -@@ -38,8 +38,7 @@ vpath static_tables.c $(ACPI_PATH)/ - - OBJS-$(CONFIG_X86) += $(ACPI_OBJS) - --CFLAGS += -Wno-format-zero-length -Wmissing-declarations \ -- -Wno-declaration-after-statement -Wformat-nonliteral -+CFLAGS += -Wno-format-zero-length -Wmissing-declarations -Wformat-nonliteral - - CFLAGS-$(CONFIG_X86) += -DCONFIG_PCI_SUPP_LEGACY_IRQ - -diff --git a/tools/libs/util/Makefile b/tools/libs/util/Makefile -index c3b21875dc..936ec90a31 100644 ---- a/tools/libs/util/Makefile -+++ b/tools/libs/util/Makefile -@@ -9,8 +9,7 @@ OBJS-y += libxlu_disk.o - OBJS-y += libxlu_vif.o - OBJS-y += libxlu_pci.o - --CFLAGS += -Wno-format-zero-length -Wmissing-declarations \ -- -Wno-declaration-after-statement -Wformat-nonliteral -+CFLAGS += -Wno-format-zero-length -Wmissing-declarations -Wformat-nonliteral - CFLAGS += $(CFLAGS_libxenctrl) - - CFLAGS += $(PTHREAD_CFLAGS) -diff --git a/tools/tests/depriv/Makefile b/tools/tests/depriv/Makefile -index 7d9e3b01bb..5404a12f47 100644 ---- a/tools/tests/depriv/Makefile -+++ b/tools/tests/depriv/Makefile -@@ -1,8 +1,6 @@ - XEN_ROOT=$(CURDIR)/../../.. 
- include $(XEN_ROOT)/tools/Rules.mk - --CFLAGS += -Wno-declaration-after-statement -- - CFLAGS += $(CFLAGS_xeninclude) - CFLAGS += $(CFLAGS_libxenctrl) - CFLAGS += $(CFLAGS_libxencall) -diff --git a/tools/xl/Makefile b/tools/xl/Makefile -index 5f7aa5f46c..d742e96a5b 100644 ---- a/tools/xl/Makefile -+++ b/tools/xl/Makefile -@@ -5,8 +5,7 @@ - XEN_ROOT = $(CURDIR)/../.. - include $(XEN_ROOT)/tools/Rules.mk - --CFLAGS += -Wno-format-zero-length -Wmissing-declarations \ -- -Wno-declaration-after-statement -Wformat-nonliteral -+CFLAGS += -Wno-format-zero-length -Wmissing-declarations -Wformat-nonliteral - CFLAGS += -fPIC - - CFLAGS += $(PTHREAD_CFLAGS) -diff --git a/xen/Makefile b/xen/Makefile -index e39290f638..a92709b43e 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -392,6 +392,7 @@ CFLAGS-$(CONFIG_CC_SPLIT_SECTIONS) += -ffunction-sections -fdata-sections - - CFLAGS += -nostdinc -fno-builtin -fno-common - CFLAGS += -Werror -Wredundant-decls -Wno-pointer-arith -+CFLAGS += -Wdeclaration-after-statement - $(call cc-option-add,CFLAGS,CC,-Wvla) - CFLAGS += -pipe -D__XEN__ -include $(srctree)/include/xen/config.h - CFLAGS-$(CONFIG_DEBUG_INFO) += -g --- -2.44.0 - - -From 48eb9e91990b3fd42f8e847780f6cdb188245b4a Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 12 Dec 2023 14:26:35 +0100 -Subject: [PATCH 15/70] xen/sched: fix sched_move_domain() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Do cleanup in sched_move_domain() in a dedicated service function, -which is called either in error case with newly allocated data, or in -success case with the old data to be freed. - -This will at once fix some subtle bugs which sneaked in due to -forgetting to overwrite some pointers in the error case. - -Fixes: 70fadc41635b ("xen/cpupool: support moving domain between cpupools with different granularity") -Reported-by: René Winther Højgaard <renewin@proton.me> -Initial-fix-by: Jan Beulich <jbeulich@suse.com> -Initial-fix-by: George Dunlap <george.dunlap@cloud.com> -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Acked-by: George Dunlap <george.dunlap@cloud.com> -master commit: 23792cc0f22cff4e106d838b83aa9ae1cb6ffaf4 -master date: 2023-12-07 13:37:25 +0000 ---- - xen/common/sched/core.c | 47 +++++++++++++++++++++++------------------ - 1 file changed, 27 insertions(+), 20 deletions(-) - -diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c -index eba0cea4bb..901782bbb4 100644 ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -647,6 +647,24 @@ static void sched_move_irqs(const struct sched_unit *unit) - vcpu_move_irqs(v); - } - -+static void sched_move_domain_cleanup(const struct scheduler *ops, -+ struct sched_unit *units, -+ void *domdata) -+{ -+ struct sched_unit *unit, *old_unit; -+ -+ for ( unit = units; unit; ) -+ { -+ if ( unit->priv ) -+ sched_free_udata(ops, unit->priv); -+ old_unit = unit; -+ unit = unit->next_in_list; -+ xfree(old_unit); -+ } -+ -+ sched_free_domdata(ops, domdata); -+} -+ - /* - * Move a domain from one cpupool to another. - * -@@ -686,7 +704,6 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - void *old_domdata; - unsigned int gran = cpupool_get_granularity(c); - unsigned int n_units = d->vcpu[0] ? 
DIV_ROUND_UP(d->max_vcpus, gran) : 0; -- int ret = 0; - - for_each_vcpu ( d, v ) - { -@@ -699,8 +716,9 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - domdata = sched_alloc_domdata(c->sched, d); - if ( IS_ERR(domdata) ) - { -- ret = PTR_ERR(domdata); -- goto out; -+ rcu_read_unlock(&sched_res_rculock); -+ -+ return PTR_ERR(domdata); - } - - for ( unit_idx = 0; unit_idx < n_units; unit_idx++ ) -@@ -718,10 +736,10 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - - if ( !unit || !unit->priv ) - { -- old_units = new_units; -- old_domdata = domdata; -- ret = -ENOMEM; -- goto out_free; -+ sched_move_domain_cleanup(c->sched, new_units, domdata); -+ rcu_read_unlock(&sched_res_rculock); -+ -+ return -ENOMEM; - } - - unit_ptr = &unit->next_in_list; -@@ -808,22 +826,11 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - - domain_unpause(d); - -- out_free: -- for ( unit = old_units; unit; ) -- { -- if ( unit->priv ) -- sched_free_udata(c->sched, unit->priv); -- old_unit = unit; -- unit = unit->next_in_list; -- xfree(old_unit); -- } -- -- sched_free_domdata(old_ops, old_domdata); -+ sched_move_domain_cleanup(old_ops, old_units, old_domdata); - -- out: - rcu_read_unlock(&sched_res_rculock); - -- return ret; -+ return 0; - } - - void sched_destroy_vcpu(struct vcpu *v) --- -2.44.0 - - -From a4f3f5a62c10a5adc898cf45261783209f5bc037 Mon Sep 17 00:00:00 2001 -From: Michal Orzel <michal.orzel@amd.com> -Date: Tue, 12 Dec 2023 14:27:10 +0100 -Subject: [PATCH 16/70] xen/arm: page: Avoid pointer overflow on cache clean & - invalidate - -On Arm32, after cleaning and invalidating the last dcache line of the top -domheap page i.e. VA = 0xfffff000 (as a result of flushing the page to -RAM), we end up adding the value of a dcache line size to the pointer -once again, which results in a pointer arithmetic overflow (with 64B line -size, operation 0xffffffc0 + 0x40 overflows to 0x0). Such behavior is -undefined and given the wide range of compiler versions we support, it is -difficult to determine what could happen in such scenario. - -Modify clean_and_invalidate_dcache_va_range() as well as -clean_dcache_va_range() and invalidate_dcache_va_range() due to similarity -of handling to prevent pointer arithmetic overflow. Modify the loops to -use an additional variable to store the index of the next cacheline. -Add an assert to prevent passing a region that wraps around which is -illegal and would end up in a page fault anyway (region 0-2MB is -unmapped). Lastly, return early if size passed is 0. - -Note that on Arm64, we don't have this problem given that the max VA -space we support is 48-bits. - -This is XSA-447 / CVE-2023-46837. 
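
The loop transformation is easier to see stripped of the cache-maintenance asm;
a hedged sketch, where do_line() is a hypothetical per-line operation:

    #include <stddef.h>

    extern void do_line(const char *line);   /* hypothetical */

    void walk_lines(const char *p, unsigned long size, size_t line_bytes)
    {
        unsigned long idx = 0;

        /* Advancing an index instead of p itself means p + idx is only
         * formed for lines inside the range, so a range ending at the top
         * of the VA space cannot overflow the pointer. */
        for ( ; size >= line_bytes; idx += line_bytes, size -= line_bytes )
            do_line(p + idx);
    }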
- -Signed-off-by: Michal Orzel <michal.orzel@amd.com> -Reviewed-by: Julien Grall <jgrall@amazon.com> -master commit: 190b7f49af6487a9665da63d43adc9d9a5fbd01e -master date: 2023-12-12 14:01:00 +0100 ---- - xen/arch/arm/include/asm/page.h | 35 ++++++++++++++++++++++++++------- - 1 file changed, 28 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h -index aa0080e8d7..645331fc89 100644 ---- a/xen/arch/arm/include/asm/page.h -+++ b/xen/arch/arm/include/asm/page.h -@@ -162,6 +162,13 @@ static inline size_t read_dcache_line_bytes(void) - static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - -@@ -174,11 +181,11 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - } - - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__invalidate_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx)); - - if ( size > 0 ) -- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); -+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx)); - - dsb(sy); /* So we know the flushes happen before continuing */ - -@@ -188,14 +195,21 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - static inline int clean_dcache_va_range(const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - size += (uintptr_t)p & cacheline_mask; - size = (size + cacheline_mask) & ~cacheline_mask; - p = (void *)((uintptr_t)p & ~cacheline_mask); - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__clean_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__clean_dcache_one(0) : : "r" (p + idx)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. */ - return 0; -@@ -205,14 +219,21 @@ static inline int clean_and_invalidate_dcache_va_range - (const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - size += (uintptr_t)p & cacheline_mask; - size = (size + cacheline_mask) & ~cacheline_mask; - p = (void *)((uintptr_t)p & ~cacheline_mask); - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. 
*/ - return 0; --- -2.44.0 - - -From 1792d1723b7fb45a20b145d2de4d233913b22c09 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 12 Dec 2023 14:45:52 +0100 -Subject: [PATCH 17/70] x86/x2apic: introduce a mixed physical/cluster mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current implementation of x2APIC requires to either use Cluster Logical or -Physical mode for all interrupts. However the selection of Physical vs Logical -is not done at APIC setup, an APIC can be addressed both in Physical or Logical -destination modes concurrently. - -Introduce a new x2APIC mode called Mixed, which uses Logical Cluster mode for -IPIs, and Physical mode for external interrupts, thus attempting to use the -best method for each interrupt type. - -Using Physical mode for external interrupts allows more vectors to be used, and -interrupt balancing to be more accurate. - -Using Logical Cluster mode for IPIs allows fewer accesses to the ICR register -when sending those, as multiple CPUs can be targeted with a single ICR register -write. - -A simple test calling flush_tlb_all() 10000 times on a tight loop on AMD EPYC -9754 with 512 CPUs gives the following figures in nano seconds: - -x mixed -+ phys -* cluster - N Min Max Median Avg Stddev -x 25 3.5131328e+08 3.5716441e+08 3.5410987e+08 3.5432659e+08 1566737.4 -+ 12 1.231082e+09 1.238824e+09 1.2370528e+09 1.2357981e+09 2853892.9 -Difference at 95.0% confidence - 8.81472e+08 +/- 1.46849e+06 - 248.774% +/- 0.96566% - (Student's t, pooled s = 2.05985e+06) -* 11 3.5099276e+08 3.5561459e+08 3.5461234e+08 3.5415668e+08 1415071.9 -No difference proven at 95.0% confidence - -So Mixed has no difference when compared to Cluster mode, and Physical mode is -248% slower when compared to either Mixed or Cluster modes with a 95% -confidence. - -Note that Xen uses Cluster mode by default, and hence is already using the -fastest way for IPI delivery at the cost of reducing the amount of vectors -available system-wide. - -Make the newly introduced mode the default one. - -Note the printing of the APIC addressing mode done in connect_bsp_APIC() has -been removed, as with the newly introduced mixed mode this would require more -fine grained printing, or else would be incorrect. The addressing mode can -already be derived from the APIC driver in use, which is printed by different -helpers. - -Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Henry Wang <Henry.Wang@arm.com> -master commit: e3c409d59ac87ccdf97b8c7708c81efa8069cb31 -master date: 2023-11-07 09:59:48 +0000 ---- - CHANGELOG.md | 7 +++ - docs/misc/xen-command-line.pandoc | 12 ++++ - xen/arch/x86/Kconfig | 35 +++++++++-- - xen/arch/x86/apic.c | 6 +- - xen/arch/x86/genapic/x2apic.c | 98 +++++++++++++++++++++++-------- - 5 files changed, 123 insertions(+), 35 deletions(-) - -diff --git a/CHANGELOG.md b/CHANGELOG.md -index 7fb4d366c3..5aa01dae5d 100644 ---- a/CHANGELOG.md -+++ b/CHANGELOG.md -@@ -4,6 +4,13 @@ Notable changes to Xen will be documented in this file. 
- - The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - -+## [4.18.1](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.1) -+ -+### Added -+ - On x86: -+ - Introduce a new x2APIC driver that uses Cluster Logical addressing mode -+ for IPIs and Physical addressing mode for external interrupts. -+ - ## [4.18.0](https://xenbits.xenproject.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.0) - 2023-11-16 - - ### Changed -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 9a19a04157..8e65f8bd18 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2804,6 +2804,15 @@ the watchdog. - - Permit use of x2apic setup for SMP environments. - -+### x2apic-mode (x86) -+> `= physical | cluster | mixed` -+ -+> Default: `physical` if **FADT** mandates physical mode, otherwise set at -+> build time by CONFIG_X2APIC_{PHYSICAL,LOGICAL,MIXED}. -+ -+In the case that x2apic is in use, this option switches between modes to -+address APICs in the system as interrupt destinations. -+ - ### x2apic_phys (x86) - > `= <boolean>` - -@@ -2814,6 +2823,9 @@ In the case that x2apic is in use, this option switches between physical and - clustered mode. The default, given no hint from the **FADT**, is cluster - mode. - -+**WARNING: `x2apic_phys` is deprecated and superseded by `x2apic-mode`. -+The latter takes precedence if both are set.** -+ - ### xenheap_megabytes (arm32) - > `= <size>` - -diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig -index eac77573bd..1acdffc51c 100644 ---- a/xen/arch/x86/Kconfig -+++ b/xen/arch/x86/Kconfig -@@ -228,11 +228,18 @@ config XEN_ALIGN_2M - - endchoice - --config X2APIC_PHYSICAL -- bool "x2APIC Physical Destination mode" -+choice -+ prompt "x2APIC Driver default" -+ default X2APIC_MIXED - help -- Use x2APIC Physical Destination mode by default when available. -+ Select APIC addressing when x2APIC is enabled. -+ -+ The default mode is mixed which should provide the best aspects -+ of both physical and cluster modes. - -+config X2APIC_PHYSICAL -+ bool "Physical Destination mode" -+ help - When using this mode APICs are addressed using the Physical - Destination mode, which allows using all dynamic vectors on each - CPU independently. -@@ -242,9 +249,27 @@ config X2APIC_PHYSICAL - destination inter processor interrupts (IPIs) slightly slower than - Logical Destination mode. - -- The mode when this option is not selected is Logical Destination. -+config X2APIC_CLUSTER -+ bool "Cluster Destination mode" -+ help -+ When using this mode APICs are addressed using the Cluster Logical -+ Destination mode. -+ -+ Cluster Destination has the benefit of sending IPIs faster since -+ multiple APICs can be targeted as destinations of a single IPI. -+ However the vector space is shared between all CPUs on the cluster, -+ and hence using this mode reduces the number of available vectors -+ when compared to Physical mode. - -- If unsure, say N. -+config X2APIC_MIXED -+ bool "Mixed Destination mode" -+ help -+ When using this mode APICs are addressed using the Cluster Logical -+ Destination mode for IPIs and Physical mode for external interrupts. -+ -+ Should provide the best of both modes. 
-+ -+endchoice - - config GUEST - bool -diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c -index f1264ce7ed..6acdd0ec14 100644 ---- a/xen/arch/x86/apic.c -+++ b/xen/arch/x86/apic.c -@@ -229,11 +229,7 @@ void __init connect_bsp_APIC(void) - outb(0x01, 0x23); - } - -- printk("Enabling APIC mode: %s. Using %d I/O APICs\n", -- !INT_DEST_MODE ? "Physical" -- : init_apic_ldr == init_apic_ldr_flat ? "Flat" -- : "Clustered", -- nr_ioapics); -+ printk("Enabling APIC mode. Using %d I/O APICs\n", nr_ioapics); - enable_apic_mode(); - } - -diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c -index 707deef98c..b88c7a96fe 100644 ---- a/xen/arch/x86/genapic/x2apic.c -+++ b/xen/arch/x86/genapic/x2apic.c -@@ -180,6 +180,36 @@ static const struct genapic __initconstrel apic_x2apic_cluster = { - .send_IPI_self = send_IPI_self_x2apic - }; - -+/* -+ * Mixed x2APIC mode: use physical for external (device) interrupts, and -+ * cluster for inter processor interrupts. Such mode has the benefits of not -+ * sharing the vector space with all CPUs on the cluster, while still allowing -+ * IPIs to be more efficiently delivered by not having to perform an ICR write -+ * for each target CPU. -+ */ -+static const struct genapic __initconstrel apic_x2apic_mixed = { -+ APIC_INIT("x2apic_mixed", NULL), -+ -+ /* -+ * The following fields are exclusively used by external interrupts and -+ * hence are set to use Physical destination mode handlers. -+ */ -+ .int_delivery_mode = dest_Fixed, -+ .int_dest_mode = 0 /* physical delivery */, -+ .vector_allocation_cpumask = vector_allocation_cpumask_phys, -+ .cpu_mask_to_apicid = cpu_mask_to_apicid_phys, -+ -+ /* -+ * The following fields are exclusively used by IPIs and hence are set to -+ * use Cluster Logical destination mode handlers. Note that init_apic_ldr -+ * is not used by IPIs, but the per-CPU fields it initializes are only used -+ * by the IPI hooks. -+ */ -+ .init_apic_ldr = init_apic_ldr_x2apic_cluster, -+ .send_IPI_mask = send_IPI_mask_x2apic_cluster, -+ .send_IPI_self = send_IPI_self_x2apic, -+}; -+ - static int cf_check update_clusterinfo( - struct notifier_block *nfb, unsigned long action, void *hcpu) - { -@@ -220,38 +250,56 @@ static struct notifier_block x2apic_cpu_nfb = { - static int8_t __initdata x2apic_phys = -1; - boolean_param("x2apic_phys", x2apic_phys); - -+enum { -+ unset, physical, cluster, mixed -+} static __initdata x2apic_mode = unset; -+ -+static int __init cf_check parse_x2apic_mode(const char *s) -+{ -+ if ( !cmdline_strcmp(s, "physical") ) -+ x2apic_mode = physical; -+ else if ( !cmdline_strcmp(s, "cluster") ) -+ x2apic_mode = cluster; -+ else if ( !cmdline_strcmp(s, "mixed") ) -+ x2apic_mode = mixed; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+custom_param("x2apic-mode", parse_x2apic_mode); -+ - const struct genapic *__init apic_x2apic_probe(void) - { -- if ( x2apic_phys < 0 ) -+ /* Honour the legacy cmdline setting if it's the only one provided. */ -+ if ( x2apic_mode == unset && x2apic_phys >= 0 ) -+ x2apic_mode = x2apic_phys ? physical : cluster; -+ -+ if ( x2apic_mode == unset ) - { -- /* -- * Force physical mode if there's no (full) interrupt remapping support: -- * The ID in clustered mode requires a 32 bit destination field due to -- * the usage of the high 16 bits to hold the cluster ID. 
-- */ -- x2apic_phys = iommu_intremap != iommu_intremap_full || -- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || -- IS_ENABLED(CONFIG_X2APIC_PHYSICAL); -- } -- else if ( !x2apic_phys ) -- switch ( iommu_intremap ) -+ if ( acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL ) - { -- case iommu_intremap_off: -- case iommu_intremap_restricted: -- printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" -- " forcing phys mode\n", -- iommu_intremap == iommu_intremap_off ? "without" -- : "with restricted"); -- x2apic_phys = true; -- break; -- -- case iommu_intremap_full: -- break; -+ printk(XENLOG_INFO "ACPI FADT forcing x2APIC physical mode\n"); -+ x2apic_mode = physical; - } -+ else -+ x2apic_mode = IS_ENABLED(CONFIG_X2APIC_MIXED) ? mixed -+ : (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) ? physical -+ : cluster); -+ } - -- if ( x2apic_phys ) -+ if ( x2apic_mode == physical ) - return &apic_x2apic_phys; - -+ if ( x2apic_mode == cluster && iommu_intremap != iommu_intremap_full ) -+ { -+ printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" -+ " forcing mixed mode\n", -+ iommu_intremap == iommu_intremap_off ? "without" -+ : "with restricted"); -+ x2apic_mode = mixed; -+ } -+ - if ( !this_cpu(cluster_cpus) ) - { - update_clusterinfo(NULL, CPU_UP_PREPARE, -@@ -260,7 +308,7 @@ const struct genapic *__init apic_x2apic_probe(void) - register_cpu_notifier(&x2apic_cpu_nfb); - } - -- return &apic_x2apic_cluster; -+ return x2apic_mode == cluster ? &apic_x2apic_cluster : &apic_x2apic_mixed; - } - - void __init check_x2apic_preenabled(void) --- -2.44.0 - - -From 637da04812fba259a5d06591ec535345637a4407 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 30 Jan 2024 14:33:48 +0100 -Subject: [PATCH 18/70] pci: fail device assignment if phantom functions cannot - be assigned -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current behavior is that no error is reported if (some) phantom functions -fail to be assigned during device add or assignment, so the operation succeeds -even if some phantom functions are not correctly setup. - -This can lead to devices possibly being successfully assigned to a domU while -some of the device phantom functions are still assigned to dom0. Even when the -device is assigned domIO before being assigned to a domU phantom functions -might fail to be assigned to domIO, and also fail to be assigned to the domU, -leaving them assigned to dom0. - -Since the device can generate requests using the IDs of those phantom -functions, given the scenario above a device in such state would be in control -of a domU, but still capable of generating transactions that use a context ID -targeting dom0 owned memory. - -Modify device assign in order to attempt to deassign the device if phantom -functions failed to be assigned. - -Note that device addition is not modified in the same way, as in that case the -device is assigned to a trusted domain, and hence partial assign can lead to -device malfunction but not a security issue. 
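
The control flow of the fix, reduced to a hedged sketch; assign_one(),
deassign_all() and mark_broken_and_crash() are illustrative stand-ins for the
IOMMU calls in the hunk below:

    extern int assign_one(unsigned int fn);     /* illustrative stand-ins */
    extern int deassign_all(void);
    extern void mark_broken_and_crash(void);

    int assign_with_phantoms(unsigned int nr_fns)
    {
        unsigned int i;
        int rc = 0;

        /* Stop at the first failure instead of pressing on. */
        for ( i = 0; i < nr_fns && !rc; i++ )
            rc = assign_one(i);

        /* A phantom function (i > 1) failed: roll the whole device back;
         * if even that fails, the assignment state is unknown. */
        if ( rc && i > 1 && deassign_all() )
            mark_broken_and_crash();

        return rc;
    }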
- -This is XSA-449 / CVE-2023-46839 - -Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e -master date: 2024-01-30 14:28:01 +0100 ---- - xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 04d00c7c37..e99837b6e1 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -1439,11 +1439,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - pdev->fault.count = 0; - -- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn, -- pci_to_dev(pdev), flag)) ) -- goto done; -+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), -+ flag); - -- for ( ; pdev->phantom_stride; rc = 0 ) -+ while ( pdev->phantom_stride && !rc ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) -@@ -1454,8 +1453,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - done: - if ( rc ) -- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", -- d, &PCI_SBDF(seg, bus, devfn), rc); -+ { -+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n", -+ d, devfn != pdev->devfn ? "phantom function " : "", -+ &PCI_SBDF(seg, bus, devfn), rc); -+ -+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) ) -+ { -+ /* -+ * Device with phantom functions that failed to both assign and -+ * rollback. Mark the device as broken and crash the target domain, -+ * as the state of the functions at this point is unknown and Xen -+ * has no way to assert consistent context assignment among them. -+ */ -+ pdev->broken = true; -+ if ( !is_hardware_domain(d) && d != dom_io ) -+ domain_crash(d); -+ } -+ } - /* The device is assigned to dom_io so mark it as quarantined */ - else if ( d == dom_io ) - pdev->quarantine = true; --- -2.44.0 - - -From c7ac596a575a05d6ff1e35c3ff98bc4d143712d2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 30 Jan 2024 14:34:40 +0100 -Subject: [PATCH 19/70] VT-d: Fix "else" vs "#endif" misplacement - -In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This -generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body -is executed unconditionally. - -Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's -clearer to follow. This in turn involves adjusting p2m_get_pagetable() to -compile when CONFIG_HVM is disabled. - -This is XSA-450 / CVE-2023-46840. 
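
The hazard is worth a standalone illustration. In this compilable sketch
(WANT_MIDDLE is a made-up option, not a Xen CONFIG_ symbol), building without
WANT_MIDDLE deletes the else keyword, so the final block runs unconditionally:

    #include <stdio.h>

    /* #define WANT_MIDDLE 1 */

    static void classify(int x)
    {
        if ( x > 0 )
            /* nothing */;
    #ifdef WANT_MIDDLE
        else if ( x == 0 )
            printf("zero\n");
        else
    #endif
        {
            /* Intended as the else branch; without WANT_MIDDLE it executes
             * for every x, including x > 0. */
            printf("fallback\n");
        }
    }

    int main(void)
    {
        classify(1);   /* prints "fallback" in the !WANT_MIDDLE build */
        return 0;
    }

Rewriting the condition with IS_ENABLED() keeps every branch visible to the
compiler in all configurations, so a misplaced preprocessor guard can no longer
silently change control flow.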
- -Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only") -Reported-by: Teddy Astie <teddy.astie@vates.tech> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426 -master date: 2024-01-30 14:29:15 +0100 ---- - xen/arch/x86/include/asm/p2m.h | 9 ++++++++- - xen/drivers/passthrough/vtd/iommu.c | 4 +--- - 2 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h -index 40545f5fa8..1e0b0e2dcc 100644 ---- a/xen/arch/x86/include/asm/p2m.h -+++ b/xen/arch/x86/include/asm/p2m.h -@@ -435,7 +435,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m) - return p2m->p2m_class == p2m_alternate; - } - --#define p2m_get_pagetable(p2m) ((p2m)->phys_table) -+#ifdef CONFIG_HVM -+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m) -+{ -+ return p2m->phys_table; -+} -+#else -+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m); -+#endif - - /* - * Ensure any deferred p2m TLB flush has been completed on all VCPUs. -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index e13b7d99db..9ed616e211 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -438,15 +438,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, - - if ( pgd_maddr ) - /* nothing */; --#ifdef CONFIG_HVM -- else if ( iommu_use_hap_pt(d) ) -+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) ) - { - pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); - - pgd_maddr = pagetable_get_paddr(pgt); - } - else --#endif - { - if ( !hd->arch.vtd.pgd_maddr ) - { --- -2.44.0 - - -From 62b3d7f8e45a7ec1597f0ed61a99d1f423b22315 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 1 Feb 2024 17:58:17 +0100 -Subject: [PATCH 20/70] x86/amd: Extend CPU erratum #1474 fix to more affected - models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Erratum #1474 has now been extended to cover models from family 17h ranges -00-2Fh, so the errata now covers all the models released under Family -17h (Zen, Zen+ and Zen2). - -Additionally extend the workaround to Family 18h (Hygon), since it's based on -the Zen architecture and very likely affected. - -Rename all the zen2 related symbols to fam17, since the errata doesn't -exclusively affect Zen2 anymore. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba -master date: 2023-12-21 12:19:40 +0000 ---- - xen/arch/x86/cpu/amd.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 0f305312ff..d43288ae97 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk; - bool __ro_after_init amd_legacy_ssbd; - bool __initdata amd_virt_spec_ctrl; - --static bool __read_mostly zen2_c6_disabled; -+static bool __read_mostly fam17_c6_disabled; - - static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, - unsigned int *hi) -@@ -978,24 +978,24 @@ void amd_check_zenbleed(void) - val & chickenbit ? 
"chickenbit" : "microcode"); - } - --static void cf_check zen2_disable_c6(void *arg) -+static void cf_check fam17_disable_c6(void *arg) - { - /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */ - const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22)); - uint64_t val; - -- if (!zen2_c6_disabled) { -+ if (!fam17_c6_disabled) { - printk(XENLOG_WARNING - "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n"); -- zen2_c6_disabled = true; -+ fam17_c6_disabled = true; - /* - * Prevent CPU hotplug so that started CPUs will either see -- * zen2_c6_disabled set, or will be handled by -+ * zen_c6_disabled set, or will be handled by - * smp_call_function(). - */ - while (!get_cpu_maps()) - process_pending_softirqs(); -- smp_call_function(zen2_disable_c6, NULL, 0); -+ smp_call_function(fam17_disable_c6, NULL, 0); - put_cpu_maps(); - } - -@@ -1294,8 +1294,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) - amd_check_zenbleed(); - amd_check_erratum_1485(); - -- if (zen2_c6_disabled) -- zen2_disable_c6(NULL); -+ if (fam17_c6_disabled) -+ fam17_disable_c6(NULL); - - check_syscfg_dram_mod_en(); - -@@ -1307,7 +1307,7 @@ const struct cpu_dev amd_cpu_dev = { - .c_init = init_amd, - }; - --static int __init cf_check zen2_c6_errata_check(void) -+static int __init cf_check amd_check_erratum_1474(void) - { - /* - * Errata #1474: A Core May Hang After About 1044 Days -@@ -1315,7 +1315,8 @@ static int __init cf_check zen2_c6_errata_check(void) - */ - s_time_t delta; - -- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) -+ if (cpu_has_hypervisor || -+ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18)) - return 0; - - /* -@@ -1330,10 +1331,10 @@ static int __init cf_check zen2_c6_errata_check(void) - if (delta > 0) { - static struct timer errata_c6; - -- init_timer(&errata_c6, zen2_disable_c6, NULL, 0); -+ init_timer(&errata_c6, fam17_disable_c6, NULL, 0); - set_timer(&errata_c6, NOW() + delta); - } else -- zen2_disable_c6(NULL); -+ fam17_disable_c6(NULL); - - return 0; - } -@@ -1341,4 +1342,4 @@ static int __init cf_check zen2_c6_errata_check(void) - * Must be executed after early_time_init() for tsc_ticks2ns() to have been - * calibrated. That prevents us doing the check in init_amd(). - */ --presmp_initcall(zen2_c6_errata_check); -+presmp_initcall(amd_check_erratum_1474); --- -2.44.0 - - -From b26c30a408255454f8ceb4e49e3c4385aa32fbc3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 1 Feb 2024 17:58:59 +0100 -Subject: [PATCH 21/70] CirrusCI: drop FreeBSD 12 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Went EOL by the end of December 2023, and the pkg repos have been shut down. 
- -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20 -master date: 2024-01-15 12:20:11 +0100 ---- - .cirrus.yml | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/.cirrus.yml b/.cirrus.yml -index 7e0beb200d..63f3afb104 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE - - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin - - gmake -j`sysctl -n hw.ncpu` clang=y - --task: -- name: 'FreeBSD 12' -- freebsd_instance: -- image_family: freebsd-12-4 -- << : *FREEBSD_TEMPLATE -- - task: - name: 'FreeBSD 13' - freebsd_instance: --- -2.44.0 - - -From 6ccf064b0ce1d06449565129ab944b4fd9531b3a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 1 Feb 2024 17:59:25 +0100 -Subject: [PATCH 22/70] x86/intel: ensure Global Performance Counter Control is - setup correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL -MSR contains per-counter enable bits that is ANDed with the enable bit in the -counter EVNTSEL MSR in order for a PMC counter to be enabled. - -So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable -bits being set by default, but at least on some Intel Sapphire and Emerald -Rapids this is no longer the case, and Xen reports: - -Testing NMI watchdog on all CPUs: 0 40 stuck - -The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0 -doesn't start counting when the enable bit in EVNTSEL0 is set, due to the -relevant enable bit in PERF_GLOBAL_CTRL not being set. - -Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the -general-purpose PMCs are enabled. Doing so brings the state of the package-BSP -PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system. - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c -master date: 2024-01-17 10:40:52 +0100 ---- - xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index a8ba3191e6..aef8e4506c 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -533,9 +533,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); -+ unsigned int cnt = (eax >> 8) & 0xff; -+ - /* Check for version and the number of counters */ -- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) -+ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { -+ uint64_t global_ctrl; -+ unsigned int cnt_mask = (1UL << cnt) - 1; -+ -+ /* -+ * On (some?) Sapphire/Emerald Rapids platforms each -+ * package-BSP starts with all the enable bits for the -+ * general-purpose PMCs cleared. Adjust so counters -+ * can be enabled from EVNTSEL. 
-+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); -+ if ((global_ctrl & cnt_mask) != cnt_mask) { -+ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" -+ PRIx64 " adjusting to %#" PRIx64 "\n", -+ smp_processor_id(), global_ctrl, -+ global_ctrl | cnt_mask); -+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, -+ global_ctrl | cnt_mask); -+ } - __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); -+ } - } - - if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) --- -2.44.0 - - -From 4cc0f88c42f374c7a8e2d05e38777fa18619482e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 1 Feb 2024 17:59:57 +0100 -Subject: [PATCH 23/70] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT - -When receiving an INIT, a prior bugfix tried to ignore the INIT and continue -onwards. - -Unfortunately it's not safe to return at that point in vmx_vmexit_handler(). -Just out of context in the first hunk is a local_irqs_enabled() which is -depended-upon by the return-to-guest path, causing the following checklock -failure in debug builds: - - (XEN) Error: INIT received - ignoring - (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1 - (XEN) Xen BUG at common/spinlock.c:132 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]---- - ... - (XEN) Xen call trace: - (XEN) [<ffff82d040238e10>] R check_lock+0xcd/0xe1 - (XEN) [<ffff82d040238fe3>] F _spin_lock+0x1b/0x60 - (XEN) [<ffff82d0402ed6a8>] F pt_update_irq+0x32/0x3bb - (XEN) [<ffff82d0402b9632>] F vmx_intr_assist+0x3b/0x51d - (XEN) [<ffff82d040206447>] F vmx_asm_vmexit_handler+0xf7/0x210 - -Luckily, this is benign in release builds. Accidentally having IRQs disabled -when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern. - -Drop the problematic early return. In hindsight, it's wrong to skip other -normal VMExit steps. - -Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received") -Reported-by: Reima ISHII <ishiir@g.ecc.u-tokyo.ac.jp> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 1edc7f1e91..964891934b 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -4100,7 +4100,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - - case EXIT_REASON_INIT: - printk(XENLOG_ERR "Error: INIT received - ignoring\n"); -- return; /* Renter the guest without further processing */ -+ break; - } - - /* Now enable interrupts so it's safe to take locks. */ -@@ -4385,6 +4385,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - break; - } - case EXIT_REASON_EXTERNAL_INTERRUPT: -+ case EXIT_REASON_INIT: - /* Already handled above. */ - break; - case EXIT_REASON_TRIPLE_FAULT: --- -2.44.0 - - -From 00550e808c10c67710ebb8867200eda1fbee332c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 1 Feb 2024 18:00:32 +0100 -Subject: [PATCH 24/70] x86/vmx: Disallow the use of inactivity states - -Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and -enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for -security bugs. 
- -The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the -SDM in Vol3 27.7 "Special Features of VM Entry": - - If VM entry ends with the logical processor in an inactive activity state, - the VM entry generates any special bus cycle that is normally generated when - that activity state is entered from the active state. - -Also, - - Some activity states unconditionally block certain events. - -I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a -VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than -SIPIs. - -Both of these activity states are for the TXT ACM to use, not for regular -hypervisors, and Xen doesn't support dropping the HLT intercept either. - -There are two paths in Xen which operate on ACTIVITY_STATE. - -1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork. - - As regular VMs can't use any inactivity states, this is just duplicating - the 0 from construct_vmcs(). Retain the ability to query activity_state, - but crash the domain on any attempt to set an inactivity state. - -2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[]. - - Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC, - and remove ACTIVITY_STATE from vmcs_gstate_field[]. - - In virtual_vmentry(), we should trigger a VMEntry failure for the use of - any inactivity states, but there's no support for that in the code at all - so leave a TODO for when we finally start working on nested-virt in - earnest. - -Reported-by: Reima Ishii <ishiir@g.ecc.u-tokyo.ac.jp> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com> -master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 5 ++++- - xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++-- - xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 + - 3 files changed, 12 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 964891934b..28dece7c6b 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1558,7 +1558,10 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v, - { - vmx_vmcs_enter(v); - -- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state); -+ if ( nrs->vmx.activity_state ) -+ domain_crash(v->domain, "Attempt to set %pv activity_state %#lx\n", -+ v, nrs->vmx.activity_state); -+ - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info); - __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg); - -diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c -index 16b0ef82b6..fd0ae39166 100644 ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -899,7 +899,10 @@ static const u16 vmcs_gstate_field[] = { - GUEST_LDTR_AR_BYTES, - GUEST_TR_AR_BYTES, - GUEST_INTERRUPTIBILITY_INFO, -+ /* -+ * ACTIVITY_STATE is handled specially. 
- GUEST_ACTIVITY_STATE, -+ */ - GUEST_SYSENTER_CS, - GUEST_PREEMPTION_TIMER, - /* natural */ -@@ -1200,6 +1203,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs) - nvcpu->nv_vmentry_pending = 0; - nvcpu->nv_vmswitch_in_progress = 1; - -+ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */ -+ - /* - * EFER handling: - * hvm_set_efer won't work if CR0.PG = 1, so we change the value -@@ -2316,8 +2321,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) - data = hvm_cr4_guest_valid_bits(d); - break; - case MSR_IA32_VMX_MISC: -- /* Do not support CR3-target feature now */ -- data = host_data & ~VMX_MISC_CR3_TARGET; -+ /* Do not support CR3-targets or activity states. */ -+ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK); - break; - case MSR_IA32_VMX_EPT_VPID_CAP: - data = nept_get_ept_vpid_cap(); -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -index d07fcb2bc9..8de9977eb3 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -@@ -277,6 +277,7 @@ extern u32 vmx_secondary_exec_control; - #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL - extern u64 vmx_ept_vpid_cap; - -+#define VMX_MISC_ACTIVITY_MASK 0x000001c0 - #define VMX_MISC_PROC_TRACE 0x00004000 - #define VMX_MISC_CR3_TARGET 0x01ff0000 - #define VMX_MISC_VMWRITE_ALL 0x20000000 --- -2.44.0 - - -From 579a622eb41cf4e1ae4d94100985a81eebda23b9 Mon Sep 17 00:00:00 2001 -From: Michal Orzel <michal.orzel@amd.com> -Date: Thu, 1 Feb 2024 18:01:27 +0100 -Subject: [PATCH 25/70] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps - to $(targets) - -At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op -under the hood) results in a crash. This is due to a profiler trying to -access data in the .init.* sections (libfdt for Arm and libelf for x86) -that are stripped after boot. Normally, the build system compiles any -*.init.o file without COV_FLAGS. However, these two libraries are -handled differently as sections will be renamed to init after linking. - -To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were -added to nocov-y. This worked until e321576f4047 ("xen/build: start using -if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y. -This way, even though these objects appear as prerequisites of -lib{fdt,elf}.o and the settings should propagate to them, make can also -build them as a prerequisite of __build, in which case COV_FLAGS would -still have the unwanted flags. Fix it by switching to $(targets) instead. - -Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB -is not set. Otherwise, there is no section renaming and we should be able -to run the coverage. 
- -Fixes: e321576f4047 ("xen/build: start using if_changed") -Signed-off-by: Michal Orzel <michal.orzel@amd.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68 -master date: 2024-01-23 12:02:44 +0100 ---- - xen/common/libelf/Makefile | 2 +- - xen/common/libfdt/Makefile | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile -index 8a4522e4e1..917d12b006 100644 ---- a/xen/common/libelf/Makefile -+++ b/xen/common/libelf/Makefile -@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE - $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE - $(call if_changed,ld) - --extra-y += libelf-temp.o $(libelf-objs) -+targets += libelf-temp.o $(libelf-objs) -diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile -index d50487aa6e..6ce679f98f 100644 ---- a/xen/common/libfdt/Makefile -+++ b/xen/common/libfdt/Makefile -@@ -5,10 +5,10 @@ SECTIONS := text data $(SPECIAL_DATA_SECTIONS) - # For CONFIG_OVERLAY_DTB, libfdt functionalities will be needed during runtime. - ifneq ($(CONFIG_OVERLAY_DTB),y) - OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) -+nocov-y += libfdt.o - endif - - obj-y += libfdt.o --nocov-y += libfdt.o - - CFLAGS-y += -I$(srctree)/include/xen/libfdt/ - -@@ -18,4 +18,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE - $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE - $(call if_changed,ld) - --extra-y += libfdt-temp.o $(LIBFDT_OBJS) -+targets += libfdt-temp.o $(LIBFDT_OBJS) --- -2.44.0 - - -From 295ab8060d95ed8c365077946c7faf8793099ef8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Thu, 1 Feb 2024 18:01:52 +0100 -Subject: [PATCH 26/70] x86/p2m-pt: fix off by one in entry check assert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the -passed end mfn should be the last mfn to be mapped (not last + 1). - -Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@cloud.com> -master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd -master date: 2024-01-25 16:09:04 +0100 ---- - xen/arch/x86/mm/p2m-pt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c -index b2b14746c1..88d3733891 100644 ---- a/xen/arch/x86/mm/p2m-pt.c -+++ b/xen/arch/x86/mm/p2m-pt.c -@@ -552,7 +552,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old, - if ( new == p2m_mmio_direct ) - ASSERT(!mfn_eq(mfn, INVALID_MFN) && - !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), -- mfn_x(mfn) + (1UL << order))); -+ mfn_x(mfn) + (1UL << order) - 1)); - else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid || - new == p2m_mmio_dm ) - ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN)); --- -2.44.0 - - -From b1fdd7d0e47e0831ac7a99d0417385fc10d3068c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 1 Feb 2024 18:02:24 +0100 -Subject: [PATCH 27/70] x86/ucode: Fix stability of the raw CPU Policy rescan - -Always run microcode_update_helper() on the BSP, so the the updated Raw CPU -policy doesn't get non-BSP topology details included. 
- -Have calculate_raw_cpu_policy() clear the instantanious XSTATE sizes. The -value XCR0 | MSR_XSS had when we scanned the policy isn't terribly interesting -to report. - -When CPUID Masking is active, it affects CPUID instructions issued by Xen -too. Transiently disable masking to get a clean scan. - -Fixes: 694d79ed5aac ("x86/ucode: Refresh raw CPU policy after microcode load") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: cf7fe8b72deaa94157ddf97d4bb391480205e9c2 -master date: 2024-01-25 17:46:57 +0000 ---- - xen/arch/x86/cpu-policy.c | 7 +++++++ - xen/arch/x86/cpu/microcode/core.c | 20 +++++++++++++++++--- - 2 files changed, 24 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 81e574390f..bcb17b7ce3 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -353,6 +353,13 @@ void calculate_raw_cpu_policy(void) - /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ - ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); - -+ /* -+ * Clear the truly dynamic fields. These vary with the in-context XCR0 -+ * and MSR_XSS, and aren't interesting fields in the raw policy. -+ */ -+ p->xstate.raw[0].b = 0; -+ p->xstate.raw[1].b = 0; -+ - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ - /* Was already added by probe_cpuid_faulting() */ - } -diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c -index 65ebeb50de..4e011cdc41 100644 ---- a/xen/arch/x86/cpu/microcode/core.c -+++ b/xen/arch/x86/cpu/microcode/core.c -@@ -680,8 +680,18 @@ static long cf_check microcode_update_helper(void *data) - microcode_update_cache(patch); - spin_unlock(µcode_mutex); - -- /* Refresh the raw CPU policy, in case the features have changed. */ -+ /* -+ * Refresh the raw CPU policy, in case the features have changed. -+ * Disable CPUID masking if in use, to avoid having current's -+ * cpu_policy affect the rescan. -+ */ -+ if ( ctxt_switch_masking ) -+ alternative_vcall(ctxt_switch_masking, NULL); -+ - calculate_raw_cpu_policy(); -+ -+ if ( ctxt_switch_masking ) -+ alternative_vcall(ctxt_switch_masking, current); - } - else - microcode_free_patch(patch); -@@ -721,8 +731,12 @@ int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len) - } - buffer->len = len; - -- return continue_hypercall_on_cpu(smp_processor_id(), -- microcode_update_helper, buffer); -+ /* -+ * Always queue microcode_update_helper() on CPU0. Most of the logic -+ * won't care, but the update of the Raw CPU policy wants to (re)run on -+ * the BSP. -+ */ -+ return continue_hypercall_on_cpu(0, microcode_update_helper, buffer); - } - - static int __init cf_check microcode_init(void) --- -2.44.0 - - -From 184d723e7a5d1c021d297e14d19fe5344eac7a56 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= <slack@rabbit.lu> -Date: Tue, 27 Feb 2024 13:53:42 +0100 -Subject: [PATCH 28/70] tools/xentop: fix sorting bug for some columns -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT. -Fix by adjusting variables names in compare functions. -Bug fix only. No functional change. 
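A minimal standalone sketch of the bug class fixed here (the struct and helper are simplified stand-ins, not the real xenstat API): with both totals read from domain1, the comparator always sees two equal values, returns 0, and the affected columns never actually sort.

#include <stdio.h>

struct dom { unsigned long long vbd_rd; };

static int compare(unsigned long long i1, unsigned long long i2)
{
    return i1 < i2 ? -1 : i1 > i2 ? 1 : 0;
}

/* Buggy shape: the second total is (re)read from d1. */
static int compare_vbd_rd_buggy(const struct dom *d1, const struct dom *d2)
{
    unsigned long long v1 = d1->vbd_rd;
    unsigned long long v2 = d1->vbd_rd;   /* d1 twice: always compares equal */
    (void)d2;
    return -compare(v1, v2);              /* always 0, so sorting is a no-op */
}

/* Fixed shape: each total comes from its own domain. */
static int compare_vbd_rd_fixed(const struct dom *d1, const struct dom *d2)
{
    return -compare(d1->vbd_rd, d2->vbd_rd);
}

int main(void)
{
    struct dom a = { .vbd_rd = 10 }, b = { .vbd_rd = 20 };

    printf("buggy=%d fixed=%d\n",
           compare_vbd_rd_buggy(&a, &b), compare_vbd_rd_fixed(&a, &b));
    return 0;
}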
- -Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.") -Signed-off-by: Cyril Rébert (zithro) <slack@rabbit.lu> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a -master date: 2024-02-05 17:58:23 +0000 ---- - tools/xentop/xentop.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c -index 950e8935c4..545bd5e96d 100644 ---- a/tools/xentop/xentop.c -+++ b/tools/xentop/xentop.c -@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo); -- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo); -+ tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo); - - return -compare(dom1_vbd_oo, dom2_vbd_oo); - } -@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd); -- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd); -+ tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd); - -- return -compare(dom1_vbd_rd, dom1_vbd_rd); -+ return -compare(dom1_vbd_rd, dom2_vbd_rd); - } - - /* Prints number of total VBD READ requests statistic */ -@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr); -- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr); -+ tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr); - - return -compare(dom1_vbd_wr, dom2_vbd_wr); - } -@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_rsect = 0, dom2_vbd_rsect = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect); -- tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom2_vbd_rsect); -+ tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect); - - return -compare(dom1_vbd_rsect, dom2_vbd_rsect); - } --- -2.44.0 - - -From fa9950a527a70971bf9279be62d445cf9c83aedf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 13:54:04 +0100 -Subject: [PATCH 29/70] amd-vi: fix IVMD memory type checks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code that parses the IVMD blocks is relaxed with regard to the -restriction that such unity regions should always fall into memory ranges -marked as reserved in the memory map. - -However the type checks for the IVMD addresses are inverted, and as a result -IVMD ranges falling into RAM areas are accepted. Note that having such ranges -in the first place is a firmware bug, as IVMD should always fall into reserved -ranges. 
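A standalone sketch of the inverted predicate (the RAM_TYPE_* values below are illustrative only; the real constants live in Xen's e820 handling): the old check accepted exactly the pages that must be rejected, namely plain RAM, while rejecting the reserved/ACPI/unusable pages that IVMD ranges are supposed to cover.

#include <stdbool.h>
#include <stdio.h>

#define RAM_TYPE_CONVENTIONAL 0x1u   /* illustrative flag values */
#define RAM_TYPE_RESERVED     0x2u
#define RAM_TYPE_UNUSABLE     0x4u
#define RAM_TYPE_ACPI         0x8u

/* Old, inverted logic: "good" when the page is NOT reserved/ACPI/unusable,
 * i.e. it accepted conventional RAM pages. */
static bool ivmd_page_ok_old(unsigned int type)
{
    return !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | RAM_TYPE_UNUSABLE));
}

/* Fixed logic: "good" only when the page is one of the non-RAM types. */
static bool ivmd_page_ok_new(unsigned int type)
{
    return type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | RAM_TYPE_UNUSABLE);
}

int main(void)
{
    printf("RAM page:      old=%d new=%d\n",
           ivmd_page_ok_old(RAM_TYPE_CONVENTIONAL),
           ivmd_page_ok_new(RAM_TYPE_CONVENTIONAL));
    printf("reserved page: old=%d new=%d\n",
           ivmd_page_ok_old(RAM_TYPE_RESERVED),
           ivmd_page_ok_new(RAM_TYPE_RESERVED));
    return 0;
}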
- -Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved') -Reported-by: Ox <oxjo@proton.me> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Tested-by: oxjo <oxjo@proton.me> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 83afa313583019d9f159c122cecf867735d27ec5 -master date: 2024-02-06 11:56:13 +0100 ---- - xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c -index 699d33f429..96d8879e7b 100644 ---- a/xen/drivers/passthrough/amd/iommu_acpi.c -+++ b/xen/drivers/passthrough/amd/iommu_acpi.c -@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block) - return -EIO; - } - -- /* Types which won't be handed out are considered good enough. */ -- if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -- RAM_TYPE_UNUSABLE)) ) -+ /* -+ * Types which aren't RAM are considered good enough. -+ * Note that a page being partially RESERVED, ACPI or UNUSABLE will -+ * force Xen into assuming the whole page as having that type in -+ * practice. -+ */ -+ if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -+ RAM_TYPE_UNUSABLE) ) - continue; - - AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr); --- -2.44.0 - - -From 16475909baa2bcfda3ebc07ced5e5cd0ca8172d6 Mon Sep 17 00:00:00 2001 -From: Jason Andryuk <jandryuk@gmail.com> -Date: Tue, 27 Feb 2024 13:55:03 +0100 -Subject: [PATCH 30/70] block-common: Fix same_vm for no targets - -same_vm is broken when the two main domains do not have targets. otvm -and targetvm are both missing, which means they get set to -1 and then -converted to empty strings: - -++10697+ local targetvm=-1 -++10697+ local otvm=-1 -++10697+ otvm= -++10697+ othervm=/vm/cc97bc2f-3a91-43f7-8fbc-4cb92f90b4e4 -++10697+ targetvm= -++10697+ local frontend_uuid=/vm/844dea4e-44f8-4e3e-8145-325132a31ca5 - -The final comparison returns true since the two empty strings match: - -++10697+ '[' /vm/844dea4e-44f8-4e3e-8145-325132a31ca5 = /vm/cc97bc2f-3a91-43f7-8fbc-4cb92f90b4e4 -o '' = /vm/cc97bc2f-3a91-43f7-8fbc-4cb92f90b4e4 -o /vm/844dea4e-44f8-4e3e-8145-325132a31ca5 = '' -o '' = '' ']' - -Replace -1 with distinct strings indicating the lack of a value and -remove the collescing to empty stings. The strings themselves will no -longer match, and that is correct. 
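The failure mode can be sketched as a standalone program (read_default below is a stand-in for the script's xenstore_read_default, with NULL modelling a missing node): when two independent lookups fail and both fall back to the same default, "missing" spuriously matches "missing"; distinct sentinels cannot match each other.

#include <stdio.h>
#include <string.h>

/* Stand-in for xenstore_read_default(): NULL models an absent node. */
static const char *read_default(const char *val, const char *def)
{
    return val ? val : def;
}

int main(void)
{
    const char *targetvm, *otvm;

    /* Old scheme: the "-1" defaults were trimmed away, leaving two empty
     * strings that compare equal even though neither value exists. */
    targetvm = read_default(NULL, "");
    otvm     = read_default(NULL, "");
    printf("shared default:     match=%d\n", !strcmp(targetvm, otvm));

    /* Fixed scheme: two distinct sentinels never match each other. */
    targetvm = read_default(NULL, "No Target");
    otvm     = read_default(NULL, "No Other Target");
    printf("distinct sentinels: match=%d\n", !strcmp(targetvm, otvm));

    return 0;
}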
- -++12364+ '[' /vm/844dea4e-44f8-4e3e-8145-325132a31ca5 = /vm/cc97bc2f-3a91-43f7-8fbc-4cb92f90b4e4 -o 'No target' = /vm/cc97bc2f-3a91-43f7-8fbc-4cb92f90b4e4 -o /vm/844dea4e-44f8-4e3e-8145-325132a31ca5 = 'No other target' -o 'No target' = 'No other target' ']' - -Signed-off-by: Jason Andryuk <jandryuk@gmail.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: e8f1bb803fdf44db708991593568a9e3e6b3d130 -master date: 2024-02-07 13:46:52 +0100 ---- - tools/hotplug/Linux/block-common.sh | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/tools/hotplug/Linux/block-common.sh b/tools/hotplug/Linux/block-common.sh -index f86a88c4eb..5c80237d99 100644 ---- a/tools/hotplug/Linux/block-common.sh -+++ b/tools/hotplug/Linux/block-common.sh -@@ -112,14 +112,12 @@ same_vm() - "$FRONTEND_UUID") - local target=$(xenstore_read_default "/local/domain/$FRONTEND_ID/target" \ - "-1") -- local targetvm=$(xenstore_read_default "/local/domain/$target/vm" "-1") -+ local targetvm=$(xenstore_read_default "/local/domain/$target/vm" "No Target") - local otarget=$(xenstore_read_default "/local/domain/$otherdom/target" \ - "-1") - local otvm=$(xenstore_read_default "/local/domain/$otarget/vm" \ -- "-1") -- otvm=${otvm%-1} -- othervm=${othervm%-1} -- targetvm=${targetvm%-1} -+ "No Other Target") -+ - local frontend_uuid=${FRONTEND_UUID%-1} - - [ "$frontend_uuid" = "$othervm" -o "$targetvm" = "$othervm" -o \ --- -2.44.0 - - -From b51fd78aed865033413178f5953147effedc7ce0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Petr=20Bene=C5=A1?= <w1benny@gmail.com> -Date: Tue, 27 Feb 2024 13:55:25 +0100 -Subject: [PATCH 31/70] x86/hvm: Fix fast singlestep state persistence -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch addresses an issue where the fast singlestep setting would persist -despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF. -Specifically, if fast singlestep was enabled in a VMI session and that session -stopped before the MTF trap occurred, the fast singlestep setting remained -active even though MTF itself was disabled. This led to a situation where, upon -starting a new VMI session, the first event to trigger an EPT violation would -cause the corresponding EPT event callback to be skipped due to the lingering -fast singlestep setting. - -The fix ensures that the fast singlestep setting is properly reset when -disabling single step debugging operations. 
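A compact sketch of the state-hygiene rule the fix enforces (the struct loosely mirrors the fields named in the patch and is not the real vCPU structure): turning a feature off must also clear any auxiliary state it latched, or the next session silently inherits stale behaviour.

#include <stdbool.h>
#include <stdio.h>

struct vcpu_debug {
    bool single_step;
    struct { bool enabled; unsigned int p2midx; } fast_single_step;
};

static void single_step_off(struct vcpu_debug *v)
{
    v->single_step = false;
    /* The fix: also drop the fast-singlestep latch left by a prior session. */
    v->fast_single_step.enabled = false;
    v->fast_single_step.p2midx = 0;
}

int main(void)
{
    struct vcpu_debug v = { true, { true, 5 } };

    single_step_off(&v);
    printf("fast_single_step: enabled=%d p2midx=%u\n",
           v.fast_single_step.enabled, v.fast_single_step.p2midx);
    return 0;
}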
- -Signed-off-by: Petr BeneÅ¡ <w1benny@gmail.com> -Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com> -master commit: 897def94b56175ce569673a05909d2f223e1e749 -master date: 2024-02-12 09:37:58 +0100 ---- - xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++---------- - 1 file changed, 24 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 482eebbabf..a70b351373 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -5167,26 +5167,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) - - int hvm_debug_op(struct vcpu *v, int32_t op) - { -- int rc; -+ int rc = 0; - - switch ( op ) - { - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -- rc = -EOPNOTSUPP; - if ( !cpu_has_monitor_trap_flag ) -- break; -- rc = 0; -- vcpu_pause(v); -- v->arch.hvm.single_step = -- (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON); -- vcpu_unpause(v); /* guest will latch new state */ -+ return -EOPNOTSUPP; - break; - default: -- rc = -ENOSYS; -- break; -+ return -ENOSYS; -+ } -+ -+ vcpu_pause(v); -+ -+ switch ( op ) -+ { -+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: -+ v->arch.hvm.single_step = true; -+ break; -+ -+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -+ v->arch.hvm.single_step = false; -+ v->arch.hvm.fast_single_step.enabled = false; -+ v->arch.hvm.fast_single_step.p2midx = 0; -+ break; -+ -+ default: /* Excluded above */ -+ ASSERT_UNREACHABLE(); -+ return -ENOSYS; - } - -+ vcpu_unpause(v); /* guest will latch new state */ -+ - return rc; - } - --- -2.44.0 - - -From 59e6ad6597dc9930c966b20485a9d0b369ff71a5 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 13:55:56 +0100 -Subject: [PATCH 32/70] x86/HVM: tidy state on hvmemul_map_linear_addr()'s - error path - -While in the vast majority of cases failure of the function will not -be followed by re-invocation with the same emulation context, a few -very specific insns - involving multiple independent writes, e.g. ENTER -and PUSHA - exist where this can happen. Since failure of the function -only signals to the caller that it ought to try an MMIO write instead, -such failure also cannot be assumed to result in wholesale failure of -emulation of the current insn. Instead we have to maintain internal -state such that another invocation of the function with the same -emulation context remains possible. To achieve that we need to reset MFN -slots after putting page references on the error path. - -Note that all of this affects debugging code only, in causing an -assertion to trigger (higher up in the function). There's otherwise no -misbehavior - such a "leftover" slot would simply be overwritten by new -contents in a release build. - -Also extend the related unmap() assertion, to further check for MFN 0. - -Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings") -Reported-by: Manuel Andreas <manuel.andreas@tum.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Paul Durrant <paul@xen.org> -master commit: e72f951df407bc3be82faac64d8733a270036ba1 -master date: 2024-02-13 09:36:14 +0100 ---- - xen/arch/x86/hvm/emulate.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index 254716c766..865aa08bbc 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -696,7 +696,12 @@ static void *hvmemul_map_linear_addr( - out: - /* Drop all held references. 
*/ - while ( mfn-- > hvmemul_ctxt->mfn ) -+ { - put_page(mfn_to_page(*mfn)); -+#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. */ -+ *mfn = _mfn(0); -+#endif -+ } - - return err; - } -@@ -718,7 +723,7 @@ static void hvmemul_unmap_linear_addr( - - for ( i = 0; i < nr_frames; i++ ) - { -- ASSERT(mfn_valid(*mfn)); -+ ASSERT(mfn_x(*mfn) && mfn_valid(*mfn)); - paging_mark_dirty(currd, *mfn); - put_page(mfn_to_page(*mfn)); - --- -2.44.0 - - -From 006764b871db75d5d025500a079ad246d1d418a1 Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 27 Feb 2024 13:56:25 +0100 -Subject: [PATCH 33/70] build: Replace `which` with `command -v` -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The `which` command is not standard, may not exist on the build host, -or may not behave as expected by the build system. It is recommended -to use `command -v` to find out if a command exist and have its path, -and it's part of a POSIX shell standard (at least, it seems to be -mandatory since IEEE Std 1003.1-2008, but was optional before). - -Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter") -Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet") -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: f93629b18b528a5ab1b1092949c5420069c7226c -master date: 2024-02-19 12:45:48 +0100 ---- - xen/Makefile | 4 ++-- - xen/build.mk | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/Makefile b/xen/Makefile -index a92709b43e..59d368e4d8 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -25,8 +25,8 @@ export XEN_BUILD_HOST := $(shell hostname) - endif - - # Best effort attempt to find a python interpreter, defaulting to Python 3 if --# available. Fall back to just `python` if `which` is nowhere to be found. --PYTHON_INTERPRETER := $(word 1,$(shell which python3 python python2 2>/dev/null) python) -+# available. Fall back to just `python`. -+PYTHON_INTERPRETER := $(word 1,$(shell command -v python3 || command -v python || command -v python2) python) - export PYTHON ?= $(PYTHON_INTERPRETER) - - export CHECKPOLICY ?= checkpolicy -diff --git a/xen/build.mk b/xen/build.mk -index 26dd5a8e87..0f490ca71b 100644 ---- a/xen/build.mk -+++ b/xen/build.mk -@@ -1,6 +1,6 @@ - quiet_cmd_banner = BANNER $@ - define cmd_banner -- if which figlet >/dev/null 2>&1 ; then \ -+ if command -v figlet >/dev/null 2>&1 ; then \ - echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \ - else \ - echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \ --- -2.44.0 - - -From 489c2b9ba173376e978c0ef3de416a2f09452e85 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 27 Feb 2024 13:57:07 +0100 -Subject: [PATCH 34/70] libxl: Disable relocating memory for qemu-xen in - stubdomain too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -According to comments (and experiments) qemu-xen cannot handle memory -reolcation done by hvmloader. The code was already disabled when running -qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when -adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to -be consistent in this regard. 
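The policy itself reduces to a small predicate; a hedged standalone sketch follows (the enum and function names are illustrative, not the libxl API): relocation is only safe with qemu-traditional, and only when vNUMA has not already fixed the memory layout.

#include <stdbool.h>
#include <stdio.h>

enum dm_version { QEMU_XEN, QEMU_XEN_TRADITIONAL };   /* illustrative */

static bool allow_memory_relocate(enum dm_version dm, bool vnuma)
{
    /* Only qemu-traditional copes with hvmloader moving guest memory to
     * enlarge the MMIO hole, and only when vNUMA hasn't pinned the layout. */
    return dm == QEMU_XEN_TRADITIONAL && !vnuma;
}

int main(void)
{
    printf("qemu-xen stubdom:    %d\n",
           allow_memory_relocate(QEMU_XEN, false));
    printf("qemu-trad, no vNUMA: %d\n",
           allow_memory_relocate(QEMU_XEN_TRADITIONAL, false));
    printf("qemu-trad, vNUMA:    %d\n",
           allow_memory_relocate(QEMU_XEN_TRADITIONAL, true));
    return 0;
}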
- -Reported-by: Neowutran <xen@neowutran.ovh> -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Jason Andryuk <jandryuk@gmail.com> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 97883aa269f6745a6ded232be3a855abb1297e0d -master date: 2024-02-22 11:48:22 +0100 ---- - tools/libs/light/libxl_dm.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index 14b593110f..ed620a9d8e 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss) - "%s", - libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios)); - } -+ /* Disable relocating memory to make the MMIO hole larger -+ * unless we're running qemu-traditional and vNUMA is not -+ * configured. */ -+ libxl__xs_printf(gc, XBT_NULL, -+ libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate", -+ libxl__xs_get_dompath(gc, guest_domid)), -+ "%d", -+ guest_config->b_info.device_model_version -+ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL && -+ !libxl__vnuma_configured(&guest_config->b_info)); - ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid); - if (ret<0) { - LOGED(ERROR, guest_domid, "setting target domain %d -> %d", --- -2.44.0 - - -From 5fda82641461a5234ab9bf0575423dfb8bfc5657 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 13:57:31 +0100 -Subject: [PATCH 35/70] build: make sure build fails when running kconfig fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Because of using "-include", failure to (re)build auto.conf (with -auto.conf.cmd produced as a secondary target) won't stop make from -continuing the build. Arrange for it being possible to drop the - from -Rules.mk, requiring that the include be skipped for tools-only targets. -Note that relying on the inclusion in those cases wouldn't be correct -anyway, as it might be a stale file (yet to be rebuilt) which would be -included, while during initial build, the file would be absent -altogether. - -Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*") -Reported-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: d34e5fa2e8db19f23081f46a3e710bb122130691 -master date: 2024-02-22 11:52:47 +0100 ---- - xen/Makefile | 1 + - xen/Rules.mk | 4 +++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 59d368e4d8..fdf9fd3f22 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep - # This exploits the 'multi-target pattern rule' trick. - # The syncconfig should be executed only once to make all the targets. 
- include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG) -+ $(Q)rm -f include/config/auto.conf - $(Q)$(MAKE) $(build)=tools/kconfig syncconfig - - ifeq ($(CONFIG_DEBUG),y) -diff --git a/xen/Rules.mk b/xen/Rules.mk -index 8af3dd7277..d759cccee3 100644 ---- a/xen/Rules.mk -+++ b/xen/Rules.mk -@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src) - PHONY := __build - __build: - ---include $(objtree)/include/config/auto.conf -+ifneq ($(firstword $(subst /, ,$(obj))),tools) -+include $(objtree)/include/config/auto.conf -+endif - - include $(XEN_ROOT)/Config.mk - include $(srctree)/scripts/Kbuild.include --- -2.44.0 - - -From a751d1321f6e1491d6ec2134d59eefa9f9752b86 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 13:57:50 +0100 -Subject: [PATCH 36/70] x86emul: add missing EVEX.R' checks - -EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask -register. While for mask registers suitable checks are in place (there -also covering EVEX.R), they were missing for the few cases where in -EVEX-encoded instructions ModR/M.reg encodes a GPR. While for VPEXTRW -the bit is replaced before an emulation stub is invoked, for -VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside -an emulation stub, in turn raising #UD to the guest, but accompanied by -log messages indicating something's wrong in Xen nevertheless. - -Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns") -Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a -master date: 2024-02-22 11:54:07 +0100 ---- - xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index cf780da501..d6b60f0539 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -3686,7 +3686,8 @@ x86_emulate( - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */ -- generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk || -+ generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || -+ evex.opmsk || - (ea.type != OP_REG && evex.brs)), - X86_EXC_UD); - host_and_vcpu_must_have(avx512f); -@@ -7295,7 +7296,7 @@ x86_emulate( - goto pextr; - - case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ -- generate_exception_if(ea.type != OP_REG, X86_EXC_UD); -+ generate_exception_if(ea.type != OP_REG || !evex.R, X86_EXC_UD); - /* Convert to alternative encoding: We want to use a memory operand. */ - evex.opcx = ext_0f3a; - b = 0x15; --- -2.44.0 - - -From 33a0368d3beb82ddb0cf7ed398b047325bb7be1c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 13:58:21 +0100 -Subject: [PATCH 37/70] xen/livepatch: fix norevert test hook setup typo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of -LIVEPATCH_REVERT_HOOK(). 
- -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36 -master date: 2024-02-26 10:13:46 +0100 ---- - xen/test/livepatch/xen_action_hooks_norevert.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index 3e21ade6ab..c173855192 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - printk(KERN_DEBUG "%s: Hook done.\n", __func__); - } - --LIVEPATCH_APPLY_HOOK(revert_hook); -+LIVEPATCH_REVERT_HOOK(revert_hook); - - LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook); - LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook); --- -2.44.0 - - -From f6e5ab5fa7257783fdbbaabf6010d8d97656c11f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 13:58:36 +0100 -Subject: [PATCH 38/70] xen/cmdline: fix printf format specifier in - no_config_param() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -'*' sets the width field, which is the minimum number of characters to output, -but what we want in no_config_param() is the precision instead, which is '.*' -as it imposes a maximum limit on the output. - -Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf -master date: 2024-02-26 10:17:40 +0100 ---- - xen/include/xen/param.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h -index 93c3fe7cb7..e02e49635c 100644 ---- a/xen/include/xen/param.h -+++ b/xen/include/xen/param.h -@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param, - { - int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); - -- printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n", -+ printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n", - cfg, param, len, s); - } - --- -2.44.0 - - -From 19fd9ff9981732995b1028f9e7e406061b723651 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 13:59:05 +0100 -Subject: [PATCH 39/70] x86/altcall: use a union as register type for function - parameters on clang -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code for alternative calls uses the caller parameter types as the -types for the register variables that serve as function parameters: - -uint8_t foo; -[...] -alternative_call(myfunc, foo); - -Would expand roughly into: - -register unint8_t a1_ asm("rdi") = foo; -register unsigned long a2_ asm("rsi"); -[...] 
-asm volatile ("call *%c[addr](%%rip)"...); - -However with -O2 clang will generate incorrect code, given the following -example: - -unsigned int func(uint8_t t) -{ - return t; -} - -static void bar(uint8_t b) -{ - int ret_; - register uint8_t di asm("rdi") = b; - register unsigned long si asm("rsi"); - register unsigned long dx asm("rdx"); - register unsigned long cx asm("rcx"); - register unsigned long r8 asm("r8"); - register unsigned long r9 asm("r9"); - register unsigned long r10 asm("r10"); - register unsigned long r11 asm("r11"); - - asm volatile ( "call %c[addr]" - : "+r" (di), "=r" (si), "=r" (dx), - "=r" (cx), "=r" (r8), "=r" (r9), - "=r" (r10), "=r" (r11), "=a" (ret_) - : [addr] "i" (&(func)), "g" (func) - : "memory" ); -} - -void foo(unsigned int a) -{ - bar(a); -} - -Clang generates the following assembly code: - -func: # @func - movl %edi, %eax - retq -foo: # @foo - callq func - retq - -Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when -passed into bar() is lost. clang doesn't zero extend the parameters in the -callee when required, as the psABI mandates. - -The above can be worked around by using a union when defining the register -variables, so that `di` becomes: - -register union { - uint8_t e; - unsigned long r; -} di asm("rdi") = { .e = b }; - -Which results in following code generated for `foo()`: - -foo: # @foo - movzbl %dil, %edi - callq func - retq - -So the truncation is not longer lost. Apply such workaround only when built -with clang. - -Reported-by: Matthew Grooms <mgrooms@shrew.net> -Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200 -Link: https://github.com/llvm/llvm-project/issues/12579 -Link: https://github.com/llvm/llvm-project/issues/82598 -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e -master date: 2024-02-26 10:18:01 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index a1cd6a9fe5..3c14db5078 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -167,9 +167,34 @@ extern void alternative_branches(void); - #define ALT_CALL_arg5 "r8" - #define ALT_CALL_arg6 "r9" - -+#ifdef CONFIG_CC_IS_CLANG -+/* -+ * Use a union with an unsigned long in order to prevent clang from -+ * skipping a possible truncation of the value. By using the union any -+ * truncation is carried before the call instruction, in turn covering -+ * for ABI-non-compliance in that the necessary clipping / extension of -+ * the value is supposed to be carried out in the callee. -+ * -+ * Note this behavior is not mandated by the standard, and hence could -+ * stop being a viable workaround, or worse, could cause a different set -+ * of code-generation issues in future clang versions. 
-+ * -+ * This has been reported upstream: -+ * https://github.com/llvm/llvm-project/issues/12579 -+ * https://github.com/llvm/llvm-project/issues/82598 -+ */ -+#define ALT_CALL_ARG(arg, n) \ -+ register union { \ -+ typeof(arg) e; \ -+ unsigned long r; \ -+ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \ -+ .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \ -+ } -+#else - #define ALT_CALL_ARG(arg, n) \ - register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \ - ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) -+#endif - #define ALT_CALL_NO_ARG(n) \ - register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n ) - --- -2.44.0 - - -From 4d47dca20dcfdca2340c8cda6f50dcdcafb1c054 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 13:59:42 +0100 -Subject: [PATCH 40/70] x86/spec: fix BRANCH_HARDEN option to only be set when - build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current logic to handle the BRANCH_HARDEN option will report it as enabled -even when build-time disabled. Fix this by only allowing the option to be set -when support for it is built into Xen. - -Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3 -master date: 2024-02-26 16:06:42 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a8d8af22f6..01ba59cff7 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -50,7 +50,8 @@ static int8_t __initdata opt_psfd = -1; - int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; --static bool __initdata opt_branch_harden = true; -+static bool __initdata opt_branch_harden = -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -268,7 +269,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) - opt_l1d_flush = val; - else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 ) -- opt_branch_harden = val; -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ) -+ opt_branch_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s, -+ ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) --- -2.44.0 - - -From 58bb8115104c9fca749ee4cfcd3579ac1ed644db Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 14:00:22 +0100 -Subject: [PATCH 41/70] x86: account for shadow stack in exception-from-stub - recovery - -Dealing with exceptions raised from within emulation stubs involves -discarding return address (replaced by exception related information). -Such discarding of course also requires removing the corresponding entry -from the shadow stack. - -Also amend the comment in fixup_exception_return(), to further clarify -why use of ptr[1] can't be an out-of-bounds access. - -While touching do_invalid_op() also add a missing fall-through -annotation. - -This is CVE-2023-46841 / XSA-451. 
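A toy model of the recovery step, using a plain array instead of real CET state (the actual fix must use wrss/incsspd, since shadow-stack memory is not ordinarily writable, and it also validates the frame first): dropping the stub's return address from the middle of the shadow stack means shifting every newer entry up one slot and advancing SSP by one.

#include <stdio.h>

#define SLOTS 8

int main(void)
{
    /* Lower index == newer entry, mimicking a downward-growing stack. */
    unsigned long shstk[SLOTS] = { 0 };
    unsigned long *ssp = &shstk[3];       /* current top of stack */
    unsigned long *victim = &shstk[5];    /* stub return address to drop */

    shstk[3] = 0xaaaa;                    /* entries newer than the stub RA */
    shstk[4] = 0xbbbb;
    shstk[5] = 0xdead;                    /* the stub's return address */
    shstk[6] = 0x1111;                    /* older entries stay untouched */
    shstk[7] = 0x2222;

    /* Shift every entry newer than the victim up by one slot... */
    for ( unsigned long *p = victim; p != ssp; p-- )
        p[0] = p[-1];
    /* ... and account for the stack having shifted up (cf. incsspd). */
    ssp++;

    for ( unsigned int i = 0; i < SLOTS; i++ )
        printf("%s[%u] %#lx\n",
               &shstk[i] == ssp ? "SSP->" : "     ", i, shstk[i]);

    return 0;
}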
- -Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 91f5f7a9154919a765c3933521760acffeddbf28 -master date: 2024-02-27 13:49:22 +0100 ---- - xen/arch/x86/extable.c | 20 ++++++---- - xen/arch/x86/include/asm/uaccess.h | 3 +- - xen/arch/x86/traps.c | 62 +++++++++++++++++++++++++++--- - 3 files changed, 71 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c -index 74b14246e9..8ffcd346d7 100644 ---- a/xen/arch/x86/extable.c -+++ b/xen/arch/x86/extable.c -@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first, - } - - unsigned long --search_exception_table(const struct cpu_user_regs *regs) -+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) - { - const struct virtual_region *region = find_text_region(regs->rip); - unsigned long stub = this_cpu(stubs.addr); - - if ( region && region->ex ) -+ { -+ *stub_ra = 0; - return search_one_extable(region->ex, region->ex_end, regs->rip); -+ } - - if ( regs->rip >= stub + STUB_BUF_SIZE / 2 && - regs->rip < stub + STUB_BUF_SIZE && - regs->rsp > (unsigned long)regs && - regs->rsp < (unsigned long)get_cpu_info() ) - { -- unsigned long retptr = *(unsigned long *)regs->rsp; -+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup; - -- region = find_text_region(retptr); -- retptr = region && region->ex -- ? search_one_extable(region->ex, region->ex_end, retptr) -- : 0; -- if ( retptr ) -+ region = find_text_region(retaddr); -+ fixup = region && region->ex -+ ? search_one_extable(region->ex, region->ex_end, retaddr) -+ : 0; -+ if ( fixup ) - { - /* - * Put trap number and error code on the stack (in place of the -@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs) - }; - - *(unsigned long *)regs->rsp = token.raw; -- return retptr; -+ *stub_ra = retaddr; -+ return fixup; - } - } - -diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h -index 684fccd95c..74bb222c03 100644 ---- a/xen/arch/x86/include/asm/uaccess.h -+++ b/xen/arch/x86/include/asm/uaccess.h -@@ -421,7 +421,8 @@ union stub_exception_token { - unsigned long raw; - }; - --extern unsigned long search_exception_table(const struct cpu_user_regs *regs); -+extern unsigned long search_exception_table(const struct cpu_user_regs *regs, -+ unsigned long *stub_ra); - extern void sort_exception_tables(void); - extern void sort_exception_table(struct exception_table_entry *start, - const struct exception_table_entry *stop); -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index e1356f696a..45e1b277ea 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -845,7 +845,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs) - } - - static void fixup_exception_return(struct cpu_user_regs *regs, -- unsigned long fixup) -+ unsigned long fixup, unsigned long stub_ra) - { - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) - { -@@ -862,7 +862,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - /* - * Search for %rip. The shstk currently looks like this: - * -- * ... [Likely pointed to by SSP] -+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] -+ * ... 
[Pointed to by SSP for most exceptions, empty in IST cases] - * %cs [== regs->cs] - * %rip [== regs->rip] - * SSP [Likely points to 3 slots higher, above %cs] -@@ -880,7 +881,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - */ - if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) - { -+ unsigned long primary_shstk = -+ (ssp & ~(STACK_SIZE - 1)) + -+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; -+ - wrss(fixup, ptr); -+ -+ if ( !stub_ra ) -+ goto shstk_done; -+ -+ /* -+ * Stub recovery ought to happen only when the outer context -+ * was on the main shadow stack. We need to also "pop" the -+ * stub's return address from the interrupted context's shadow -+ * stack. That is, -+ * - if we're still on the main stack, we need to move the -+ * entire stack (up to and including the exception frame) -+ * up by one slot, incrementing the original SSP in the -+ * exception frame, -+ * - if we're on an IST stack, we need to increment the -+ * original SSP. -+ */ -+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); -+ -+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) -+ { -+ /* -+ * We're on an IST stack. First make sure the two return -+ * addresses actually match. Then increment the interrupted -+ * context's SSP. -+ */ -+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); -+ wrss(ptr[-1] + 8, &ptr[-1]); -+ goto shstk_done; -+ } -+ -+ /* Make sure the two return addresses actually match. */ -+ BUG_ON(stub_ra != ptr[2]); -+ -+ /* Move exception frame, updating SSP there. */ -+ wrss(ptr[1], &ptr[2]); /* %cs */ -+ wrss(ptr[0], &ptr[1]); /* %rip */ -+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ -+ -+ /* Move all newer entries. */ -+ while ( --ptr != _p(ssp) ) -+ wrss(ptr[-1], &ptr[0]); -+ -+ /* Finally account for our own stack having shifted up. */ -+ asm volatile ( "incsspd %0" :: "r" (2) ); -+ - goto shstk_done; - } - } -@@ -901,7 +951,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - - static bool extable_fixup(struct cpu_user_regs *regs, bool print) - { -- unsigned long fixup = search_exception_table(regs); -+ unsigned long stub_ra = 0; -+ unsigned long fixup = search_exception_table(regs, &stub_ra); - - if ( unlikely(fixup == 0) ) - return false; -@@ -915,7 +966,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print) - vector_name(regs->entry_vector), regs->error_code, - _p(regs->rip), _p(regs->rip), _p(fixup)); - -- fixup_exception_return(regs, fixup); -+ fixup_exception_return(regs, fixup, stub_ra); - this_cpu(last_extable_addr) = regs->rip; - - return true; -@@ -1183,7 +1234,8 @@ void do_invalid_op(struct cpu_user_regs *regs) - { - case BUGFRAME_run_fn: - case BUGFRAME_warn: -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); -+ fallthrough; - case BUGFRAME_bug: - case BUGFRAME_assert: - return; --- -2.44.0 - - -From 498b3624d0ecc1267773e6482fd0b732e90c4511 Mon Sep 17 00:00:00 2001 -From: Michal Orzel <michal.orzel@amd.com> -Date: Thu, 8 Feb 2024 11:43:39 +0100 -Subject: [PATCH 42/70] xen/arm: Fix UBSAN failure in start_xen() - -When running Xen on arm32, in scenario where Xen is loaded at an address -such as boot_phys_offset >= 2GB, UBSAN reports the following: - -(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58 -(XEN) pointer operation underflowed 00200000 to 86800000 -(XEN) Xen WARN at common/ubsan/ubsan.c:172 -(XEN) ----[ Xen-4.19-unstable arm32 debug=y ubsan=y Not tainted ]---- -... 
-(XEN) Xen call trace: -(XEN) [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 -(XEN) [<004d15a8>] start_xen+0xe0/0xbe0 -(XEN) [<0020007c>] head.o#primary_switched+0x4/0x30 - -The failure is reported for the following line: -(paddr_t)(uintptr_t)(_start + boot_phys_offset) - -This occurs because the compiler treats (ptr + size) with size bigger than -PTRDIFF_MAX as undefined behavior. To address this, switch to macro -virt_to_maddr(), given the future plans to eliminate boot_phys_offset. - -Signed-off-by: Michal Orzel <michal.orzel@amd.com> -Reviewed-by: Luca Fancellu <luca.fancellu@arm.com> -Tested-by: Luca Fancellu <luca.fancellu@arm.com> -Acked-by: Julien Grall <jgrall@amazon.com> -(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169) ---- - xen/arch/arm/setup.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c -index db748839d3..2ccdde5277 100644 ---- a/xen/arch/arm/setup.c -+++ b/xen/arch/arm/setup.c -@@ -1109,7 +1109,7 @@ void __init start_xen(unsigned long boot_phys_offset, - - /* Register Xen's load address as a boot module. */ - xen_bootmodule = add_boot_module(BOOTMOD_XEN, -- (paddr_t)(uintptr_t)(_start + boot_phys_offset), -+ virt_to_maddr(_start), - (paddr_t)(uintptr_t)(_end - _start), false); - BUG_ON(!xen_bootmodule); - --- -2.44.0 - - -From 3e383bb4137c6ca3058cd55cb867ecc2b7414499 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 5 Mar 2024 11:48:39 +0100 -Subject: [PATCH 43/70] x86/HVM: hide SVM/VMX when their enabling is prohibited - by firmware -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -... or we fail to enable the functionality on the BSP for other reasons. -The only place where hardware announcing the feature is recorded is the -raw CPU policy/featureset. - -Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c -master date: 2024-01-09 14:06:34 +0100 ---- - xen/arch/x86/hvm/svm/svm.c | 1 + - xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index 24c417ca71..ff991c82cf 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -2543,6 +2543,7 @@ const struct hvm_function_table * __init start_svm(void) - - if ( _svm_cpu_up(true) ) - { -+ setup_clear_cpu_cap(X86_FEATURE_SVM); - printk("SVM: failed to initialise.\n"); - return NULL; - } -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index 13719cc923..e382aa16c5 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -2165,6 +2165,23 @@ int __init vmx_vmcs_init(void) - - if ( !ret ) - register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1); -+ else -+ { -+ setup_clear_cpu_cap(X86_FEATURE_VMX); -+ -+ /* -+ * _vmx_vcpu_up() may have made it past feature identification. -+ * Make sure all dependent features are off as well. 
-+ */ -+ vmx_basic_msr = 0; -+ vmx_pin_based_exec_control = 0; -+ vmx_cpu_based_exec_control = 0; -+ vmx_secondary_exec_control = 0; -+ vmx_vmexit_control = 0; -+ vmx_vmentry_control = 0; -+ vmx_ept_vpid_cap = 0; -+ vmx_vmfunc = 0; -+ } - - return ret; - } --- -2.44.0 - - -From 57f137053652d5a981ae21f3abe7becc507fe434 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 11:49:22 +0100 -Subject: [PATCH 44/70] xen/sched: Fix UB shift in compat_set_timer_op() - -Tamas reported this UBSAN failure from fuzzing: - - (XEN) ================================================================================ - (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37 - (XEN) left shift of negative value -2147425536 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y ubsan=y Not tainted ]---- - ... - (XEN) Xen call trace: - (XEN) [<ffff82d040307c1c>] R ubsan.c#ubsan_epilogue+0xa/0xd9 - (XEN) [<ffff82d040308afb>] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5 - (XEN) [<ffff82d040307758>] F compat_set_timer_op+0x41/0x43 - (XEN) [<ffff82d04040e4cc>] F hvm_do_multicall_call+0x77f/0xa75 - (XEN) [<ffff82d040519462>] F arch_do_multicall_call+0xec/0xf1 - (XEN) [<ffff82d040261567>] F do_multicall+0x1dc/0xde3 - (XEN) [<ffff82d04040d2b3>] F hvm_hypercall+0xa00/0x149a - (XEN) [<ffff82d0403cd072>] F vmx_vmexit_handler+0x1596/0x279c - (XEN) [<ffff82d0403d909b>] F vmx_asm_vmexit_handler+0xdb/0x200 - -Left-shifting any negative value is strictly undefined behaviour in C, and -the two parameters here come straight from the guest. - -The fuzzer happened to choose lo 0xf, hi 0x8000e300. - -Switch everything to be unsigned values, making the shift well defined. - -As GCC documents: - - As an extension to the C language, GCC does not use the latitude given in - C99 and C11 only to treat certain aspects of signed '<<' as undefined. - However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such - cases. - -this was deemed not to need an XSA. - -Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t -parameter is also well defined. C makes it implementation defined, and GCC -defines it as reduction modulo 2^N to be within range of the new type. 
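A self-contained illustration of the difference (compile with -fsanitize=shift to see the signed variant diagnosed; the reproducer values match the fuzzer's):

#include <stdint.h>
#include <stdio.h>

/* Old shape: hi is signed, so the shift is UB whenever hi < 0. */
static uint64_t combine_signed(uint32_t lo, int32_t hi)
{
    return ((int64_t)hi << 32) | lo;   /* flagged by -fsanitize=shift */
}

/* Fixed shape: all unsigned, well defined for every input. */
static uint64_t combine_unsigned(uint32_t lo, uint32_t hi)
{
    return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
    uint32_t lo = 0xf, hi = 0x8000e300;   /* the fuzzer's chosen inputs */

    printf("unsigned: %#llx\n", (unsigned long long)combine_unsigned(lo, hi));
    printf("signed:   %#llx\n",
           (unsigned long long)combine_signed(lo, (int32_t)hi));
    return 0;
}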
- -Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.") -Reported-by: Tamas K Lengyel <tamas@tklengyel.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb -master date: 2024-02-01 19:52:44 +0000 ---- - xen/common/sched/compat.c | 4 ++-- - xen/include/hypercall-defs.c | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c -index d718e450d4..dd97593630 100644 ---- a/xen/common/sched/compat.c -+++ b/xen/common/sched/compat.c -@@ -43,9 +43,9 @@ static int compat_poll(struct compat_sched_poll *compat) - - #include "core.c" - --int compat_set_timer_op(uint32_t lo, int32_t hi) -+int compat_set_timer_op(uint32_t lo, uint32_t hi) - { -- return do_set_timer_op(((s64)hi << 32) | lo); -+ return do_set_timer_op(((uint64_t)hi << 32) | lo); - } - - #endif /* __COMMON_SCHED_COMPAT_C__ */ -diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c -index 6d361ddfce..47c093acc8 100644 ---- a/xen/include/hypercall-defs.c -+++ b/xen/include/hypercall-defs.c -@@ -134,7 +134,7 @@ xenoprof_op(int op, void *arg) - - #ifdef CONFIG_COMPAT - prefix: compat --set_timer_op(uint32_t lo, int32_t hi) -+set_timer_op(uint32_t lo, uint32_t hi) - multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls) - memory_op(unsigned int cmd, void *arg) - #ifdef CONFIG_IOREQ_SERVER --- -2.44.0 - - -From b7f9168878155e2d29b9b4a3048b0a9a68ed82ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:50:16 +0100 -Subject: [PATCH 45/70] x86/spec: print the built-in SPECULATIVE_HARDEN_* - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Just like it's done for INDIRECT_THUNK and SHADOW_PAGING. - -Reported-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54 -master date: 2024-02-27 14:57:52 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 01ba59cff7..04e508b622 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -476,13 +476,25 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. 
*/ -- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" - #endif - #ifdef CONFIG_SHADOW_PAGING - " SHADOW_PAGING" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY -+ " HARDEN_ARRAY" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH -+ " HARDEN_BRANCH" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS -+ " HARDEN_GUEST_ACCESS" - #endif - "\n"); - --- -2.44.0 - - -From 09b9db0413b1f31f27bece07b2bfa1723b89ace6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:50:53 +0100 -Subject: [PATCH 46/70] x86/spec: fix INDIRECT_THUNK option to only be set when - build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Attempt to provide a more helpful error message when the user attempts to set -spec-ctrl=bti-thunk option but the support is build-time disabled. - -While there also adjust the command line documentation to mention -CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 8441fa806a3b778867867cd0159fa1722e90397e -master date: 2024-02-27 14:58:20 +0100 ---- - docs/misc/xen-command-line.pandoc | 10 +++++----- - xen/arch/x86/spec_ctrl.c | 7 ++++++- - 2 files changed, 11 insertions(+), 6 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 8e65f8bd18..582d6741d1 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2424,11 +2424,11 @@ guests to use. - performance reasons dom0 is unprotected by default. If it is necessary to - protect dom0 too, boot with `spec-ctrl=ibpb-entry`. - --If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to --select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` --locations. The default thunk is `retpoline` (generally preferred), with the --alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and --`lfence` (an `lfence; jmp *%reg` gadget). -+If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be -+used to select which of the thunks gets patched into the -+`__x86_indirect_thunk_%reg` locations. The default thunk is `retpoline` -+(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget, -+minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget). - - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 04e508b622..99ecfb3cba 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -241,7 +241,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - s += 10; - -- if ( !cmdline_strcmp(s, "retpoline") ) -+ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ { -+ no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss); -+ rc = -EINVAL; -+ } -+ else if ( !cmdline_strcmp(s, "retpoline") ) - opt_thunk = THUNK_RETPOLINE; - else if ( !cmdline_strcmp(s, "lfence") ) - opt_thunk = THUNK_LFENCE; --- -2.44.0 - - -From 7404c25efdc70091817479b80dbbd945e6ab4861 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:51:56 +0100 -Subject: [PATCH 47/70] x86/spec: do not print thunk option selection if not - built-in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since the thunk built-in enable is printed as part of the "Compiled-in -support:" line, avoid printing anything in "Xen settings:" if the thunk is -disabled at build time. - -Note the BTI-Thunk option printing is also adjusted to print a colon in the -same way the other options on the line do. - -Requested-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 576528a2a742069af203e90c613c5c93e23c9755 -master date: 2024-02-27 14:58:40 +0100 ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 99ecfb3cba..a965b6db28 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -504,11 +504,12 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -- thunk == THUNK_NONE ? "N/A" : -- thunk == THUNK_RETPOLINE ? "RETPOLINE" : -- thunk == THUNK_LFENCE ? "LFENCE" : -- thunk == THUNK_JMP ? "JMP" : "?", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ thunk != THUNK_NONE ? "BTI-Thunk: " : "", -+ thunk == THUNK_NONE ? "" : -+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -+ thunk == THUNK_LFENCE ? "LFENCE, " : -+ thunk == THUNK_JMP ? "JMP, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", --- -2.44.0 - - -From 5382a6a79cb544f2eecc47330b531802f8c52977 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:52:57 +0100 -Subject: [PATCH 48/70] xen/livepatch: register livepatch regions when loaded -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently livepatch regions are registered as virtual regions only after the -livepatch has been applied. - -This can lead to issues when using the pre-apply or post-revert hooks, as at -that point the livepatch is not in the virtual regions list. If a livepatch -pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the -code to handle the bug frame won't be able to find the instruction pointer that -triggered the #UD in any of the registered virtual regions, and hence crash. 
- -Fix this by adding the livepatch payloads as virtual regions as soon as loaded, -and only remove them once the payload is unloaded. This requires some changes -to the virtual regions code, as the removal of the virtual regions is no longer -done in stop machine context, and hence an RCU barrier is added in order to -make sure there are no users of the virtual region after it's been removed from -the list. - -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 4 ++-- - xen/common/virtual_region.c | 44 ++++++++++++++----------------------- - 2 files changed, 19 insertions(+), 29 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index e635606c10..e1964b841a 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1071,6 +1071,7 @@ static int build_symbol_table(struct payload *payload, - static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); -+ unregister_virtual_region(&data->region); - list_del(&data->list); - payload_cnt--; - payload_version++; -@@ -1170,6 +1171,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->list); - INIT_LIST_HEAD(&data->applied_list); - -+ register_virtual_region(&data->region); - list_add_tail(&data->list, &payload_list); - payload_cnt++; - payload_version++; -@@ -1386,7 +1388,6 @@ static inline void apply_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. - */ - list_add_tail_rcu(&data->applied_list, &applied_list); -- register_virtual_region(&data->region); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1432,7 +1433,6 @@ static inline void revert_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. - */ - list_del_rcu(&data->applied_list); -- unregister_virtual_region(&data->region); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index 5f89703f51..9f12c30efe 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = { - }; - - /* -- * RCU locking. Additions are done either at startup (when there is only -- * one CPU) or when all CPUs are running without IRQs. -- * -- * Deletions are bit tricky. We do it when Live Patch (all CPUs running -- * without IRQs) or during bootup (when clearing the init). -- * -- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock -- * on deletion. -+ * RCU locking. Modifications to the list must be done in exclusive mode, and -+ * hence need to hold the spinlock. - * - * All readers of virtual_region_list MUST use list_for_each_entry_rcu. 
- */ -@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr) - - void register_virtual_region(struct virtual_region *r) - { -- ASSERT(!local_irq_is_enabled()); -+ unsigned long flags; - -+ spin_lock_irqsave(&virtual_region_lock, flags); - list_add_tail_rcu(&r->list, &virtual_region_list); -+ spin_unlock_irqrestore(&virtual_region_lock, flags); - } - --static void remove_virtual_region(struct virtual_region *r) -+/* -+ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left -+ * unreachable after init code is removed. -+ */ -+static void inline remove_virtual_region(struct virtual_region *r) - { - unsigned long flags; - - spin_lock_irqsave(&virtual_region_lock, flags); - list_del_rcu(&r->list); - spin_unlock_irqrestore(&virtual_region_lock, flags); -- /* -- * We do not need to invoke call_rcu. -- * -- * This is due to the fact that on the deletion we have made sure -- * to use spinlocks (to guard against somebody else calling -- * unregister_virtual_region) and list_deletion spiced with -- * memory barrier. -- * -- * That protects us from corrupting the list as the readers all -- * use list_for_each_entry_rcu which is safe against concurrent -- * deletions. -- */ - } - -+#ifdef CONFIG_LIVEPATCH - void unregister_virtual_region(struct virtual_region *r) - { -- /* Expected to be called from Live Patch - which has IRQs disabled. */ -- ASSERT(!local_irq_is_enabled()); -- - remove_virtual_region(r); -+ -+ /* Assert that no CPU might be using the removed region. */ -+ rcu_barrier(); - } - --#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86) -+#ifdef CONFIG_X86 - void relax_virtual_region_perms(void) - { - const struct virtual_region *region; -@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void) - PAGE_HYPERVISOR_RX); - rcu_read_unlock(&rcu_virtual_region_lock); - } --#endif -+#endif /* CONFIG_X86 */ -+#endif /* CONFIG_LIVEPATCH */ - - void __init unregister_init_virtual_region(void) - { --- -2.44.0 - - -From 50a8f74df76b7ce7c35ad97a539f505eb0a9baa6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:53:05 +0100 -Subject: [PATCH 49/70] xen/livepatch: search for symbols in all loaded - payloads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When checking if an address belongs to a patch, or when resolving a symbol, -take into account all loaded livepatch payloads, even if not applied. - -This is required in order for the pre-apply and post-revert hooks to work -properly, or else Xen won't detect the instruction pointer belonging to those -hooks as being part of the currently active text. - -Move the RCU handling to be used for payload_list instead of applied_list, as -now the calls from trap code will iterate over the payload_list. - -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 49 +++++++++++++++--------------------------- - 1 file changed, 17 insertions(+), 32 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index e1964b841a..135c47e9b8 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -36,13 +36,14 @@ - * caller in schedule_work. 
- */ - static DEFINE_SPINLOCK(payload_lock); --static LIST_HEAD(payload_list); -- - /* -- * Patches which have been applied. Need RCU in case we crash (and then -- * traps code would iterate via applied_list) when adding entries on the list. -+ * Need RCU in case we crash (and then traps code would iterate via -+ * payload_list) when adding entries on the list. - */ --static DEFINE_RCU_READ_LOCK(rcu_applied_lock); -+static DEFINE_RCU_READ_LOCK(rcu_payload_lock); -+static LIST_HEAD(payload_list); -+ -+/* Patches which have been applied. Only modified from stop machine context. */ - static LIST_HEAD(applied_list); - - static unsigned int payload_cnt; -@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr) - const struct payload *data; - bool_t r = 0; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. -- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( (ptr >= data->rw_addr && - ptr < (data->rw_addr + data->rw_size)) || -@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr) - } - - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return r; - } -@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup( - const void *va = (const void *)addr; - const char *n = NULL; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. -- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( va < data->text_addr || - va >= (data->text_addr + data->text_size) ) -@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup( - n = data->symtab[best].name; - break; - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return n; - } -@@ -1072,7 +1065,8 @@ static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); - unregister_virtual_region(&data->region); -- list_del(&data->list); -+ list_del_rcu(&data->list); -+ rcu_barrier(); - payload_cnt--; - payload_version++; - free_payload_data(data); -@@ -1172,7 +1166,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->applied_list); - - register_virtual_region(&data->region); -- list_add_tail(&data->list, &payload_list); -+ list_add_tail_rcu(&data->list, &payload_list); - payload_cnt++; - payload_version++; - } -@@ -1383,11 +1377,7 @@ static int apply_payload(struct payload *data) - - static inline void apply_payload_tail(struct payload *data) - { -- /* -- * We need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. -- */ -- list_add_tail_rcu(&data->applied_list, &applied_list); -+ list_add_tail(&data->applied_list, &applied_list); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1427,12 +1417,7 @@ static int revert_payload(struct payload *data) - - static inline void revert_payload_tail(struct payload *data) - { -- -- /* -- * We need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. 
-- */ -- list_del_rcu(&data->applied_list); -+ list_del(&data->applied_list); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; --- -2.44.0 - - -From d81bfc7ff887426727504086fa363f91bf8c19f8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:53:13 +0100 -Subject: [PATCH 50/70] xen/livepatch: fix norevert test attempt to open-code - revert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The purpose of the norevert test is to install a dummy handler that replaces -the internal Xen revert code, and then perform the revert in the post-revert -hook. For that purpose the usage of the previous common_livepatch_revert() is -not enough, as that just reverts specific functions, but not the whole state of -the payload. - -Remove both common_livepatch_{apply,revert}() and instead expose -revert_payload{,_tail}() in order to perform the patch revert from the -post-revert hook. - -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 41 +++++++++++++++++-- - xen/include/xen/livepatch.h | 32 ++------------- - .../livepatch/xen_action_hooks_norevert.c | 22 +++------- - 3 files changed, 46 insertions(+), 49 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index 135c47e9b8..0cc048fd83 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1366,7 +1366,22 @@ static int apply_payload(struct payload *data) - ASSERT(!local_irq_is_enabled()); - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_apply(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* If the action has been already executed on this function, do nothing. */ -+ if ( state->applied == LIVEPATCH_FUNC_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has been already applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_apply(func, state); -+ state->applied = LIVEPATCH_FUNC_APPLIED; -+ } - - arch_livepatch_revive(); - -@@ -1382,7 +1397,7 @@ static inline void apply_payload_tail(struct payload *data) - data->state = LIVEPATCH_STATE_APPLIED; - } - --static int revert_payload(struct payload *data) -+int revert_payload(struct payload *data) - { - unsigned int i; - int rc; -@@ -1397,7 +1412,25 @@ static int revert_payload(struct payload *data) - } - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_revert(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* -+ * If the apply action hasn't been executed on this function, do -+ * nothing. 
-+ */ -+ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has not been applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_revert(func, state); -+ state->applied = LIVEPATCH_FUNC_NOT_APPLIED; -+ } - - /* - * Since we are running with IRQs disabled and the hooks may call common -@@ -1415,7 +1448,7 @@ static int revert_payload(struct payload *data) - return 0; - } - --static inline void revert_payload_tail(struct payload *data) -+void revert_payload_tail(struct payload *data) - { - list_del(&data->applied_list); - -diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h -index 537d3d58b6..c9ee58fd37 100644 ---- a/xen/include/xen/livepatch.h -+++ b/xen/include/xen/livepatch.h -@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void); - void arch_livepatch_mask(void); - void arch_livepatch_unmask(void); - --static inline void common_livepatch_apply(const struct livepatch_func *func, -- struct livepatch_fstate *state) --{ -- /* If the action has been already executed on this function, do nothing. */ -- if ( state->applied == LIVEPATCH_FUNC_APPLIED ) -- { -- printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n", -- __func__, func->name); -- return; -- } -- -- arch_livepatch_apply(func, state); -- state->applied = LIVEPATCH_FUNC_APPLIED; --} -+/* Only for testing purposes. */ -+struct payload; -+int revert_payload(struct payload *data); -+void revert_payload_tail(struct payload *data); - --static inline void common_livepatch_revert(const struct livepatch_func *func, -- struct livepatch_fstate *state) --{ -- /* If the apply action hasn't been executed on this function, do nothing. */ -- if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -- { -- printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n", -- __func__, func->name); -- return; -- } -- -- arch_livepatch_revert(func, state); -- state->applied = LIVEPATCH_FUNC_NOT_APPLIED; --} - #else - - /* -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index c173855192..c5fbab1746 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload) - - static void post_revert_hook(livepatch_payload_t *payload) - { -- int i; -+ unsigned long flags; - - printk(KERN_DEBUG "%s: Hook starting.\n", __func__); - -- for (i = 0; i < payload->nfuncs; i++) -- { -- const struct livepatch_func *func = &payload->funcs[i]; -- struct livepatch_fstate *fstate = &payload->fstate[i]; -- -- BUG_ON(revert_cnt != 1); -- BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); -- -- /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */ -- arch_livepatch_quiesce(); -- common_livepatch_revert(payload); -- arch_livepatch_revive(); -- BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); -- -- printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name); -- } -+ local_irq_save(flags); -+ BUG_ON(revert_payload(payload)); -+ revert_payload_tail(payload); -+ local_irq_restore(flags); - - printk(KERN_DEBUG "%s: Hook done.\n", __func__); - } --- -2.44.0 - - -From e9516b73e7d499684092c1d345818585403cf190 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:53:22 +0100 -Subject: [PATCH 51/70] xen/livepatch: properly build the noapply and norevert - 
tests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-It seems the build variables for those tests were copy-pasted from
-xen_action_hooks_marker-objs and not adjusted to use the correct source files.
-
-Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: e579677095782c7dec792597ba8b037b7d716b32
-master date: 2024-02-28 16:57:25 +0000
----
- xen/test/livepatch/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile
-index c258ab0b59..d987a8367f 100644
---- a/xen/test/livepatch/Makefile
-+++ b/xen/test/livepatch/Makefile
-@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o
- $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h
- 
- extra-y += xen_action_hooks_noapply.livepatch
--xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
-+xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o
- 
- $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h
- 
- extra-y += xen_action_hooks_norevert.livepatch
--xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
-+xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o
- 
- EXPECT_BYTES_COUNT := 8
- CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g')
---
-2.44.0
-
-
-From 267845a8389d5d34edb2b38a1972f32f51f70b4e Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Tue, 5 Mar 2024 11:54:12 +0100
-Subject: [PATCH 52/70] libxl: Fix segfault in device_model_spawn_outcome
-
-libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when
-starting QEMU (the usual launch through libxl__spawn_local_dm() has a
-guest_config though).
-
-Bail early on a NULL guest_config/d_config. This skips the QMP queries
-for chardevs and VNC, but this xenpv QEMU instance isn't expected to
-provide those - only qdisk (or 9pfs backends after an upcoming change).
-
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Acked-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255
-master date: 2024-02-29 08:18:38 +0100
----
- tools/libs/light/libxl_dm.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
-index ed620a9d8e..29b43ed20a 100644
---- a/tools/libs/light/libxl_dm.c
-+++ b/tools/libs/light/libxl_dm.c
-@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc,
- 
- /* Check if spawn failed */
- if (rc) goto out;
--
-- if (d_config->b_info.device_model_version
-+ /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). 
*/ -+ if (d_config && d_config->b_info.device_model_version - == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { - rc = libxl__ev_time_register_rel(ao, &dmss->timeout, - devise_model_postconfig_timeout, --- -2.44.0 - - -From 75221fb0f87e4d7278b0a540bc28a6d0b74afeba Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:54:33 +0100 -Subject: [PATCH 53/70] x86/altcall: always use a temporary parameter stashing - variable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The usage in ALT_CALL_ARG() on clang of: - -register union { - typeof(arg) e; - const unsigned long r; -} ... - -When `arg` is the first argument to alternative_{,v}call() and -const_vlapic_vcpu() is used results in clang 3.5.0 complaining with: - -arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line - alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) ) - -Workaround this by pulling `arg1` into a local variable, like it's done for -further arguments (arg2, arg3...) - -Originally arg1 wasn't pulled into a variable because for the a1_ register -local variable the possible clobbering as a result of operators on other -variables don't matter: - -https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - -Note clang version 3.8.1 seems to already be fixed and don't require the -workaround, but since it's harmless do it uniformly everywhere. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0 -master date: 2024-02-29 08:21:49 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++--------- - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index 3c14db5078..0d3697f1de 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -253,21 +253,24 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - (void)sizeof(func(arg)); \ - (void)alternative_callN(1, int, func); \ - }) - - #define alternative_call1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - alternative_callN(1, typeof(func(arg)), func); \ - }) - - #define alternative_vcall2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - (void)sizeof(func(arg1, arg2)); \ -@@ -275,17 +278,19 @@ extern void alternative_branches(void); - }) - - #define alternative_call2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - alternative_callN(2, typeof(func(arg1, arg2)), func); \ - }) - - #define alternative_vcall3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - 
ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -294,9 +299,10 @@ extern void alternative_branches(void); - }) - - #define alternative_call3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -305,10 +311,11 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -318,10 +325,11 @@ extern void alternative_branches(void); - }) - - #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -332,11 +340,12 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -347,11 +356,12 @@ extern void alternative_branches(void); - }) - - #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -363,12 +373,13 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ - typeof(arg6) v6_ = (arg6); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -379,12 +390,13 @@ extern void alternative_branches(void); - }) - - #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ - typeof(arg6) v6_ = (arg6); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ --- -2.44.0 - - -From fd7cb7a1d0433049d8fc59444d0e91b71728763e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 11:55:17 +0100 -Subject: [PATCH 54/70] x86/cpu-policy: Allow for levelling of VERW side - effects -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by -having them unconditinally set in max, with the host values reflected in -default. Annotate the bits as having special properies. 
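The max/default split works out as below, in a standalone sketch with invented names; Xen's real logic lives in guest_common_{max,default}_feature_adjustments(), shown in the hunks that follow:

    #include <stdbool.h>
    #include <stdio.h>

    struct feats { bool md_clear, fb_clear; };

    int main(void)
    {
        const struct feats host = { .md_clear = false, .fb_clear = true };
        struct feats max, def;

        /*
         * Max: claim both unconditionally. VERW exists on all hardware, so
         * advertising its scrubbing side effects where they do nothing is
         * harmless, and keeps migration to affected hosts possible.
         */
        max.md_clear = max.fb_clear = true;

        /* Default: levelled back to what this host actually provides. */
        def = host;

        printf("max: md=%d fb=%d; default: md=%d fb=%d\n",
               max.md_clear, max.fb_clear, def.md_clear, def.fb_clear);
        return 0;
    }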
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++ - xen/arch/x86/include/asm/cpufeature.h | 1 + - xen/include/public/arch-x86/cpufeatureset.h | 4 ++-- - 3 files changed, 27 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index bcb17b7ce3..c7c5e99b7b 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -442,6 +442,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - __set_bit(X86_FEATURE_RSBA, fs); - __set_bit(X86_FEATURE_RRSBA, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. With pooling, they mean "you might migrate -+ * somewhere where scrubbing is necessary", and may need exposing on -+ * unaffected hardware. This is fine, because the VERW instruction -+ * has been around since the 286. -+ */ -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -@@ -476,6 +486,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) - __clear_bit(X86_FEATURE_RDRAND, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. The max policy has them set for migration -+ * reasons, so reset the default policy back to the host values in -+ * case we're unaffected. -+ */ -+ __clear_bit(X86_FEATURE_MD_CLEAR, fs); -+ if ( cpu_has_md_clear ) -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ -+ __clear_bit(X86_FEATURE_FB_CLEAR, fs); -+ if ( cpu_has_fb_clear ) -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 06e1dd7f33..76ef2aeb1d 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -177,6 +177,7 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) - #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) - #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) -+#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 6b6ce2745c..337aaa9c77 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -262,7 +262,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single - XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ - XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ - XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. 
*/ --XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ -+XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -@@ -329,7 +329,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing - XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ - XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ - XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ --XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ -+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */ - XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ - XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */ - XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ --- -2.44.0 - - -From 4c84fa6cb66fe66f2c5dad65208c497558ab7d17 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 12 Mar 2024 12:06:57 +0100 -Subject: [PATCH 55/70] hvmloader/PCI: skip huge BARs in certain calculations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -BARs of size 2Gb and up can't possibly fit below 4Gb: Both the bottom of -the lower 2Gb range and the top of the higher 2Gb range have special -purpose. Don't even have them influence whether to (perhaps) relocate -low RAM. - -Reported-by: Neowutran <xen@neowutran.ovh> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 57acad12a09ffa490e870ebe17596aad858f0191 -master date: 2024-03-06 10:19:29 +0100 ---- - tools/firmware/hvmloader/pci.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c -index 257a6feb61..c3c61ca060 100644 ---- a/tools/firmware/hvmloader/pci.c -+++ b/tools/firmware/hvmloader/pci.c -@@ -33,6 +33,13 @@ uint32_t pci_mem_start = HVM_BELOW_4G_MMIO_START; - const uint32_t pci_mem_end = RESERVED_MEMBASE; - uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0; - -+/* -+ * BARs larger than this value are put in 64-bit space unconditionally. That -+ * is, such BARs also don't play into the determination of how big the lowmem -+ * MMIO hole needs to be. 
-+ */ -+#define BAR_RELOC_THRESH GB(1) -+ - enum virtual_vga virtual_vga = VGA_none; - unsigned long igd_opregion_pgbase = 0; - -@@ -286,9 +293,11 @@ void pci_setup(void) - bars[i].bar_reg = bar_reg; - bars[i].bar_sz = bar_sz; - -- if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) == -- PCI_BASE_ADDRESS_SPACE_MEMORY) || -- (bar_reg == PCI_ROM_ADDRESS) ) -+ if ( is_64bar && bar_sz > BAR_RELOC_THRESH ) -+ bar64_relocate = 1; -+ else if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) == -+ PCI_BASE_ADDRESS_SPACE_MEMORY) || -+ (bar_reg == PCI_ROM_ADDRESS) ) - mmio_total += bar_sz; - - nr_bars++; -@@ -367,7 +376,7 @@ void pci_setup(void) - pci_mem_start = hvm_info->low_mem_pgend << PAGE_SHIFT; - } - -- if ( mmio_total > (pci_mem_end - pci_mem_start) ) -+ if ( mmio_total > (pci_mem_end - pci_mem_start) || bar64_relocate ) - { - printf("Low MMIO hole not large enough for all devices," - " relocating some BARs to 64-bit\n"); -@@ -430,7 +439,8 @@ void pci_setup(void) - - /* - * Relocate to high memory if the total amount of MMIO needed -- * is more than the low MMIO available. Because devices are -+ * is more than the low MMIO available or BARs bigger than -+ * BAR_RELOC_THRESH are present. Because devices are - * processed in order of bar_sz, this will preferentially - * relocate larger devices to high memory first. - * -@@ -446,8 +456,9 @@ void pci_setup(void) - * the code here assumes it to be.) - * Should either of those two conditions change, this code will break. - */ -- using_64bar = bars[i].is_64bar && bar64_relocate -- && (mmio_total > (mem_resource.max - mem_resource.base)); -+ using_64bar = bars[i].is_64bar && bar64_relocate && -+ (mmio_total > (mem_resource.max - mem_resource.base) || -+ bar_sz > BAR_RELOC_THRESH); - bar_data = pci_readl(devfn, bar_reg); - - if ( (bar_data & PCI_BASE_ADDRESS_SPACE) == -@@ -467,7 +478,8 @@ void pci_setup(void) - resource = &mem_resource; - bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK; - } -- mmio_total -= bar_sz; -+ if ( bar_sz <= BAR_RELOC_THRESH ) -+ mmio_total -= bar_sz; - } - else - { --- -2.44.0 - - -From a96d2d4355d85fc82abd0a3799978db04ee8cff3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 12 Mar 2024 12:07:07 +0100 -Subject: [PATCH 56/70] x86/mm: fix detection of last L1 entry in - modify_xen_mappings_lite() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current logic to detect when to switch to the next L1 table is incorrectly -using l2_table_offset() in order to notice when the last entry on the current -L1 table has been reached. - -It should instead use l1_table_offset() to check whether the index has wrapped -to point to the first entry, and so the next L1 table should be used. 
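The difference between the two helpers is easy to see in isolation (constants as in Xen's x86-64 paging layout; this standalone sketch is illustrative only, and TABLE_INDEX_MASK is an invented shorthand for Xen's L1_PAGETABLE_ENTRIES - 1):

    #include <stdio.h>

    #define PAGETABLE_ORDER     9
    #define L1_PAGETABLE_SHIFT  12                      /* 4k pages */
    #define L2_PAGETABLE_SHIFT  (L1_PAGETABLE_SHIFT + PAGETABLE_ORDER)
    #define TABLE_INDEX_MASK    ((1UL << PAGETABLE_ORDER) - 1)

    #define l1_table_offset(v)  (((v) >> L1_PAGETABLE_SHIFT) & TABLE_INDEX_MASK)
    #define l2_table_offset(v)  (((v) >> L2_PAGETABLE_SHIFT) & TABLE_INDEX_MASK)

    int main(void)
    {
        /* Step off the last slot of an L1 table, as the loop in
         * modify_xen_mappings_lite() does below. */
        unsigned long v = 511UL << L1_PAGETABLE_SHIFT;

        v += 1UL << L1_PAGETABLE_SHIFT;

        /* Wraps to 0 exactly when the next L1 table is needed... */
        printf("l1_table_offset: %lu\n", l1_table_offset(v));   /* 0 */
        /* ...while the L2 index is 0 only near 1GiB boundaries. */
        printf("l2_table_offset: %lu\n", l2_table_offset(v));   /* 1 */
        return 0;
    }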
- -Fixes: 8676092a0f16 ('x86/livepatch: Fix livepatch application when CET is active') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 7c81558208de7858251b62f168a449be84305595 -master date: 2024-03-11 11:09:42 +0000 ---- - xen/arch/x86/mm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 39544bd9f9..ab0acbfea6 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5947,7 +5947,7 @@ void init_or_livepatch modify_xen_mappings_lite( - - v += 1UL << L1_PAGETABLE_SHIFT; - -- if ( l2_table_offset(v) == 0 ) -+ if ( l1_table_offset(v) == 0 ) - break; - } - --- -2.44.0 - - -From fe1869a569bab56e44c35d1522ee064bab6286da Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Sat, 27 Jan 2024 17:52:09 +0000 -Subject: [PATCH 57/70] x86/entry: Introduce EFRAME_* constants - -restore_all_guest() does a lot of manipulation of the stack after popping the -GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths -use raw %rsp displacements prior to pushing GPRs. - -Provide better mnemonics, to aid readability and reduce the chance of errors -when editing. - -No functional change. The resulting binary is identical. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035) ---- - xen/arch/x86/x86_64/asm-offsets.c | 17 ++++++++ - xen/arch/x86/x86_64/compat/entry.S | 2 +- - xen/arch/x86/x86_64/entry.S | 70 +++++++++++++++--------------- - 3 files changed, 53 insertions(+), 36 deletions(-) - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 57b73a4e62..2fc4d9130a 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -51,6 +51,23 @@ void __dummy__(void) - OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); - BLANK(); - -+ /* -+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at -+ * UREGS_error_code and GPRs are still/already guest values. -+ */ -+#define OFFSET_EF(sym, mem) \ -+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -+ offsetof(struct cpu_user_regs, error_code)) -+ -+ OFFSET_EF(EFRAME_entry_vector, entry_vector); -+ OFFSET_EF(EFRAME_rip, rip); -+ OFFSET_EF(EFRAME_cs, cs); -+ OFFSET_EF(EFRAME_eflags, eflags); -+ OFFSET_EF(EFRAME_rsp, rsp); -+ BLANK(); -+ -+#undef OFFSET_EF -+ - OFFSET(VCPU_processor, struct vcpu, processor); - OFFSET(VCPU_domain, struct vcpu, domain); - OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index fcc3a721f1..cb473f08ee 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -15,7 +15,7 @@ ENTRY(entry_int82) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $HYPERCALL_VECTOR, 4(%rsp) -+ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
*/ - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 9a7b129aa7..968da9d727 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -190,15 +190,15 @@ restore_all_guest: - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL -- testw $TRAP_syscall,4(%rsp) -+ testw $TRAP_syscall, EFRAME_entry_vector(%rsp) - jz iret_exit_to_guest - -- movq 24(%rsp),%r11 # RFLAGS -+ mov EFRAME_eflags(%rsp), %r11 - andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11 - orq $X86_EFLAGS_IF,%r11 - - /* Don't use SYSRET path if the return address is not canonical. */ -- movq 8(%rsp),%rcx -+ mov EFRAME_rip(%rsp), %rcx - sarq $47,%rcx - incl %ecx - cmpl $1,%ecx -@@ -213,20 +213,20 @@ restore_all_guest: - ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK - #endif - -- movq 8(%rsp), %rcx # RIP -- cmpw $FLAT_USER_CS32,16(%rsp)# CS -- movq 32(%rsp),%rsp # RSP -+ mov EFRAME_rip(%rsp), %rcx -+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) -+ mov EFRAME_rsp(%rsp), %rsp - je 1f - sysretq - 1: sysretl - - ALIGN - .Lrestore_rcx_iret_exit_to_guest: -- movq 8(%rsp), %rcx # RIP -+ mov EFRAME_rip(%rsp), %rcx - /* No special register assumptions. */ - iret_exit_to_guest: -- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp) -- orl $X86_EFLAGS_IF,24(%rsp) -+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) -+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -257,7 +257,7 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -294,7 +294,7 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $0x80, 4(%rsp) -+ movl $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -649,7 +649,7 @@ ret_from_intr: - .section .init.text, "ax", @progbits - ENTRY(early_page_fault) - ENDBR64 -- movl $X86_EXC_PF, 4(%rsp) -+ movl $X86_EXC_PF, EFRAME_entry_vector(%rsp) - SAVE_ALL - movq %rsp, %rdi - call do_early_page_fault -@@ -716,7 +716,7 @@ ENTRY(common_interrupt) - - ENTRY(entry_PF) - ENDBR64 -- movl $X86_EXC_PF, 4(%rsp) -+ movl $X86_EXC_PF, EFRAME_entry_vector(%rsp) - /* No special register assumptions. 
*/ - GLOBAL(handle_exception) - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP -@@ -890,90 +890,90 @@ FATAL_exception_with_ints_disabled: - ENTRY(entry_DE) - ENDBR64 - pushq $0 -- movl $X86_EXC_DE, 4(%rsp) -+ movl $X86_EXC_DE, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_MF) - ENDBR64 - pushq $0 -- movl $X86_EXC_MF, 4(%rsp) -+ movl $X86_EXC_MF, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_XM) - ENDBR64 - pushq $0 -- movl $X86_EXC_XM, 4(%rsp) -+ movl $X86_EXC_XM, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_NM) - ENDBR64 - pushq $0 -- movl $X86_EXC_NM, 4(%rsp) -+ movl $X86_EXC_NM, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_DB) - ENDBR64 - pushq $0 -- movl $X86_EXC_DB, 4(%rsp) -+ movl $X86_EXC_DB, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - ENTRY(entry_BP) - ENDBR64 - pushq $0 -- movl $X86_EXC_BP, 4(%rsp) -+ movl $X86_EXC_BP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_OF) - ENDBR64 - pushq $0 -- movl $X86_EXC_OF, 4(%rsp) -+ movl $X86_EXC_OF, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_BR) - ENDBR64 - pushq $0 -- movl $X86_EXC_BR, 4(%rsp) -+ movl $X86_EXC_BR, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_UD) - ENDBR64 - pushq $0 -- movl $X86_EXC_UD, 4(%rsp) -+ movl $X86_EXC_UD, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_TS) - ENDBR64 -- movl $X86_EXC_TS, 4(%rsp) -+ movl $X86_EXC_TS, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_NP) - ENDBR64 -- movl $X86_EXC_NP, 4(%rsp) -+ movl $X86_EXC_NP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_SS) - ENDBR64 -- movl $X86_EXC_SS, 4(%rsp) -+ movl $X86_EXC_SS, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_GP) - ENDBR64 -- movl $X86_EXC_GP, 4(%rsp) -+ movl $X86_EXC_GP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_AC) - ENDBR64 -- movl $X86_EXC_AC, 4(%rsp) -+ movl $X86_EXC_AC, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_CP) - ENDBR64 -- movl $X86_EXC_CP, 4(%rsp) -+ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_DF) - ENDBR64 -- movl $X86_EXC_DF, 4(%rsp) -+ movl $X86_EXC_DF, EFRAME_entry_vector(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -998,7 +998,7 @@ ENTRY(entry_DF) - ENTRY(entry_NMI) - ENDBR64 - pushq $0 -- movl $X86_EXC_NMI, 4(%rsp) -+ movl $X86_EXC_NMI, EFRAME_entry_vector(%rsp) - handle_ist_exception: - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1130,7 +1130,7 @@ handle_ist_exception: - ENTRY(entry_MC) - ENDBR64 - pushq $0 -- movl $X86_EXC_MC, 4(%rsp) -+ movl $X86_EXC_MC, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - /* No op trap handler. Required for kexec crash path. */ -@@ -1167,7 +1167,7 @@ autogen_stubs: /* Automatically generated stubs. */ - 1: - ENDBR64 - pushq $0 -- movb $vec,4(%rsp) -+ movb $vec, EFRAME_entry_vector(%rsp) - jmp common_interrupt - - entrypoint 1b -@@ -1181,7 +1181,7 @@ autogen_stubs: /* Automatically generated stubs. */ - test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */ - jz 2f /* size is 8 bytes. Check whether the processor gave us an */ - pushq $0 /* error code, and insert an empty one if not. 
*/ --2: movb $vec,4(%rsp) -+2: movb $vec, EFRAME_entry_vector(%rsp) - jmp handle_exception - - entrypoint 1b --- -2.44.0 - - -From b91c253e81db915f685b29e6947144ab9905388d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 27 Feb 2024 16:07:39 +0000 -Subject: [PATCH 58/70] x86: Resync intel-family.h from Linux - -From v6.8-rc6 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8) ---- - xen/arch/x86/include/asm/intel-family.h | 38 ++++++++++++++++++++++--- - 1 file changed, 34 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h -index ffc49151be..b65e9c46b9 100644 ---- a/xen/arch/x86/include/asm/intel-family.h -+++ b/xen/arch/x86/include/asm/intel-family.h -@@ -26,6 +26,9 @@ - * _G - parts with extra graphics on - * _X - regular server parts - * _D - micro server parts -+ * _N,_P - other mobile parts -+ * _H - premium mobile parts -+ * _S - other client parts - * - * Historical OPTDIFFs: - * -@@ -37,6 +40,9 @@ - * their own names :-( - */ - -+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ -+#define INTEL_FAM6_ANY X86_MODEL_ANY -+ - #define INTEL_FAM6_CORE_YONAH 0x0E - - #define INTEL_FAM6_CORE2_MEROM 0x0F -@@ -93,8 +99,6 @@ - #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ - #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ - --#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -- - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ - - #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ -@@ -102,12 +106,31 @@ - - #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ - -+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF -+ -+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD -+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE -+ -+/* "Hybrid" Processors (P-Core/E-Core) */ -+ -+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -+ - #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ - #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ - --#define INTEL_FAM6_RAPTORLAKE 0xB7 -+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ -+#define INTEL_FAM6_RAPTORLAKE_P 0xBA -+#define INTEL_FAM6_RAPTORLAKE_S 0xBF -+ -+#define INTEL_FAM6_METEORLAKE 0xAC -+#define INTEL_FAM6_METEORLAKE_L 0xAA -+ -+#define INTEL_FAM6_ARROWLAKE_H 0xC5 -+#define INTEL_FAM6_ARROWLAKE 0xC6 -+ -+#define INTEL_FAM6_LUNARLAKE_M 0xBD - --/* "Small Core" Processors (Atom) */ -+/* "Small Core" Processors (Atom/E-Core) */ - - #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ - #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ -@@ -134,6 +157,13 @@ - #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ - #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ - -+#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -+ -+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ -+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ -+ -+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ -+ - /* Xeon Phi */ - - #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ --- -2.44.0 - - -From 9f89ec65fbe49c3be32a456091097d7ef017d268 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 23 Jun 2023 11:32:00 +0100 -Subject: [PATCH 59/70] x86/vmx: Perform VERW flushing later in the VMExit path - -Broken out of the following patch because 
this change is subtle enough on its -own. See it for the rational of why we're moving VERW. - -As for how, extend the trick already used to hold one condition in -flags (RESUME vs LAUNCH) through the POPing of GPRs. - -Move the MOV CR earlier. Intel specify flags to be undefined across it. - -Encode the two conditions we want using SF and PF. See the code comment for -exactly how. - -Leave a comment to explain the lack of any content around -SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sods law says if we -delete it, we'll need to reintroduce it. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f) ---- - xen/arch/x86/hvm/vmx/entry.S | 36 +++++++++++++++++++++--- - xen/arch/x86/include/asm/asm_defns.h | 8 ++++++ - xen/arch/x86/include/asm/spec_ctrl_asm.h | 7 +++++ - xen/arch/x86/x86_64/asm-offsets.c | 1 + - 4 files changed, 48 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index e3f60d5a82..1bead826ca 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,17 +87,39 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- DO_SPEC_CTRL_COND_VERW -+ /* -+ * All speculation safety work happens to be elsewhere. VERW is after -+ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left -+ * to the MSR load list. -+ */ - - mov VCPU_hvm_guest_cr2(%rbx),%rax -+ mov %rax, %cr2 -+ -+ /* -+ * We need to perform two conditional actions (VERW, and Resume vs -+ * Launch) after popping GPRs. With some cunning, we can encode both -+ * of these in eflags together. -+ * -+ * Parity is only calculated over the bottom byte of the answer, while -+ * Sign is simply the top bit. -+ * -+ * Therefore, the final OR instruction ends up producing: -+ * SF = VCPU_vmx_launched -+ * PF = !SCF_verw -+ */ -+ BUILD_BUG_ON(SCF_verw & ~0xff) -+ movzbl VCPU_vmx_launched(%rbx), %ecx -+ shl $31, %ecx -+ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax -+ and $SCF_verw, %eax -+ or %eax, %ecx - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp -- mov %rax,%cr2 -- cmpb $0,VCPU_vmx_launched(%rbx) - pop %rbx - pop %r11 - pop %r10 -@@ -108,7 +130,13 @@ UNLIKELY_END(realmode) - pop %rdx - pop %rsi - pop %rdi -- je .Lvmx_launch -+ -+ jpe .L_skip_verw -+ /* VERW clobbers ZF, but preserves all others, including SF. 
*/ -+ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp) -+.L_skip_verw: -+ -+ jns .Lvmx_launch - - /*.Lvmx_resume:*/ - VMRESUME -diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h -index baaaccb26e..56ae26e542 100644 ---- a/xen/arch/x86/include/asm/asm_defns.h -+++ b/xen/arch/x86/include/asm/asm_defns.h -@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp"); - - #ifdef __ASSEMBLY__ - -+.macro BUILD_BUG_ON condstr, cond:vararg -+ .if \cond -+ .error "Condition \"\condstr\" not satisfied" -+ .endif -+.endm -+/* preprocessor macro to make error message more user friendly */ -+#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond -+ - #ifdef HAVE_AS_QUOTED_SYM - #define SUBSECTION_LBL(tag) \ - .ifndef .L.tag; \ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 6cb7c1b949..525745a066 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -152,6 +152,13 @@ - #endif - .endm - -+/* -+ * Helper to improve the readibility of stack dispacements with %rsp in -+ * unusual positions. Both @field and @top_of_stack should be constants from -+ * the same object. @top_of_stack should be where %rsp is currently pointing. -+ */ -+#define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) -+ - .macro DO_SPEC_CTRL_COND_VERW - /* - * Requires %rsp=cpuinfo -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 2fc4d9130a..0d33678898 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -135,6 +135,7 @@ void __dummy__(void) - #endif - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); -+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); --- -2.44.0 - - -From 95dd34fdbea5408872d5c244fe268222a4f145d0 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Sat, 27 Jan 2024 18:20:56 +0000 -Subject: [PATCH 60/70] x86/spec-ctrl: Perform VERW flushing later in exit - paths - -On parts vulnerable to RFDS, VERW's side effects are extended to scrub all -non-architectural entries in various Physical Register Files. To remove all -of Xen's values, the VERW must be after popping the GPRs. - -Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position, -but with overrides for other contexts. Identify that it clobbers eflags; this -is particularly relevant for the SYSRET path. - -For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a -shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to -issue the VERW. - -This is part of XSA-452 / CVE-2023-28746. 
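As a concrete illustration of the %rsp-relative displacements involved, the STK_REL() helper introduced in the previous patch boils down to a difference of two offsets. The struct below is invented for the example and is not Xen's real frame layout:

    #include <stddef.h>
    #include <stdio.h>

    /* %rsp points at error_code on the affected exit paths. */
    struct frame {
        unsigned long error_code;
        unsigned long rip;
        unsigned short verw_sel;
    };

    #define STK_REL(field, top_of_stk) \
        ((long)offsetof(struct frame, field) - \
         (long)offsetof(struct frame, top_of_stk))

    int main(void)
    {
        /* "verw STK_REL(verw_sel, error_code)(%rsp)" would use this
         * byte displacement from the current stack pointer. */
        printf("verw_sel at %%rsp%+ld\n", STK_REL(verw_sel, error_code));
        return 0;
    }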
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++-------- - xen/arch/x86/x86_64/asm-offsets.c | 13 +++++++-- - xen/arch/x86/x86_64/compat/entry.S | 6 ++++ - xen/arch/x86/x86_64/entry.S | 21 +++++++++++++- - 4 files changed, 61 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 525745a066..13acebc75d 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -159,16 +159,23 @@ - */ - #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) - --.macro DO_SPEC_CTRL_COND_VERW -+.macro SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) - /* -- * Requires %rsp=cpuinfo -+ * Requires \scf and \sel as %rsp-relative expressions -+ * Clobbers eflags -+ * -+ * VERW needs to run after guest GPRs have been restored, where only %rsp is -+ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code. -+ * Contexts where this is not true must provide an alternative \scf and \sel. - * - * Issue a VERW for its flushing side effect, if indicated. This is a Spectre - * v1 gadget, but the IRET/VMEntry is serialising. - */ -- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_verw, \scf(%rsp) - jz .L\@_verw_skip -- verw CPUINFO_verw_sel(%rsp) -+ verw \sel(%rsp) - .L\@_verw_skip: - .endm - -@@ -286,8 +293,6 @@ - */ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - -- DO_SPEC_CTRL_COND_VERW -- - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - -@@ -367,7 +372,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r12=ist_exit, %r14=stack_end -+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs - * Clobbers %rax, %rbx, %rcx, %rdx - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -@@ -395,11 +400,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - test %r12, %r12 - jz .L\@_skip_ist_exit - -- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -- testb $SCF_verw, %bl -- jz .L\@_skip_verw -- verw STACK_CPUINFO_FIELD(verw_sel)(%r14) --.L\@_skip_verw: -+ /* -+ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The -+ * VERW logic needs to run after guest GPRs have been restored; i.e. where -+ * we cannot use %r12 or %r14 for the purposes they have here. -+ * -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * Therefore it is safe for the VERW logic to look at the stashed SCF -+ * outside of the ist_exit condition. Also, this stashing won't influence -+ * any other restore_all_guest() paths. -+ */ -+ or $(__HYPERVISOR_DS32 << 16), %ebx -+ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */ - - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 0d33678898..85c7d0c989 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -55,14 +55,22 @@ void __dummy__(void) - * EFRAME_* is for the entry/exit logic where %rsp is pointing at - * UREGS_error_code and GPRs are still/already guest values. - */ --#define OFFSET_EF(sym, mem) \ -+#define OFFSET_EF(sym, mem, ...) 
\ - DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -- offsetof(struct cpu_user_regs, error_code)) -+ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__) - - OFFSET_EF(EFRAME_entry_vector, entry_vector); - OFFSET_EF(EFRAME_rip, rip); - OFFSET_EF(EFRAME_cs, cs); - OFFSET_EF(EFRAME_eflags, eflags); -+ -+ /* -+ * These aren't real fields. They're spare space, used by the IST -+ * exit-to-xen path. -+ */ -+ OFFSET_EF(EFRAME_shadow_scf, eflags, +4); -+ OFFSET_EF(EFRAME_shadow_sel, eflags, +6); -+ - OFFSET_EF(EFRAME_rsp, rsp); - BLANK(); - -@@ -136,6 +144,7 @@ void __dummy__(void) - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); - OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); -+ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index cb473f08ee..3bbe3a79a5 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest) - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL adj=8 compat=1 -+ -+ /* Account for ev/ec having already been popped off the stack. */ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) -+ - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 968da9d727..2c7512130f 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -214,6 +214,9 @@ restore_all_guest: - #endif - - mov EFRAME_rip(%rsp), %rcx -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) - mov EFRAME_rsp(%rsp), %rsp - je 1f -@@ -227,6 +230,9 @@ restore_all_guest: - iret_exit_to_guest: - andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) - orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */ - - RESTORE_ALL adj=8 -+ -+ /* -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of -+ * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, -+ * and we're at a random place on the stack, not in a CPUFINFO block. -+ * -+ * Account for ev/ec having already been popped off the stack. -+ */ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \ -+ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip) -+ - iretq - - ENTRY(common_interrupt) --- -2.44.0 - - -From b7205fc1cbad0c633e92d2d019a02a507467507b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 12 Feb 2024 17:50:43 +0000 -Subject: [PATCH 61/70] x86/spec-ctrl: Rename VERW related options - -VERW is going to be used for a 3rd purpose, and the existing nomenclature -didn't survive the Stale MMIO issues terribly well. 
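The renaming described below keeps the old spelling working as a deprecated alias, following the usual try-the-new-name-first pattern (a toy sketch; Xen's real parse_boolean() has a different signature and also handles "no-" prefixes and list separators):

    #include <stdio.h>
    #include <string.h>

    /* Toy stand-in: returns 1 on a match, -1 otherwise. */
    static int parse_boolean(const char *name, const char *s)
    {
        return strcmp(s, name) ? -1 : 1;
    }

    static void parse(const char *s)
    {
        int val;

        if ( (val = parse_boolean("verw", s)) != -1 ||
             (val = parse_boolean("md-clear", s)) != -1 )  /* deprecated alias */
            printf("%s -> verw=%d\n", s, val);
    }

    int main(void)
    {
        parse("verw");        /* new spelling */
        parse("md-clear");    /* old spelling, same effect */
        return 0;
    }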
- -Rename the command line option from `md-clear=` to `verw=`. This is more -consistent with other options which tend to be named based on what they're -doing, not which feature enumeration they use behind the scenes. Retain -`md-clear=` as a deprecated alias. - -Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio}, -which has a side effect of making spec_ctrl_init_domain() rather clearer to -follow. - -No functional change. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4) ---- - docs/misc/xen-command-line.pandoc | 15 ++++---- - xen/arch/x86/spec_ctrl.c | 62 ++++++++++++++++--------------- - 2 files changed, 40 insertions(+), 37 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 582d6741d1..fbf1683924 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2370,7 +2370,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, --> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>, -+> {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio,gds-mit,div-scrub}=<bool> ]` -@@ -2395,7 +2395,7 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options - offer fine grained control over the primitives by Xen. These impact Xen's - ability to protect itself, and/or Xen's ability to virtualise support for - guests to use. -@@ -2412,11 +2412,12 @@ guests to use. - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. - * `rsb=` offers control over whether to overwrite the Return Stack Buffer / - Return Address Stack on entry to Xen and on idle. --* `md-clear=` offers control over whether to use VERW to flush -- microarchitectural buffers on idle and exit from Xen. *Note: For -- compatibility with development versions of this fix, `mds=` is also accepted -- on Xen 4.12 and earlier as an alias. Consult vendor documentation in -- preference to here.* -+* `verw=` offers control over whether to use VERW for its scrubbing side -+ effects at appropriate privilege transitions. The exact side effects are -+ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as -+ a deprecated alias. For compatibility with development versions of XSA-297, -+ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor -+ documentation in preference to here.* - * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction - Barrier) is used on entry to Xen. 
This is used by default on hardware - vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a965b6db28..c42d8cdc22 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -25,8 +25,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __ro_after_init opt_md_clear_pv = -1; --static int8_t __ro_after_init opt_md_clear_hvm = -1; -+static int8_t __ro_after_init opt_verw_pv = -1; -+static int8_t __ro_after_init opt_verw_hvm = -1; - - static int8_t __ro_after_init opt_ibpb_entry_pv = -1; - static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; -@@ -66,7 +66,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. - - static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; --static bool __ro_after_init opt_fb_clear_mmio; -+static bool __ro_after_init opt_verw_mmio; - static int8_t __initdata opt_gds_mit = -1; - static int8_t __initdata opt_div_scrub = -1; - -@@ -108,8 +108,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - disable_common: - opt_rsb_pv = false; - opt_rsb_hvm = false; -- opt_md_clear_pv = 0; -- opt_md_clear_hvm = 0; -+ opt_verw_pv = 0; -+ opt_verw_hvm = 0; - opt_ibpb_entry_pv = 0; - opt_ibpb_entry_hvm = 0; - opt_ibpb_entry_dom0 = false; -@@ -140,14 +140,14 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - opt_msr_sc_pv = val; - opt_rsb_pv = val; -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) -@@ -192,21 +192,22 @@ static int __init cf_check parse_spec_ctrl(const char *s) - break; - } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) -+ else if ( (val = parse_boolean("verw", s, ss)) != -1 || -+ (val = parse_boolean("md-clear", s, ss)) != -1 ) - { - switch ( val ) - { - case 0: - case 1: -- opt_md_clear_pv = opt_md_clear_hvm = val; -+ opt_verw_pv = opt_verw_hvm = val; - break; - - case -2: -- s += strlen("md-clear="); -+ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear="); - if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - else - default: - rc = -EINVAL; -@@ -528,8 +529,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm || -- opt_fb_clear_mmio ? " VERW" : "", -+ opt_verw_pv || opt_verw_hvm || -+ opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - -@@ -550,13 +551,13 @@ static void __init print_details(enum ind_thunk thunk) - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - amd_virt_spec_ctrl || -- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None", -+ opt_eager_fpu || opt_verw_hvm) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - amd_virt_spec_ctrl) ? 
" MSR_VIRT_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_hvm ? " MD_CLEAR" : "", -+ opt_verw_hvm ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); - - #endif -@@ -565,11 +566,11 @@ static void __init print_details(enum ind_thunk thunk) - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || -- opt_eager_fpu || opt_md_clear_pv) ? "" : " None", -+ opt_eager_fpu || opt_verw_pv) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_pv ? " MD_CLEAR" : "", -+ opt_verw_pv ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", -@@ -1502,8 +1503,8 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) || -+ (opt_verw_mmio && is_iommu_enabled(d))); - - bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && - (d->domain_id != 0 || opt_ibpb_entry_dom0)); -@@ -1866,19 +1867,20 @@ void __init init_speculation_mitigations(void) - * the return-to-guest path. - */ - if ( opt_unpriv_mmio ) -- opt_fb_clear_mmio = cpu_has_fb_clear; -+ opt_verw_mmio = cpu_has_fb_clear; - - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, - * but it is somewhat better than nothing. - */ -- if ( opt_md_clear_pv == -1 ) -- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -- if ( opt_md_clear_hvm == -1 ) -- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -+ if ( opt_verw_pv == -1 ) -+ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); -+ -+ if ( opt_verw_hvm == -1 ) -+ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); - - /* - * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -@@ -1891,12 +1893,12 @@ void __init init_speculation_mitigations(void) - * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify -- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * opt_verw_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) -+ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT --- -2.44.0 - - -From fb85a8fc91f8cfd61d7c7f9742502b223d4024b5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 19:33:37 +0000 -Subject: [PATCH 62/70] x86/spec-ctrl: VERW-handling adjustments - -... before we add yet more complexity to this logic. Mostly expanded -comments, but with three minor changes. 
- -1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and - future ones. - -2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active, - then there's no re-partition of pipeline resources based on thread-idleness - to worry about. - -3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as - it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush - is the relevant decision of whether to use L1D_FLUSH based on - susceptibility and user preference. - - Rewrite the logic so it can be followed, and incorporate the fact that when - FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2) ---- - xen/arch/x86/spec_ctrl.c | 99 +++++++++++++++++++++++++++++----------- - 1 file changed, 73 insertions(+), 26 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c42d8cdc22..a4afcd8570 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1519,7 +1519,7 @@ void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; -- bool cpu_has_bug_taa, retpoline_safe; -+ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe; - - hw_smt_enabled = check_smt_enabled(); - -@@ -1855,50 +1855,97 @@ void __init init_speculation_mitigations(void) - "enabled. Please assess your configuration and choose an\n" - "explicit 'smt=<bool>' setting. See XSA-273.\n"); - -+ /* -+ * A brief summary of VERW-related changes. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * -+ * Relevant ucodes: -+ * -+ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side -+ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR -+ * exists architecturally, even when the side effects have been removed. -+ * -+ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to -+ * mitigate L1TF have the same side effect, so no need to do both. -+ * -+ * Various Atoms suffer from Store-buffer sampling only. Store buffers -+ * are statically partitioned between non-idle threads, so scrubbing is -+ * wanted when going idle too. -+ * -+ * Load ports and Fill buffers are competitively shared between threads. -+ * SMT must be disabled for VERW scrubbing to be fully effective. -+ * -+ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled -+ * MDS_NO parts. -+ * -+ * - February 2022, for Client TSX de-feature. Removed VERW side effects -+ * from Client CPUs only. -+ * -+ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing -+ * on all MMIO-affected parts which didn't already have it for MDS -+ * reasons, enumerating FB_CLEAR on those parts only. -+ * -+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing -+ * side effects as VERW and cannot be used in its place. 
-+ */
- mds_calculations();
-
- /*
-- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have
-- * reintroduced the VERW fill buffer flushing side effect because of a
-- * susceptibility to FBSDP.
-+ * Parts which enumerate FB_CLEAR are those with now-updated microcode
-+ * which weren't susceptible to the original MFBDS (and therefore didn't
-+ * have Fill Buffer scrubbing side effects to begin with, or were Client
-+ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had
-+ * the scrubbing reintroduced because of a susceptibility to FBSDP.
- *
- * If unprivileged guests have (or will have) MMIO mappings, we can
- * mitigate cross-domain leakage of fill buffer data by issuing VERW on
-- * the return-to-guest path.
-+ * the return-to-guest path. This is only a token effort if SMT is
-+ * active.
- */
- if ( opt_unpriv_mmio )
- opt_verw_mmio = cpu_has_fb_clear;
-
- /*
-- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
-- * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
-- * but it is somewhat better than nothing.
-+ * MD_CLEAR is enumerated architecturally forevermore, even after the
-+ * scrubbing side effects have been removed. Create ourselves a version
-+ * which expresses whether we think MD_CLEAR is having any useful side
-+ * effect.
-+ */
-+ cpu_has_useful_md_clear = (cpu_has_md_clear &&
-+ (cpu_has_bug_mds || cpu_has_bug_msbds_only));
-+
-+ /*
-+ * By default, use VERW scrubbing on applicable hardware, if we think it's
-+ * going to have an effect. This will only be a token effort for
-+ * MLPDS/MFBDS when SMT is enabled.
- */
- if ( opt_verw_pv == -1 )
-- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_pv = cpu_has_useful_md_clear;
-
- if ( opt_verw_hvm == -1 )
-- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_hvm = cpu_has_useful_md_clear;
-
- /*
-- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
-- * either the PV or HVM MDS defences are used, or if we may give MMIO
-- * access to untrusted guests.
-- *
-- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
-- * equivalent semantics to avoid needing to perform both flushes on the
-- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for
-- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
-- *
-- * After calculating the appropriate idle setting, simplify
-- * opt_verw_hvm to mean just "should we VERW on the way into HVM
-- * guests", so spec_ctrl_init_domain() can calculate suitable settings.
-+ * If SMT is active, and we're protecting against MDS or MMIO stale data,
-+ * we need to scrub before going idle as well as on return to guest.
-+ * Various pipeline resources are repartitioned amongst non-idle threads.
- */
-- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
-+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
-+ opt_verw_mmio) && hw_smt_enabled )
- setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
-- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
-+
-+ /*
-+ * After calculating the appropriate idle setting, simplify opt_verw_hvm
-+ * to mean just "should we VERW on the way into HVM guests", so
-+ * spec_ctrl_init_domain() can calculate suitable settings.
-+ *
-+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
-+ * only *_CLEAR we can see. 
-+ */
-+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
-+ opt_verw_hvm = false;
-
- /*
- * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
---
-2.44.0
-
-
-From 908cbd1893e80eb52b92b2c70c2bfd9ffdf6f77b Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 22 Jun 2023 23:32:19 +0100
-Subject: [PATCH 63/70] x86/spec-ctrl: Mitigation Register File Data Sampling
-
-RFDS affects Atom cores, also branded E-cores, between the Goldmont and
-Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid
-client systems which have a mix of Gracemont and other types of cores.
-
-Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side
-effects, and RFDS_NO to indicate that the system is unaffected. Plenty of
-unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we
-synthesise it. Alder Lake and Raptor Lake Xeon-E's are unaffected due to
-their platform configuration, and we must use the Hybrid CPUID bit to
-distinguish them from their non-Xeon counterparts.
-
-Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so
-set it in the max policies and reflect the host setting in default.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31)
----
- tools/misc/xen-cpuid.c | 5 +-
- xen/arch/x86/cpu-policy.c | 5 +
- xen/arch/x86/include/asm/cpufeature.h | 3 +
- xen/arch/x86/include/asm/msr-index.h | 2 +
- xen/arch/x86/spec_ctrl.c | 100 +++++++++++++++++++-
- xen/include/public/arch-x86/cpufeatureset.h | 3 +
- 6 files changed, 111 insertions(+), 7 deletions(-)
-
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index 7370f1b56e..52e451a806 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -172,7 +172,7 @@ static const char *const str_7d0[32] =
- [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl",
- [10] = "md-clear", [11] = "rtm-always-abort",
- /* 12 */ [13] = "tsx-force-abort",
-- [14] = "serialize",
-+ [14] = "serialize", [15] = "hybrid",
- [16] = "tsxldtrk",
- [18] = "pconfig",
- [20] = "cet-ibt",
-@@ -245,7 +245,8 @@ static const char *const str_m10Al[32] =
- [20] = "bhi-no", [21] = "xapic-status",
- /* 22 */ [23] = "ovrclk-status",
- [24] = "pbrsb-no", [25] = "gds-ctrl",
-- [26] = "gds-no",
-+ [26] = "gds-no", [27] = "rfds-no",
-+ [28] = "rfds-clear",
- };
-
- static const char *const str_m10Ah[32] =
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index c7c5e99b7b..12e621b97d 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -451,6 +451,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
- */
- __set_bit(X86_FEATURE_MD_CLEAR, fs);
- __set_bit(X86_FEATURE_FB_CLEAR, fs);
-+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
-
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
-@@ -500,6 +501,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- if ( cpu_has_fb_clear )
- __set_bit(X86_FEATURE_FB_CLEAR, fs);
-
-+ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
-+ if ( cpu_has_rfds_clear )
-+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
-+
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
- * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. 
-diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 76ef2aeb1d..3c57f55de0 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -181,6 +181,7 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -+#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID) - #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16) - #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS) - -@@ -208,6 +209,8 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) - #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL) - #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) -+#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) -+#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) - - /* Synthesized. */ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h -index 82a81bd0a2..85ef28a612 100644 ---- a/xen/arch/x86/include/asm/msr-index.h -+++ b/xen/arch/x86/include/asm/msr-index.h -@@ -89,6 +89,8 @@ - #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24) - #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25) - #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26) -+#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27) -+#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a4afcd8570..8165379fed 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -12,6 +12,7 @@ - - #include <asm/amd.h> - #include <asm/hvm/svm/svm.h> -+#include <asm/intel-family.h> - #include <asm/microcode.h> - #include <asm/msr.h> - #include <asm/pv/domain.h> -@@ -435,7 +436,7 @@ static void __init print_details(enum ind_thunk thunk) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -451,6 +452,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", - (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", - (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "", -+ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", -@@ -461,7 +463,7 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : ""); - - /* Hardware features which need driving to mitigate issues. */ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? 
" IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -479,6 +481,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", - (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "", - (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "", -+ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "", - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -@@ -1347,6 +1350,83 @@ static __init void mds_calculations(void) - } - } - -+/* -+ * Register File Data Sampling affects Atom cores from the Goldmont to -+ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to -+ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still -+ * in support. -+ * -+ * Alder Lake and Raptor Lake client CPUs have a mix of P cores -+ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont, -+ * vulnerable), and both enumerate RFDS_CLEAR. -+ * -+ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by -+ * platform configuration, and enumerate RFDS_NO. -+ * -+ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when -+ * safe to do so. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html -+ */ -+static void __init rfds_calculations(void) -+{ -+ /* RFDS is only known to affect Intel Family 6 processors at this time. */ -+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -+ boot_cpu_data.x86 != 6 ) -+ return; -+ -+ /* -+ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable -+ * microcode, or an RFDS-aware hypervisor is levelling us in a pool. -+ */ -+ if ( cpu_has_rfds_no || cpu_has_rfds_clear ) -+ return; -+ -+ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */ -+ if ( cpu_has_hypervisor ) -+ return; -+ -+ /* -+ * Not all CPUs are expected to get a microcode update enumerating one of -+ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode. -+ */ -+ switch ( boot_cpu_data.x86_model ) -+ { -+ case INTEL_FAM6_ALDERLAKE: -+ case INTEL_FAM6_RAPTORLAKE: -+ /* -+ * Alder Lake and Raptor Lake might be a client SKU (with the -+ * Gracemont cores active, and therefore vulnerable) or might be a -+ * server SKU (with the Gracemont cores disabled, and therefore not -+ * vulnerable). -+ * -+ * See if the CPU identifies as hybrid to distinguish the two cases. -+ */ -+ if ( !cpu_has_hybrid ) -+ break; -+ fallthrough; -+ case INTEL_FAM6_ALDERLAKE_L: -+ case INTEL_FAM6_RAPTORLAKE_P: -+ case INTEL_FAM6_RAPTORLAKE_S: -+ -+ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */ -+ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */ -+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */ -+ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */ -+ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */ -+ return; -+ } -+ -+ /* -+ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO, -+ * perhaps because of it's age or because of out-of-date microcode. -+ * Synthesise it. 
-+ */
-+ setup_force_cpu_cap(X86_FEATURE_RFDS_NO);
-+}
-+
- static bool __init cpu_has_gds(void)
- {
- /*
-@@ -1860,6 +1940,7 @@ void __init init_speculation_mitigations(void)
- *
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
- *
- * Relevant ucodes:
- *
-@@ -1889,8 +1970,12 @@ void __init init_speculation_mitigations(void)
- *
- * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
- * side effects as VERW and cannot be used in its place.
-+ *
-+ * - March 2024, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now
-+ * scrubs non-architectural entries from certain register files.
- */
- mds_calculations();
-+ rfds_calculations();
-
- /*
- * Parts which enumerate FB_CLEAR are those with now-updated microcode
-@@ -1922,15 +2007,19 @@ void __init init_speculation_mitigations(void)
- * MLPDS/MFBDS when SMT is enabled.
- */
- if ( opt_verw_pv == -1 )
-- opt_verw_pv = cpu_has_useful_md_clear;
-+ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear;
-
- if ( opt_verw_hvm == -1 )
-- opt_verw_hvm = cpu_has_useful_md_clear;
-+ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear;
-
- /*
- * If SMT is active, and we're protecting against MDS or MMIO stale data,
- * we need to scrub before going idle as well as on return to guest.
- * Various pipeline resources are repartitioned amongst non-idle threads.
-+ *
-+ * We don't need to scrub on idle for RFDS. There are no affected cores
-+ * which support SMT, despite there being affected cores in hybrid systems
-+ * which have SMT elsewhere in the platform.
- */
- if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
- opt_verw_mmio) && hw_smt_enabled )
-@@ -1944,7 +2033,8 @@ void __init init_speculation_mitigations(void)
- * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
- * only *_CLEAR we can see.
- */
-- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
-+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear &&
-+ !cpu_has_rfds_clear )
- opt_verw_hvm = false;
-
- /*
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 337aaa9c77..8e17ef670f 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -266,6 +266,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe
- XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. 
*/ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -+XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ - XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ - XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ - XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /*A AVX512 FP16 instructions */ -@@ -338,6 +339,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ - XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ - XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */ - XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */ -+XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ -+XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ - --- -2.44.0 - - -From bdda600406e5f5c35bcb17b2f9458e2138d7ad46 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 2 Feb 2024 00:39:42 +0000 -Subject: [PATCH 64/70] xen: Swap order of actions in the FREE*() macros - -Wherever possible, it is a good idea to NULL out the visible reference to an -object prior to freeing it. The FREE*() macros already collect together both -parts, making it easy to adjust. - -This has a marginal code generation improvement, as some of the calls to the -free() function can be tailcall optimised. - -No functional change. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e) ---- - xen/include/xen/mm.h | 3 ++- - xen/include/xen/xmalloc.h | 7 ++++--- - 2 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h -index 8b9618609f..8bc5f4249d 100644 ---- a/xen/include/xen/mm.h -+++ b/xen/include/xen/mm.h -@@ -91,8 +91,9 @@ bool scrub_free_pages(void); - - /* Free an allocation, and zero the pointer to it. */ - #define FREE_XENHEAP_PAGES(p, o) do { \ -- free_xenheap_pages(p, o); \ -+ void *_ptr_ = (p); \ - (p) = NULL; \ -+ free_xenheap_pages(_ptr_, o); \ - } while ( false ) - #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0) - -diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h -index 16979a117c..d857298011 100644 ---- a/xen/include/xen/xmalloc.h -+++ b/xen/include/xen/xmalloc.h -@@ -66,9 +66,10 @@ - extern void xfree(void *); - - /* Free an allocation, and zero the pointer to it. */ --#define XFREE(p) do { \ -- xfree(p); \ -- (p) = NULL; \ -+#define XFREE(p) do { \ -+ void *_ptr_ = (p); \ -+ (p) = NULL; \ -+ xfree(_ptr_); \ - } while ( false ) - - /* Underlying functions */ --- -2.44.0 - - -From 1932973ac9a8c28197ebb24749c73c18cf23f5f1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 13 Feb 2024 13:08:05 +0100 -Subject: [PATCH 65/70] x86/spinlock: introduce support for blocking - speculation into critical regions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce a new Kconfig option to block speculation into lock protected -critical regions. The Kconfig option is enabled by default, but the mitigation -won't be engaged unless it's explicitly enabled in the command line using -`spec-ctrl=lock-harden`. 
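Reduced to a stand-alone sketch, the pattern the following paragraphs describe
looks roughly like this (illustrative only: pthread stands in for Xen's
spinlock, and a raw lfence for the alternative()-patched
arch_block_lock_speculation(); x86-only, not taken from the patch):

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in for Xen's block_lock_speculation(); x86-only sketch. */
    static inline __attribute__((always_inline)) void block_lock_speculation(void)
    {
        asm volatile ( "lfence" ::: "memory" );
    }

    static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
    static int demo_data;

    /*
     * The barrier lives in an always-inline wrapper rather than in the lock
     * implementation itself, so a speculatively-bypassed call into the lock
     * function cannot leave the barrier behind as well.
     */
    static inline __attribute__((always_inline)) void
    demo_spin_lock(pthread_mutex_t *l)
    {
        pthread_mutex_lock(l);
        block_lock_speculation();
    }

    int main(void)
    {
        demo_spin_lock(&demo_lock);
        demo_data = 1;                  /* lock-protected critical region */
        pthread_mutex_unlock(&demo_lock);
        printf("%d\n", demo_data);
        return 0;
    }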
-
-Convert the spinlock acquire macros into always-inline functions, and introduce
-a speculation barrier after the lock has been taken. Note the speculation
-barrier is not placed inside the implementation of the spin lock functions, so
-as to prevent speculation from falling through the call to the lock functions,
-resulting in the barrier also being skipped.
-
-trylock variants are protected using a construct akin to the existing
-evaluate_nospec().
-
-This patch only implements the speculation barrier for x86.
-
-Note spin locks are the only locking primitive taken care of in this change;
-further locking primitives will be adjusted by separate changes.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa)
---
- docs/misc/xen-command-line.pandoc | 7 ++++-
- xen/arch/x86/include/asm/cpufeatures.h | 2 +-
- xen/arch/x86/include/asm/nospec.h | 26 ++++++++++++++++++
- xen/arch/x86/spec_ctrl.c | 26 +++++++++++++++---
- xen/common/Kconfig | 17 ++++++++++++
- xen/include/xen/nospec.h | 15 +++++++++++
- xen/include/xen/spinlock.h | 37 +++++++++++++++++++++-----
- 7 files changed, 119 insertions(+), 11 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index fbf1683924..3f9f916718 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2373,7 +2373,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
- > {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
--> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
-+> unpriv-mmio,gds-mit,div-scrub,lock-harden}=<bool> ]`
-
- Controls for speculative execution sidechannel mitigations. By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
-@@ -2500,6 +2500,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen
- from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
- DIV-leakage on hardware believed to be vulnerable.
-
-+If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=`
-+boolean can be used to force or prevent Xen from using speculation barriers to
-+protect lock critical regions. This mitigation won't be engaged by default,
-+and needs to be explicitly enabled on the command line.
-+
- ### sync_console
- > `= <boolean>`
-
-diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
-index c3aad21c3b..7e8221fd85 100644
---- a/xen/arch/x86/include/asm/cpufeatures.h
-+++ b/xen/arch/x86/include/asm/cpufeatures.h
-@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */
- XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */
- XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */
- XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */
---/* Bit 12 unused. 
*/ -+XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */ - XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */ - XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */ - XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */ -diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h -index 7150e76b87..0725839e19 100644 ---- a/xen/arch/x86/include/asm/nospec.h -+++ b/xen/arch/x86/include/asm/nospec.h -@@ -38,6 +38,32 @@ static always_inline void block_speculation(void) - barrier_nospec_true(); - } - -+static always_inline void arch_block_lock_speculation(void) -+{ -+ alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+} -+ -+/* Allow to insert a read memory barrier into conditionals */ -+static always_inline bool barrier_lock_true(void) -+{ -+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return true; -+} -+ -+static always_inline bool barrier_lock_false(void) -+{ -+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return false; -+} -+ -+static always_inline bool arch_lock_evaluate_nospec(bool condition) -+{ -+ if ( condition ) -+ return barrier_lock_true(); -+ else -+ return barrier_lock_false(); -+} -+ - #endif /* _ASM_X86_NOSPEC_H */ - - /* -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 8165379fed..5dfc4ed69e 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -53,6 +53,7 @@ int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); -+static bool __initdata opt_lock_harden; - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -121,6 +122,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -+ opt_lock_harden = false; - opt_srb_lock = 0; - opt_unpriv_mmio = false; - opt_gds_mit = 0; -@@ -286,6 +288,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - rc = -EINVAL; - } - } -+ else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 ) -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) -+ opt_lock_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -@@ -488,7 +500,8 @@ static void __init print_details(enum ind_thunk thunk) - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" -@@ -504,11 +517,14 @@ static void __init print_details(enum ind_thunk thunk) - #endif - #ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS - " HARDEN_GUEST_ACCESS" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ " HARDEN_LOCK" - #endif - "\n"); - - /* Settings for Xen's protection, irrespective of guests. 
*/ -- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n", - thunk != THUNK_NONE ? "BTI-Thunk: " : "", - thunk == THUNK_NONE ? "" : - thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -@@ -535,7 +551,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_verw_pv || opt_verw_hvm || - opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", -- opt_branch_harden ? " BRANCH_HARDEN" : ""); -+ opt_branch_harden ? " BRANCH_HARDEN" : "", -+ opt_lock_harden ? " LOCK_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ - if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu ) -@@ -1918,6 +1935,9 @@ void __init init_speculation_mitigations(void) - if ( !opt_branch_harden ) - setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN); - -+ if ( !opt_lock_harden ) -+ setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN); -+ - /* - * We do not disable HT by default on affected hardware. - * -diff --git a/xen/common/Kconfig b/xen/common/Kconfig -index 4d6fe05164..3361a6d892 100644 ---- a/xen/common/Kconfig -+++ b/xen/common/Kconfig -@@ -188,6 +188,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS - - If unsure, say Y. - -+config SPECULATIVE_HARDEN_LOCK -+ bool "Speculative lock context hardening" -+ default y -+ depends on X86 -+ help -+ Contemporary processors may use speculative execution as a -+ performance optimisation, but this can potentially be abused by an -+ attacker to leak data via speculative sidechannels. -+ -+ One source of data leakage is via speculative accesses to lock -+ critical regions. -+ -+ This option is disabled by default at run time, and needs to be -+ enabled on the command line. -+ -+ If unsure, say Y. 
-+ - endmenu - - config DIT_DEFAULT -diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h -index 76255bc46e..4552846403 100644 ---- a/xen/include/xen/nospec.h -+++ b/xen/include/xen/nospec.h -@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - #define array_access_nospec(array, index) \ - (array)[array_index_nospec(index, ARRAY_SIZE(array))] - -+static always_inline void block_lock_speculation(void) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ arch_block_lock_speculation(); -+#endif -+} -+ -+static always_inline bool lock_evaluate_nospec(bool condition) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ return arch_lock_evaluate_nospec(condition); -+#endif -+ return condition; -+} -+ - #endif /* XEN_NOSPEC_H */ - - /* -diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h -index e7a1c1aa89..28fce5615e 100644 ---- a/xen/include/xen/spinlock.h -+++ b/xen/include/xen/spinlock.h -@@ -1,6 +1,7 @@ - #ifndef __SPINLOCK_H__ - #define __SPINLOCK_H__ - -+#include <xen/nospec.h> - #include <xen/time.h> - #include <xen/types.h> - -@@ -195,13 +196,30 @@ int _spin_trylock_recursive(spinlock_t *lock); - void _spin_lock_recursive(spinlock_t *lock); - void _spin_unlock_recursive(spinlock_t *lock); - --#define spin_lock(l) _spin_lock(l) --#define spin_lock_cb(l, c, d) _spin_lock_cb(l, c, d) --#define spin_lock_irq(l) _spin_lock_irq(l) -+static always_inline void spin_lock(spinlock_t *l) -+{ -+ _spin_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data), -+ void *d) -+{ -+ _spin_lock_cb(l, c, d); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_irq(spinlock_t *l) -+{ -+ _spin_lock_irq(l); -+ block_lock_speculation(); -+} -+ - #define spin_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _spin_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) - - #define spin_unlock(l) _spin_unlock(l) -@@ -209,7 +227,7 @@ void _spin_unlock_recursive(spinlock_t *lock); - #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f) - - #define spin_is_locked(l) _spin_is_locked(l) --#define spin_trylock(l) _spin_trylock(l) -+#define spin_trylock(l) lock_evaluate_nospec(_spin_trylock(l)) - - #define spin_trylock_irqsave(lock, flags) \ - ({ \ -@@ -230,8 +248,15 @@ void _spin_unlock_recursive(spinlock_t *lock); - * are any critical regions that cannot form part of such a set, they can use - * standard spin_[un]lock(). - */ --#define spin_trylock_recursive(l) _spin_trylock_recursive(l) --#define spin_lock_recursive(l) _spin_lock_recursive(l) -+#define spin_trylock_recursive(l) \ -+ lock_evaluate_nospec(_spin_trylock_recursive(l)) -+ -+static always_inline void spin_lock_recursive(spinlock_t *l) -+{ -+ _spin_lock_recursive(l); -+ block_lock_speculation(); -+} -+ - #define spin_unlock_recursive(l) _spin_unlock_recursive(l) - - #endif /* __SPINLOCK_H__ */ --- -2.44.0 - - -From e7f0f11c888757e62940ded87b4ab5ebc992764f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 13 Feb 2024 16:08:52 +0100 -Subject: [PATCH 66/70] rwlock: introduce support for blocking speculation into - critical regions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce inline wrappers as required and add direct calls to -block_lock_speculation() in order to prevent speculation into the rwlock -protected critical regions. 
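The trylock treatment referred to below can also be sketched stand-alone:
lock_evaluate_nospec() places a barrier on both outcomes, so neither branch of
the conditional can be entered speculatively with the wrong result (names and
the single-flag trylock are hypothetical; lfence again stands in for the
alternative()-patched barrier, x86-only):

    #include <stdbool.h>
    #include <stdio.h>

    /* Both outcomes of the trylock get a speculation barrier. */
    static inline __attribute__((always_inline)) bool barrier_lock_true(void)
    {
        asm volatile ( "lfence" ::: "memory" );
        return true;
    }

    static inline __attribute__((always_inline)) bool barrier_lock_false(void)
    {
        asm volatile ( "lfence" ::: "memory" );
        return false;
    }

    static inline __attribute__((always_inline)) bool
    lock_evaluate_nospec(bool condition)
    {
        return condition ? barrier_lock_true() : barrier_lock_false();
    }

    /* Hypothetical single-flag trylock, for demonstration only. */
    static bool _demo_trylock(int *lock)
    {
        if ( *lock )
            return false;
        *lock = 1;
        return true;
    }

    #define demo_trylock(l) lock_evaluate_nospec(_demo_trylock(l))

    int main(void)
    {
        int lock = 0;

        if ( demo_trylock(&lock) )
            printf("lock taken\n");
        return 0;
    }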
-
-Note the rwlock primitives are adjusted to use the non speculation safe variants
-of the spinlock handlers, as a speculation barrier is added in the rwlock
-calling wrappers.
-
-trylock variants are protected by using lock_evaluate_nospec().
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59)
---
- xen/common/rwlock.c | 14 +++++++++++---
- xen/include/xen/rwlock.h | 34 ++++++++++++++++++++++++++++------
- 2 files changed, 39 insertions(+), 9 deletions(-)
-
-diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
-index 18224a4bb5..290602936d 100644
---- a/xen/common/rwlock.c
-+++ b/xen/common/rwlock.c
-@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock)
-
- /*
- * Put the reader into the wait queue.
-+ *
-+ * Use the speculation unsafe helper, as it's the caller's responsibility to
-+ * issue a speculation barrier if required.
- */
-- spin_lock(&lock->lock);
-+ _spin_lock(&lock->lock);
-
- /*
- * At the head of the wait queue now, wait until the writer state
-@@ -66,8 +69,13 @@ void queue_write_lock_slowpath(rwlock_t *lock)
- {
- u32 cnts;
-
-- /* Put the writer into the wait queue. */
-- spin_lock(&lock->lock);
-+ /*
-+ * Put the writer into the wait queue.
-+ *
-+ * Use the speculation unsafe helper, as it's the caller's responsibility to
-+ * issue a speculation barrier if required.
-+ */
-+ _spin_lock(&lock->lock);
-
- /* Try to acquire the lock directly if no reader is present. */
- if ( !atomic_read(&lock->cnts) &&
-diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
-index e0d2b41c5c..9a0d3ec238 100644
---- a/xen/include/xen/rwlock.h
-+++ b/xen/include/xen/rwlock.h
-@@ -259,27 +259,49 @@ static inline int _rw_is_write_locked(const rwlock_t *lock)
- return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
- }
-
--#define read_lock(l) _read_lock(l)
--#define read_lock_irq(l) _read_lock_irq(l)
-+static always_inline void read_lock(rwlock_t *l)
-+{
-+ _read_lock(l);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void read_lock_irq(rwlock_t *l)
-+{
-+ _read_lock_irq(l);
-+ block_lock_speculation();
-+}
-+
- #define read_lock_irqsave(l, f) \
- ({ \
- BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
- ((f) = _read_lock_irqsave(l)); \
-+ block_lock_speculation(); \
- })
-
- #define read_unlock(l) _read_unlock(l)
- #define read_unlock_irq(l) _read_unlock_irq(l)
- #define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
--#define read_trylock(l) _read_trylock(l)
-+#define read_trylock(l) lock_evaluate_nospec(_read_trylock(l))
-+
-+static always_inline void write_lock(rwlock_t *l)
-+{
-+ _write_lock(l);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void write_lock_irq(rwlock_t *l)
-+{
-+ _write_lock_irq(l);
-+ block_lock_speculation();
-+}
-
--#define write_lock(l) _write_lock(l)
--#define write_lock_irq(l) _write_lock_irq(l)
- #define write_lock_irqsave(l, f) \
- ({ \
- BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
- ((f) = _write_lock_irqsave(l)); \
-+ block_lock_speculation(); \
- })
--#define write_trylock(l) _write_trylock(l)
-+#define write_trylock(l) lock_evaluate_nospec(_write_trylock(l))
-
- #define write_unlock(l) _write_unlock(l)
- #define write_unlock_irq(l) _write_unlock_irq(l)
---
-2.44.0
-
-
-From 5a13c81542a163718d7cb9b150b0282b7855efde Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 13 Feb 
2024 17:57:38 +0100
-Subject: [PATCH 67/70] percpu-rwlock: introduce support for blocking
- speculation into critical regions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add direct calls to block_lock_speculation() where required in order to prevent
-speculation into the lock protected critical regions. Also convert
-_percpu_read_lock() from inline to always_inline.
-
-Note that _percpu_write_lock() has been modified to use the non speculation
-safe variant of the locking primitives, as a speculation barrier is added
-unconditionally by the calling wrapper.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441)
---
- xen/common/rwlock.c | 6 +++++-
- xen/include/xen/rwlock.h | 14 ++++++++++----
- 2 files changed, 15 insertions(+), 5 deletions(-)
-
-diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
-index 290602936d..f5a249bcc2 100644
---- a/xen/common/rwlock.c
-+++ b/xen/common/rwlock.c
-@@ -129,8 +129,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
- /*
- * First take the write lock to protect against other writers or slow
- * path readers.
-+ *
-+ * Note we use the speculation unsafe variant of write_lock(), as the
-+ * calling wrapper already adds a speculation barrier after the lock has
-+ * been taken.
- */
-- write_lock(&percpu_rwlock->rwlock);
-+ _write_lock(&percpu_rwlock->rwlock);
-
- /* Now set the global variable so that readers start using read_lock. */
- percpu_rwlock->writer_activating = 1;
-diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
-index 9a0d3ec238..9e35ee2edf 100644
---- a/xen/include/xen/rwlock.h
-+++ b/xen/include/xen/rwlock.h
-@@ -338,8 +338,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata,
- #define percpu_rwlock_resource_init(l, owner) \
- (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner)))
-
--static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
-- percpu_rwlock_t *percpu_rwlock)
-+static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
-+ percpu_rwlock_t *percpu_rwlock)
- {
- /* Validate the correct per_cpudata variable has been provided. */
- _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock);
-@@ -374,6 +374,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
- }
- else
- {
-+ /* Other branch already has a speculation barrier in read_lock(). */
-+ block_lock_speculation();
- /* All other paths have implicit check_lock() calls via read_lock(). 
*/ - check_lock(&percpu_rwlock->rwlock.lock.debug, false); - } -@@ -430,8 +432,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata, - _percpu_read_lock(&get_per_cpu_var(percpu), lock) - #define percpu_read_unlock(percpu, lock) \ - _percpu_read_unlock(&get_per_cpu_var(percpu), lock) --#define percpu_write_lock(percpu, lock) \ -- _percpu_write_lock(&get_per_cpu_var(percpu), lock) -+ -+#define percpu_write_lock(percpu, lock) \ -+({ \ -+ _percpu_write_lock(&get_per_cpu_var(percpu), lock); \ -+ block_lock_speculation(); \ -+}) - #define percpu_write_unlock(percpu, lock) \ - _percpu_write_unlock(&get_per_cpu_var(percpu), lock) - --- -2.44.0 - - -From 9de8a52b0e09a2491736abbd4a865a06ac2ced7a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Mon, 4 Mar 2024 14:29:36 +0100 -Subject: [PATCH 68/70] locking: attempt to ensure lock wrappers are always - inline -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In order to prevent the locking speculation barriers from being inside of -`call`ed functions that could be speculatively bypassed. - -While there also add an extra locking barrier to _mm_write_lock() in the branch -taken when the lock is already held. - -Note some functions are switched to use the unsafe variants (without speculation -barrier) of the locking primitives, but a speculation barrier is always added -to the exposed public lock wrapping helper. That's the case with -sched_spin_lock_double() or pcidevs_lock() for example. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762) ---- - xen/arch/x86/hvm/vpt.c | 10 +++++++--- - xen/arch/x86/include/asm/irq.h | 1 + - xen/arch/x86/mm/mm-locks.h | 28 +++++++++++++++------------- - xen/arch/x86/mm/p2m-pod.c | 2 +- - xen/common/event_channel.c | 5 +++-- - xen/common/grant_table.c | 6 +++--- - xen/common/sched/core.c | 19 ++++++++++++------- - xen/common/sched/private.h | 26 ++++++++++++++++++++++++-- - xen/common/timer.c | 8 +++++--- - xen/drivers/passthrough/pci.c | 5 +++-- - xen/include/xen/event.h | 4 ++-- - xen/include/xen/pci.h | 8 ++++++-- - 12 files changed, 82 insertions(+), 40 deletions(-) - -diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c -index 8f53e88d67..e1d6845a28 100644 ---- a/xen/arch/x86/hvm/vpt.c -+++ b/xen/arch/x86/hvm/vpt.c -@@ -150,7 +150,7 @@ static int pt_irq_masked(struct periodic_time *pt) - * pt->vcpu field, because another thread holding the pt_migrate lock - * may already be spinning waiting for your vcpu lock. - */ --static void pt_vcpu_lock(struct vcpu *v) -+static always_inline void pt_vcpu_lock(struct vcpu *v) - { - spin_lock(&v->arch.hvm.tm_lock); - } -@@ -169,9 +169,13 @@ static void pt_vcpu_unlock(struct vcpu *v) - * need to take an additional lock that protects against pt->vcpu - * changing. - */ --static void pt_lock(struct periodic_time *pt) -+static always_inline void pt_lock(struct periodic_time *pt) - { -- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); -+ /* -+ * Use the speculation unsafe variant for the first lock, as the following -+ * lock taking helper already includes a speculation barrier. 
-+ */ -+ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); - spin_lock(&pt->vcpu->arch.hvm.tm_lock); - } - -diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h -index a87af47ece..465ab39bb0 100644 ---- a/xen/arch/x86/include/asm/irq.h -+++ b/xen/arch/x86/include/asm/irq.h -@@ -174,6 +174,7 @@ void cf_check irq_complete_move(struct irq_desc *desc); - - extern struct irq_desc *irq_desc; - -+/* Not speculation safe, only used for AP bringup. */ - void lock_vector_lock(void); - void unlock_vector_lock(void); - -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index 5a3f96fbaa..5ec080c02f 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -74,8 +74,8 @@ static inline void _set_lock_level(int l) - this_cpu(mm_lock_level) = l; - } - --static inline void _mm_lock(const struct domain *d, mm_lock_t *l, -- const char *func, int level, int rec) -+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l, -+ const char *func, int level, int rec) - { - if ( !((mm_locked_by_me(l)) && rec) ) - _check_lock_level(d, level); -@@ -125,8 +125,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l) - return (l->locker == get_processor_id()); - } - --static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -- const char *func, int level) -+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -+ const char *func, int level) - { - if ( !mm_write_locked_by_me(l) ) - { -@@ -137,6 +137,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, - l->unlock_level = _get_lock_level(); - _set_lock_level(_lock_level(d, level)); - } -+ else -+ block_speculation(); - l->recurse_count++; - } - -@@ -150,8 +152,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l) - percpu_write_unlock(p2m_percpu_rwlock, &l->lock); - } - --static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l, -- int level) -+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l, -+ int level) - { - _check_lock_level(d, level); - percpu_read_lock(p2m_percpu_rwlock, &l->lock); -@@ -166,15 +168,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l) - - /* This wrapper uses the line number to express the locking order below */ - #define declare_mm_lock(name) \ -- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \ -- const char *func, int rec) \ -+ static always_inline void mm_lock_##name( \ -+ const struct domain *d, mm_lock_t *l, const char *func, int rec) \ - { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); } - #define declare_mm_rwlock(name) \ -- static inline void mm_write_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l, const char *func) \ -+ static always_inline void mm_write_lock_##name( \ -+ const struct domain *d, mm_rwlock_t *l, const char *func) \ - { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \ -- static inline void mm_read_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l) \ -+ static always_inline void mm_read_lock_##name(const struct domain *d, \ -+ mm_rwlock_t *l) \ - { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); } - /* These capture the name of the calling function */ - #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0) -@@ -309,7 +311,7 @@ declare_mm_lock(altp2mlist) - #define MM_LOCK_ORDER_altp2m 40 - declare_mm_rwlock(altp2m); - --static inline void p2m_lock(struct p2m_domain *p) -+static always_inline void p2m_lock(struct p2m_domain *p) - { - if ( p2m_is_altp2m(p) ) - 
-diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
-index a87af47ece..465ab39bb0 100644
---- a/xen/arch/x86/include/asm/irq.h
-+++ b/xen/arch/x86/include/asm/irq.h
-@@ -174,6 +174,7 @@ void cf_check irq_complete_move(struct irq_desc *desc);
- 
- extern struct irq_desc *irq_desc;
- 
-+/* Not speculation safe, only used for AP bringup. */
- void lock_vector_lock(void);
- void unlock_vector_lock(void);
- 
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index 5a3f96fbaa..5ec080c02f 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -74,8 +74,8 @@ static inline void _set_lock_level(int l)
-     this_cpu(mm_lock_level) = l;
- }
- 
--static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
--                            const char *func, int level, int rec)
-+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
-+                                   const char *func, int level, int rec)
- {
-     if ( !((mm_locked_by_me(l)) && rec) )
-         _check_lock_level(d, level);
-@@ -125,8 +125,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
-     return (l->locker == get_processor_id());
- }
- 
--static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
--                                  const char *func, int level)
-+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-+                                         const char *func, int level)
- {
-     if ( !mm_write_locked_by_me(l) )
-     {
-@@ -137,6 +137,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-         l->unlock_level = _get_lock_level();
-         _set_lock_level(_lock_level(d, level));
-     }
-+    else
-+        block_speculation();
-     l->recurse_count++;
- }
- 
-@@ -150,8 +152,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
-     percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
- }
- 
--static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
--                                 int level)
-+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
-+                                        int level)
- {
-     _check_lock_level(d, level);
-     percpu_read_lock(p2m_percpu_rwlock, &l->lock);
-@@ -166,15 +168,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
- 
- /* This wrapper uses the line number to express the locking order below */
- #define declare_mm_lock(name) \
--    static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
--                                      const char *func, int rec) \
-+    static always_inline void mm_lock_##name( \
-+        const struct domain *d, mm_lock_t *l, const char *func, int rec) \
-     { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
- #define declare_mm_rwlock(name) \
--    static inline void mm_write_lock_##name(const struct domain *d, \
--                                            mm_rwlock_t *l, const char *func) \
-+    static always_inline void mm_write_lock_##name( \
-+        const struct domain *d, mm_rwlock_t *l, const char *func) \
-     { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
--    static inline void mm_read_lock_##name(const struct domain *d, \
--                                           mm_rwlock_t *l) \
-+    static always_inline void mm_read_lock_##name(const struct domain *d, \
-+                                                  mm_rwlock_t *l) \
-     { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
- /* These capture the name of the calling function */
- #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
-@@ -309,7 +311,7 @@ declare_mm_lock(altp2mlist)
- #define MM_LOCK_ORDER_altp2m 40
- declare_mm_rwlock(altp2m);
- 
--static inline void p2m_lock(struct p2m_domain *p)
-+static always_inline void p2m_lock(struct p2m_domain *p)
- {
-     if ( p2m_is_altp2m(p) )
-         mm_write_lock(altp2m, p->domain, &p->lock);
-diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
-index 9969eb45fa..9be67b63ce 100644
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -24,7 +24,7 @@
- #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
- 
- /* Enforce lock ordering when grabbing the "external" page_alloc lock */
--static inline void lock_page_alloc(struct p2m_domain *p2m)
-+static always_inline void lock_page_alloc(struct p2m_domain *p2m)
- {
-     page_alloc_mm_pre_lock(p2m->domain);
-     spin_lock(&(p2m->domain->page_alloc_lock));
-diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
-index a7a004a084..66f924a7b0 100644
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -45,7 +45,7 @@
-  * just assume the event channel is free or unbound at the moment when the
-  * evtchn_read_trylock() returns false.
-  */
--static inline void evtchn_write_lock(struct evtchn *evtchn)
-+static always_inline void evtchn_write_lock(struct evtchn *evtchn)
- {
-     write_lock(&evtchn->lock);
- 
-@@ -351,7 +351,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
-     return rc;
- }
- 
--static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
-+static always_inline void double_evtchn_lock(struct evtchn *lchn,
-+                                             struct evtchn *rchn)
- {
-     ASSERT(lchn != rchn);
- 
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index 89b7811c51..934924cbda 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -403,7 +403,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
- 
- static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
- 
--static inline void grant_read_lock(struct grant_table *gt)
-+static always_inline void grant_read_lock(struct grant_table *gt)
- {
-     percpu_read_lock(grant_rwlock, &gt->lock);
- }
-@@ -413,7 +413,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
-     percpu_read_unlock(grant_rwlock, &gt->lock);
- }
- 
--static inline void grant_write_lock(struct grant_table *gt)
-+static always_inline void grant_write_lock(struct grant_table *gt)
- {
-     percpu_write_lock(grant_rwlock, &gt->lock);
- }
-@@ -450,7 +450,7 @@ nr_active_grant_frames(struct grant_table *gt)
-     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
- }
- 
--static inline struct active_grant_entry *
-+static always_inline struct active_grant_entry *
- active_entry_acquire(struct grant_table *t, grant_ref_t e)
- {
-     struct active_grant_entry *act;
-diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
-index 901782bbb4..34ad39b9ad 100644
---- a/xen/common/sched/core.c
-+++ b/xen/common/sched/core.c
-@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
-  * This avoids dead- or live-locks when this code is running on both
-  * cpus at the same time.
-  */
--static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
--                                   unsigned long *flags)
-+static always_inline void sched_spin_lock_double(
-+    spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
- {
-+    /*
-+     * In order to avoid extra overhead, use the locking primitives without the
-+     * speculation barrier, and introduce a single barrier here.
-+     */
-     if ( lock1 == lock2 )
-     {
--        spin_lock_irqsave(lock1, *flags);
-+        *flags = _spin_lock_irqsave(lock1);
-     }
-     else if ( lock1 < lock2 )
-     {
--        spin_lock_irqsave(lock1, *flags);
--        spin_lock(lock2);
-+        *flags = _spin_lock_irqsave(lock1);
-+        _spin_lock(lock2);
-     }
-     else
-     {
--        spin_lock_irqsave(lock2, *flags);
--        spin_lock(lock1);
-+        *flags = _spin_lock_irqsave(lock2);
-+        _spin_lock(lock1);
-     }
-+    block_lock_speculation();
- }
- 
- static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
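
Besides the single trailing barrier, sched_spin_lock_double() also shows the classic address-ordering discipline for taking two locks of the same class: callers on two CPUs may pass the same pair in opposite order, but both acquire in the same global order and therefore cannot deadlock. A standalone illustration with pthread mutexes (toy code, not the Xen implementation; comparing unrelated pointers is formally unspecified in ISO C but is the established practice the patch itself relies on):

#include <pthread.h>

static void toy_lock_double(pthread_mutex_t *l1, pthread_mutex_t *l2)
{
    if ( l1 == l2 )
        pthread_mutex_lock(l1);      /* same lock: take it only once */
    else if ( l1 < l2 )
    {
        pthread_mutex_lock(l1);      /* lower address first ... */
        pthread_mutex_lock(l2);
    }
    else
    {
        pthread_mutex_lock(l2);      /* ... regardless of argument order */
        pthread_mutex_lock(l1);
    }
    /* the patch then issues its single block_lock_speculation() here */
}
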
-diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
-index c516976c37..3b97f15767 100644
---- a/xen/common/sched/private.h
-+++ b/xen/common/sched/private.h
-@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
- #define cpumask_scratch (&this_cpu(cpumask_scratch))
- #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
- 
-+/*
-+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
-+ * it in a passed parameter.
-+ */
-+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
-+#define _sched_spinlock1(lock, irq, arg) ({ \
-+    BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
-+    (arg) = _spin_lock##irq(lock); \
-+})
-+
-+#define _sched_spinlock__(nr) _sched_spinlock ## nr
-+#define _sched_spinlock_(nr) _sched_spinlock__(nr)
-+#define _sched_spinlock(lock, irq, args...) \
-+    _sched_spinlock_(count_args(args))(lock, irq, ## args)
-+
- #define sched_lock(kind, param, cpu, irq, arg...) \
--static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
-+static always_inline spinlock_t \
-+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
- { \
-     for ( ; ; ) \
-     { \
-@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
-          * \
-          * It may also be the case that v->processor may change but the \
-          * lock may be the same; this will succeed in that case. \
-+         * \
-+         * Use the speculation unsafe locking helper, there's a speculation \
-+         * barrier before returning to the caller. \
-          */ \
--        spin_lock##irq(lock, ## arg); \
-+        _sched_spinlock(lock, irq, ## arg); \
-         if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
-+        { \
-+            block_lock_speculation(); \
-             return lock; \
-+        } \
-         spin_unlock##irq(lock, ## arg); \
-     } \
- }
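
The _sched_spinlock helpers above dispatch on the number of trailing macro arguments, so that the flags-returning _spin_lock_irqsave() and the plain _spin_lock() can hide behind one name. A cut-down demonstration of that token-pasting trick (TOY_COUNT_ARGS is a simplified stand-in for Xen's count_args(); take_lock()/take_lock_irqsave() are hypothetical primitives; GNU C `args...` semantics assumed, as in the patch):

/* Simplified count_args(): distinguishes 0 versus 1 trailing argument. */
#define TOY_COUNT_ARGS_(dot, a1, x, ...) x
#define TOY_COUNT_ARGS(args...) TOY_COUNT_ARGS_(., ## args, 1, 0)

/* Hypothetical primitives standing in for _spin_lock()/_spin_lock_irqsave(). */
void take_lock(void *lock);
unsigned long take_lock_irqsave(void *lock);

#define toy_spinlock0(lock)      take_lock(lock)
#define toy_spinlock1(lock, arg) ((arg) = take_lock_irqsave(lock))

#define toy_spinlock__(nr) toy_spinlock ## nr
#define toy_spinlock_(nr)  toy_spinlock__(nr)
#define toy_spinlock(lock, args...) \
    toy_spinlock_(TOY_COUNT_ARGS(args))(lock, ## args)

/*
 * toy_spinlock(&l)        expands to take_lock(&l)
 * toy_spinlock(&l, flags) expands to ((flags) = take_lock_irqsave(&l))
 */
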
-diff --git a/xen/common/timer.c b/xen/common/timer.c
-index 0fddfa7487..38eb5fd20d 100644
---- a/xen/common/timer.c
-+++ b/xen/common/timer.c
-@@ -239,7 +239,7 @@ static inline void deactivate_timer(struct timer *timer)
-     list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
- }
- 
--static inline bool_t timer_lock(struct timer *timer)
-+static inline bool_t timer_lock_unsafe(struct timer *timer)
- {
-     unsigned int cpu;
- 
-@@ -253,7 +253,8 @@ static inline bool_t timer_lock(struct timer *timer)
-             rcu_read_unlock(&timer_cpu_read_lock);
-             return 0;
-         }
--        spin_lock(&per_cpu(timers, cpu).lock);
-+        /* Use the speculation unsafe variant, the wrapper has the barrier. */
-+        _spin_lock(&per_cpu(timers, cpu).lock);
-         if ( likely(timer->cpu == cpu) )
-             break;
-         spin_unlock(&per_cpu(timers, cpu).lock);
-@@ -266,8 +267,9 @@ static inline bool_t timer_lock(struct timer *timer)
- #define timer_lock_irqsave(t, flags) ({ \
-     bool_t __x; \
-     local_irq_save(flags); \
--    if ( !(__x = timer_lock(t)) ) \
-+    if ( !(__x = timer_lock_unsafe(t)) ) \
-         local_irq_restore(flags); \
-+    block_lock_speculation(); \
-     __x; \
- })
- 
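
timer_lock_unsafe() has to find the right lock through the very field that lock protects, hence its lock/recheck/retry loop: a timer can migrate to another CPU while we block on the old CPU's lock. The same pattern in a self-contained toy (the toy_obj type, home field, and toy_locks array are hypothetical; C11 _Atomic is used where Xen relies on its own accessors):

#include <pthread.h>
#include <stdatomic.h>

/* Hypothetical object whose protecting lock is found via the object itself. */
struct toy_obj {
    _Atomic int home;                /* index of the lock owning this object */
};

extern pthread_mutex_t toy_locks[];

static void toy_obj_lock(struct toy_obj *obj)
{
    for ( ; ; )
    {
        int home = obj->home;

        pthread_mutex_lock(&toy_locks[home]);
        if ( obj->home == home )     /* still owned by the lock we took? */
            break;
        /* object migrated while we were blocking: drop and retry */
        pthread_mutex_unlock(&toy_locks[home]);
    }
}
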
-diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
-index e99837b6e1..2a1e7ee89a 100644
---- a/xen/drivers/passthrough/pci.c
-+++ b/xen/drivers/passthrough/pci.c
-@@ -52,9 +52,10 @@ struct pci_seg {
- 
- static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
- 
--void pcidevs_lock(void)
-+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
-+void pcidevs_lock_unsafe(void)
- {
--    spin_lock_recursive(&_pcidevs_lock);
-+    _spin_lock_recursive(&_pcidevs_lock);
- }
- 
- void pcidevs_unlock(void)
-diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
-index 8e509e0784..f1472ea1eb 100644
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
- #define bucket_from_port(d, p) \
-     ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
- 
--static inline void evtchn_read_lock(struct evtchn *evtchn)
-+static always_inline void evtchn_read_lock(struct evtchn *evtchn)
- {
-     read_lock(&evtchn->lock);
- }
- 
--static inline bool evtchn_read_trylock(struct evtchn *evtchn)
-+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
- {
-     return read_trylock(&evtchn->lock);
- }
-diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
-index 251b8761a8..a71bed36be 100644
---- a/xen/include/xen/pci.h
-+++ b/xen/include/xen/pci.h
-@@ -155,8 +155,12 @@ struct pci_dev {
-  * devices, it also sync the access to the msi capability that is not
-  * interrupt handling related (the mask bit register).
-  */
--
--void pcidevs_lock(void);
-+void pcidevs_lock_unsafe(void);
-+static always_inline void pcidevs_lock(void)
-+{
-+    pcidevs_lock_unsafe();
-+    block_lock_speculation();
-+}
- void pcidevs_unlock(void);
- bool __must_check pcidevs_locked(void);
- 
--- 
-2.44.0
-
-
-From e107a8ece71ec4e1bb0092d5beea6cb16a96f7ae Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 4 Mar 2024 18:08:48 +0100
-Subject: [PATCH 69/70] x86/mm: add speculation barriers to open coded locks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add a speculation barrier to the clearly identified open-coded lock taking
-functions.
-
-Note that the memory sharing page_lock() replacement (_page_lock()) is left
-as-is, as the code is experimental and not security supported.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4)
----
- xen/arch/x86/include/asm/mm.h | 4 +++-
- xen/arch/x86/mm.c             | 6 ++++--
- 2 files changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
-index 05dfe35502..d1b1fee99b 100644
---- a/xen/arch/x86/include/asm/mm.h
-+++ b/xen/arch/x86/include/asm/mm.h
-@@ -399,7 +399,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
-  * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is
-  * only supported for hvm guests, which do not have PV PTEs updated.
-  */
--int page_lock(struct page_info *page);
-+int page_lock_unsafe(struct page_info *page);
-+#define page_lock(pg) lock_evaluate_nospec(page_lock_unsafe(pg))
-+
- void page_unlock(struct page_info *page);
- 
- void put_page_type(struct page_info *page);
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index ab0acbfea6..000fd0fb55 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2017,7 +2017,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) {
- #define current_locked_page_ne_check(x) true
- #endif
- 
--int page_lock(struct page_info *page)
-+int page_lock_unsafe(struct page_info *page)
- {
-     unsigned long x, nx;
- 
-@@ -2078,7 +2078,7 @@ void page_unlock(struct page_info *page)
-  * l3t_lock(), so to avoid deadlock we must avoid grabbing them in
-  * reverse order.
-  */
--static void l3t_lock(struct page_info *page)
-+static always_inline void l3t_lock(struct page_info *page)
- {
-     unsigned long x, nx;
- 
-@@ -2087,6 +2087,8 @@ static void l3t_lock(struct page_info *page)
-             cpu_relax();
-         nx = x | PGT_locked;
-     } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
-+
-+    block_lock_speculation();
- }
- 
- static void l3t_unlock(struct page_info *page)
--- 
-2.44.0
-
-
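
l3t_lock() above is an open-coded bit-spinlock on the page's type_info word, which is why it needs an explicit barrier rather than inheriting one from a generic wrapper. Roughly the same loop with C11 atomics instead of Xen's cmpxchg() (TOY_LOCKED is an illustrative flag bit; the trailing fence merely marks where the patch places block_lock_speculation()):

#include <stdatomic.h>

#define TOY_LOCKED 0x1UL             /* illustrative stand-in for PGT_locked */

static void toy_bit_lock(_Atomic unsigned long *type_info)
{
    unsigned long x, nx;

    do {
        while ( (x = atomic_load(type_info)) & TOY_LOCKED )
            ;                        /* spin while someone else holds the bit */
        nx = x | TOY_LOCKED;
    } while ( !atomic_compare_exchange_weak(type_info, &x, nx) );

    /* the patch follows the successful cmpxchg with block_lock_speculation() */
    atomic_thread_fence(memory_order_acquire);
}
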
-From 4da8ca9cb9cfdb92c9dd09d5270ae16a3b2dbc89 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 4 Mar 2024 16:24:21 +0100
-Subject: [PATCH 70/70] x86: protect conditional lock taking from speculative
- execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Conditionally taken locks that use the pattern:
-
-if ( lock )
-    spin_lock(...);
-
-Need an else branch in order to issue an speculation barrier in the else case,
-just like it's done in case the lock needs to be acquired.
-
-eval_nospec() could be used on the condition itself, but that would result in a
-double barrier on the branch where the lock is taken.
-
-Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to
-conditionally take a lock in a speculation safe way.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406)
----
- xen/arch/x86/mm.c          | 35 +++++++++++++----------------------
- xen/arch/x86/mm/mm-locks.h |  9 +++++++++
- xen/arch/x86/mm/p2m.c      |  5 ++---
- xen/include/xen/spinlock.h |  8 ++++++++
- 4 files changed, 32 insertions(+), 25 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 000fd0fb55..45bfbc2522 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -5007,8 +5007,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
-         if ( !l3t )
-             return NULL;
-         UNMAP_DOMAIN_PAGE(l3t);
--        if ( locking )
--            spin_lock(&map_pgdir_lock);
-+        spin_lock_if(locking, &map_pgdir_lock);
-         if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
-         {
-             l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
-@@ -5045,8 +5044,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
-             return NULL;
-         }
-         UNMAP_DOMAIN_PAGE(l2t);
--        if ( locking )
--            spin_lock(&map_pgdir_lock);
-+        spin_lock_if(locking, &map_pgdir_lock);
-         if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
-         {
-             l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
-@@ -5084,8 +5082,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
-             return NULL;
-         }
-         UNMAP_DOMAIN_PAGE(l1t);
--        if ( locking )
--            spin_lock(&map_pgdir_lock);
-+        spin_lock_if(locking, &map_pgdir_lock);
-         if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
-         {
-             l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR));
-@@ -5116,6 +5113,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
-     do { \
-         if ( locking ) \
-             l3t_lock(page); \
-+        else \
-+            block_lock_speculation(); \
-     } while ( false )
- 
- #define L3T_UNLOCK(page) \
-@@ -5331,8 +5330,7 @@ int map_pages_to_xen(
-                 if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
-                     flush_flags |= FLUSH_TLB_GLOBAL;
- 
--                if ( locking )
--                    spin_lock(&map_pgdir_lock);
-+                spin_lock_if(locking, &map_pgdir_lock);
-                 if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
-                      (l3e_get_flags(*pl3e) & _PAGE_PSE) )
-                 {
-@@ -5436,8 +5434,7 @@ int map_pages_to_xen(
-                 if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
-                     flush_flags |= FLUSH_TLB_GLOBAL;
- 
--                if ( locking )
--                    spin_lock(&map_pgdir_lock);
-+                spin_lock_if(locking, &map_pgdir_lock);
-                 if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
-                      (l2e_get_flags(*pl2e) & _PAGE_PSE) )
-                 {
-@@ -5478,8 +5475,7 @@ int map_pages_to_xen(
-             unsigned long base_mfn;
-             const l1_pgentry_t *l1t;
- 
--            if ( locking )
--                spin_lock(&map_pgdir_lock);
-+            spin_lock_if(locking, &map_pgdir_lock);
- 
-             ol2e = *pl2e;
-             /*
-@@ -5533,8 +5529,7 @@ int map_pages_to_xen(
-             unsigned long base_mfn;
-             const l2_pgentry_t *l2t;
- 
--            if ( locking )
--                spin_lock(&map_pgdir_lock);
-+            spin_lock_if(locking, &map_pgdir_lock);
- 
-             ol3e = *pl3e;
-             /*
-@@ -5678,8 +5673,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
-                                        l3e_get_flags(*pl3e)));
-             UNMAP_DOMAIN_PAGE(l2t);
- 
--            if ( locking )
--                spin_lock(&map_pgdir_lock);
-+            spin_lock_if(locking, &map_pgdir_lock);
-             if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
-                  (l3e_get_flags(*pl3e) & _PAGE_PSE) )
-             {
-@@ -5738,8 +5732,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
-                                        l2e_get_flags(*pl2e) & ~_PAGE_PSE));
-             UNMAP_DOMAIN_PAGE(l1t);
- 
--            if ( locking )
--                spin_lock(&map_pgdir_lock);
-+            spin_lock_if(locking, &map_pgdir_lock);
-             if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
-                  (l2e_get_flags(*pl2e) & _PAGE_PSE) )
-             {
-@@ -5783,8 +5776,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
-          */
-         if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) )
-             continue;
--        if ( locking )
--            spin_lock(&map_pgdir_lock);
-+        spin_lock_if(locking, &map_pgdir_lock);
- 
-         /*
-          * L2E may be already cleared, or set to a superpage, by
-@@ -5831,8 +5823,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
-         if ( (nf & _PAGE_PRESENT) ||
-              ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) )
-             continue;
--        if ( locking )
--            spin_lock(&map_pgdir_lock);
-+        spin_lock_if(locking, &map_pgdir_lock);
- 
-         /*
-          * L3E may be already cleared, or set to a superpage, by
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index 5ec080c02f..b4960fb90e 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -335,6 +335,15 @@ static inline void p2m_unlock(struct p2m_domain *p)
- #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock)
- #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
- 
-+static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m,
-+                                      gfn_t gfn, unsigned int order)
-+{
-+    if ( condition )
-+        gfn_lock(p2m, gfn, order);
-+    else
-+        block_lock_speculation();
-+}
-+
- /* PoD lock (per-p2m-table)
-  *
-  * Protects private PoD data structs: entry and cache
-diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
-index 0983bd71d9..22ab1d606e 100644
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -280,9 +280,8 @@ mfn_t p2m_get_gfn_type_access(struct p2m_domain *p2m, gfn_t gfn,
-     if ( q & P2M_UNSHARE )
-         q |= P2M_ALLOC;
- 
--    if ( locked )
--        /* Grab the lock here, don't release until put_gfn */
--        gfn_lock(p2m, gfn, 0);
-+    /* Grab the lock here, don't release until put_gfn */
-+    gfn_lock_if(locked, p2m, gfn, 0);
- 
-     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
- 
-diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
-index 28fce5615e..c830df3430 100644
---- a/xen/include/xen/spinlock.h
-+++ b/xen/include/xen/spinlock.h
-@@ -222,6 +222,14 @@ static always_inline void spin_lock_irq(spinlock_t *l)
-     block_lock_speculation(); \
- })
- 
-+/* Conditionally take a spinlock in a speculation safe way. */
-+static always_inline void spin_lock_if(bool condition, spinlock_t *l)
-+{
-+    if ( condition )
-+        _spin_lock(l);
-+    block_lock_speculation();
-+}
-+
- #define spin_unlock(l) _spin_unlock(l)
- #define spin_unlock_irq(l) _spin_unlock_irq(l)
- #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
--- 
-2.44.0
-
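
Taken together, spin_lock_if() and gfn_lock_if() make both outcomes of a conditional acquisition end in exactly one barrier: the lock-skipped path gets an explicit one, while wrapping the condition in eval_nospec() would have doubled the barrier on the taken path (the safe lock wrapper already issues one). The idea in isolation, with the same hypothetical pthread/lfence stand-ins as the earlier sketches:

#include <stdbool.h>
#include <pthread.h>

static inline void toy_block_lock_speculation(void)
{
    asm volatile ( "lfence" ::: "memory" ); /* x86-only stand-in */
}

/* Both branches converge on exactly one speculation barrier. */
static inline void toy_lock_if(bool condition, pthread_mutex_t *l)
{
    if ( condition )
        pthread_mutex_lock(l);       /* barrier-less primitive, as _spin_lock() */
    toy_block_lock_speculation();    /* taken or not, speculation stops here */
}
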