Commit 4e803597 authored by Henrik Riomar's avatar Henrik Riomar Committed by Leonardo Arena
Browse files

main/xen: security upgrade to 4.11.3

Fixes CVE-2019-19579 / XSA-306
parent 9ae93588
......@@ -2,8 +2,8 @@
# Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
pkgver=4.11.2
pkgrel=1
pkgver=4.11.3
pkgrel=0
pkgdesc="Xen hypervisor"
url="https://www.xenproject.org/"
arch="x86_64 armhf aarch64" # enable armv7 when builds with gcc8
......@@ -165,6 +165,8 @@ options="!strip"
# - CVE-2019-18422 XSA-303
# - CVE-2018-12207 XSA-304
# - CVE-2019-11135 XSA-305
# 4.11.3-r0:
# - CVE-2019-19579 XSA-306
case "$CARCH" in
x86*)
......@@ -232,33 +234,6 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
hotplug-Linux-iscsi-block-handle-lun-1.patch
xsa298-4.11.patch
xsa299-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
xsa299-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
xsa299-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
xsa299-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
xsa299-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
xsa299-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
xsa299-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
xsa299-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
xsa299-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
xsa299-0010-x86-mm-Fix-nested-de-validation-on-error.patch
xsa299-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
xsa301-4.11-1.patch
xsa301-4.11-2.patch
xsa301-4.11-3.patch
xsa302-0001-IOMMU-add-missing-HVM-check.patch
xsa302-0002-passthrough-quarantine-PCI-devices.patch
xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
xsa304-4.11-1.patch
xsa304-4.11-2.patch
xsa304-4.11-3.patch
xsa305-4.11-1.patch
xsa305-4.11-2.patch
xenstored.initd
xenstored.confd
xenconsoled.initd
......@@ -492,7 +467,7 @@ EOF
}
sha512sums="48d3d926d35eb56c79c06d0abc6e6be2564fadb43367cc7f46881c669a75016707672179c2cca1c4cfb14af2cefd46e2e7f99470cddf7df2886d8435a2de814e xen-4.11.2.tar.gz
sha512sums="2204e490e9fc357a05983a9bf4e7345e1d364fe00400ce473988dcb9ca7d4e2b921fe10f095cbbc64248130a92d22c6f0d154dcae250a57a7f915df32e3dc436 xen-4.11.3.tar.gz
2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf gmp-4.3.2.tar.bz2
c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb grub-0.97.tar.gz
1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d lwip-1.3.0.tar.gz
......@@ -515,32 +490,6 @@ e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3a
69dfa60628ca838678862383528654ecbdf4269cbb5c9cfb6b84d976202a8dea85d711aa65a52fa1b477fb0b30604ca70cf1337192d6fb9388a08bbe7fe56077 xenstore_client_transaction_fix.patch
2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch
8c9cfc6afca325df1d8026e21ed03fa8cd2c7e1a21a56cc1968301c5ab634bfe849951899e75d328951d7a41273d1e49a2448edbadec0029ed410c43c0549812 hotplug-Linux-iscsi-block-handle-lun-1.patch
a4e1a0c080c51dad386b0c276eb1b9a5fc2acbca5886e1c390325cbe81102a2f5c570e7d4e9c18a8214036e80504ada0216a19e0f63a69926558fae7680d765b xsa298-4.11.patch
12b1dd1c320c0fbf5cc651e018b7051875d826014c993f04a0002ed9658d84969e413e2baaaeb8fc42d57dc0ae4b904bcb8baa75a38bff43484b94790a34eb92 xsa299-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
19385d9e713b806c237d7505b8a5e9bb5b286d3cf0f0a452a2638735e4204a8eeb14a66f761e78f6ed00098e1e773d865e0aa2c388620961f83aac9d149d8e2c xsa299-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
9652e3c20448bbb1a13e801b451c13077699e32626f544ec125fd5cb6f06fae7ed0308a950ec1783f7d91fc2d101133bbcf5a3545fec5aa9575e7e38078a176e xsa299-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
22adf9c5fd4a2a85ccaeb0552a67aabab97259f8419b62082922fcadb41c754cf70bbd9d1746589074f601b820290eb6f0ca7eda4d512dceaabff71d86ea1ad3 xsa299-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
2b58855bcd5fa8c15c99a5e3e5ab57c8a1053ff5f5b6fa75d3352c913af56dc9f12e85bd0bee3b79ff1d808f7f893b3c3a5bc2c5edd1539a5d6e1a9faa1053d9 xsa299-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
776898c266bdd3c2c20947ae048a5c83cc78ae719407d55037f396fea6c52339c5c8859adaa8626fe9c70498a11ae0becc33e12dd92684492b7d37fd0846d0a0 xsa299-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
f2feef23419530e059cdddf4e76990183b365171647dba89a2b7f59c7c7a084da21f3ea834148b0b3d328dbec3d960023ca4f807094bb746ca3be3b0e4098aae xsa299-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
856cd9bb37c1253423f1d117fa2421e2296a08f0b7716c39cff8db0959f94f90588ab978611b670b630ccc7b6776325afc366460d899c9932d881a25526c813b xsa299-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
fa9b497941a063fc378849f9a93f1a1500c05299fade739a322d2686041542f1a2d54e46c979b9a5bae342e8eba6226c606d38cb5ad52787c5f541890d2906b6 xsa299-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
b6af837ba78964605a8640cffcf2295c0d92df621864bfc4f7999aee0c8c68e502d6e4ffe98bcba77216090335ed40f6ae4bfe506221373d35139905cd94288a xsa299-0010-x86-mm-Fix-nested-de-validation-on-error.patch
4c7458f90f31c4755e5dd3b4c01f2d72f8a8fdc94822e1d70a03b28b769461c1be4098ceeb91c66e69211d74034bf955ac29a60c008f870c52228de258ee285c xsa299-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
916dc53eddb225a5c118630553baaa784c14a2a2ddc1f031ea83dbbb0241f0b103e664d3429414236951f6de689ff234be3fb900b83d1e1a4a4227385b32d496 xsa301-4.11-1.patch
555d6586543f4b8661d1104851073277290ccce17d05279531c685966186948f933706d834ac1dd748678340df0aaa0423d18ea88be9a35bec67685aeb6258ac xsa301-4.11-2.patch
5cf43aeb65988b7395a2d87bef0587cc172456ebebc2b9b67c231802ebcfb6bc1bdccccaaa09be463f1a79158234cb4003c0cd478990b99f020d812b90acc011 xsa301-4.11-3.patch
c44b8b5b6d64bed624fa045efa88a3d34af1c14d3186fe9ef316eee288ea9ebb0d55414fb612d3873f9a6d76bd316176d68e284d4fdc1b59205c6b5ebd03365e xsa302-0001-IOMMU-add-missing-HVM-check.patch
997c45c485cd382d9d3919cdedf4c9c50a579e21a98503408a2f9854437c8af6e7eafb83639261e1fdd8c4438fb1ff8ea196cfde00e8db82f26244086be31537 xsa302-0002-passthrough-quarantine-PCI-devices.patch
b65de69f7c0097177652fc6fe7c0c12ab44c6bb0a8823b19ee315a574b04f9151a572d518d684fec467b995c9c9756bd5b2d88f7546199c0b807155c5dca43b5 xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
440869c1d7212820ba0c7d4b35681483897d1dcc4aa2f833af1370ac5bd8995b3d2712c598e6309488b90f37e36ca36db232e5de06242afa017d1c991f5d6af6 xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
7d56d0576fcd90ce4296e59cd2eae35929ecae6a7fa40e88c2f66f54234083b09be92630f299e5bb24d23b32949e58d49bafa1bed1e73719b73a4c640b86206f xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
49b540f2585f43685c9f37ea1b6f166a98e71d85e6e0fbf2807b5788b3e71cb35dd71f71b7ad5a6d230ba60706cd11ef4bcecec7c2f250f28fd95dbd50fffc2b xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
35a047ba201c5ef718a961e4bd6f5a9760e736ee9d28a87e2de91fa2475d1883c2acb71817c4bcd1bb582ef986c2defd230030783a56dda3e9125d66189d5898 xsa304-4.11-1.patch
947d016de1356782ed9f44ce30649c3a1898a230d492ef17a8d1724686ac93e069fb15a8110ea7fe4fe34ce409f067d3558e53a8ae7ba988c2c6390cccff04c3 xsa304-4.11-2.patch
cd5d1967a38e74555465f71eb99501ef0c39d4a22e4e2f737b7a478e414caf5992b2a7d8257d68e6c8f48f260742931df22ff5dbc4d4cc35eb2d8075d1c0798b xsa304-4.11-3.patch
9b7730142f63d87d962b1ec148a440a0da3d331053df34c92d535084e3044adc4cb7f1004e9a7028c9cce8f11150714d86f03f067ab2a631a15ef0eca8300581 xsa305-4.11-1.patch
58a73ae86b37d59aad469cdadd8d325f6daa91d0af068ea27f022cfcfa6770c52a29a12a15867a5e95d5e2e23a8ee442070dac6e0b4d5b389543c8769de63f12 xsa305-4.11-2.patch
52c43beb2596d645934d0f909f2d21f7587b6898ed5e5e7046799a8ed6d58f7a09c5809e1634fa26152f3fd4f3e7cfa07da7076f01b4a20cc8f5df8b9cb77e50 xenstored.initd
093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0 xenstored.confd
3c86ed48fbee0af4051c65c4a3893f131fa66e47bf083caf20c9b6aa4b63fdead8832f84a58d0e27964bc49ec8397251b34e5be5c212c139f556916dc8da9523 xenconsoled.initd
......
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/PV: check GDT/LDT limits during emulation
Accesses beyond the LDT limit originating from emulation would trigger
the ASSERT() in pv_map_ldt_shadow_page(). On production builds such
accesses would cause an attempt to promote the touched page (offset from
the present LDT base address) to a segment descriptor one. If this
happens to succeed, guest user mode would be able to elevate its
privileges to that of the guest kernel. This is particularly easy when
there's no LDT at all, in which case the LDT base stored internally to
Xen is simply zero.
Also adjust the ASSERT() that was triggering: It was off by one to
begin with, and for production builds we also better use
ASSERT_UNREACHABLE() instead with suitable recovery code afterwards.
This is XSA-298.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/pv/emul-gate-op.c
+++ b/xen/arch/x86/pv/emul-gate-op.c
@@ -51,7 +51,13 @@ static int read_gate_descriptor(unsigned
const struct desc_struct *pdesc = gdt_ldt_desc_ptr(gate_sel);
if ( (gate_sel < 4) ||
- ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
+ /*
+ * We're interested in call gates only, which occupy a single
+ * seg_desc_t for 32-bit and a consecutive pair of them for 64-bit.
+ */
+ ((gate_sel >> 3) + !is_pv_32bit_vcpu(v) >=
+ (gate_sel & 4 ? v->arch.pv_vcpu.ldt_ents
+ : v->arch.pv_vcpu.gdt_ents)) ||
__get_user(desc, pdesc) )
return 0;
@@ -70,7 +76,7 @@ static int read_gate_descriptor(unsigned
if ( !is_pv_32bit_vcpu(v) )
{
if ( (*ar & 0x1f00) != 0x0c00 ||
- (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
+ /* Limit check done above already. */
__get_user(desc, pdesc + 1) ||
(desc.b & 0x1f00) )
return 0;
--- a/xen/arch/x86/pv/emulate.c
+++ b/xen/arch/x86/pv/emulate.c
@@ -31,7 +31,14 @@ int pv_emul_read_descriptor(unsigned int
{
struct desc_struct desc;
- if ( sel < 4)
+ if ( sel < 4 ||
+ /*
+ * Don't apply the GDT limit here, as the selector may be a Xen
+ * provided one. __get_user() will fail (without taking further
+ * action) for ones falling in the gap between guest populated
+ * and Xen ones.
+ */
+ ((sel & 4) && (sel >> 3) >= v->arch.pv_vcpu.ldt_ents) )
desc.b = desc.a = 0;
else if ( __get_user(desc, gdt_ldt_desc_ptr(sel)) )
return 0;
--- a/xen/arch/x86/pv/mm.c
+++ b/xen/arch/x86/pv/mm.c
@@ -92,12 +92,16 @@ bool pv_map_ldt_shadow_page(unsigned int
BUG_ON(unlikely(in_irq()));
/*
- * Hardware limit checking should guarantee this property. NB. This is
+ * Prior limit checking should guarantee this property. NB. This is
* safe as updates to the LDT can only be made by MMUEXT_SET_LDT to the
* current vcpu, and vcpu_reset() will block until this vcpu has been
* descheduled before continuing.
*/
- ASSERT((offset >> 3) <= curr->arch.pv_vcpu.ldt_ents);
+ if ( unlikely((offset >> 3) >= curr->arch.pv_vcpu.ldt_ents) )
+ {
+ ASSERT_UNREACHABLE();
+ return false;
+ }
if ( is_pv_32bit_domain(currd) )
linear = (uint32_t)linear;
From 852df269d247e177d5f2e9b8f3a4301a6fdd76bd Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 01/11] x86/mm: L1TF checks don't leave a partial entry
On detection of a potential L1TF issue, most validation code returns
-ERESTART to allow the switch to shadow mode to happen and cause the
original operation to be restarted.
However, in the validation code, the return value -ERESTART has been
repurposed to indicate 1) the function has partially completed
something which needs to be undone, and 2) calling put_page_type()
should cleanly undo it. This causes problems in several places.
For L1 tables, on receiving an -ERESTART return from alloc_l1_table(),
alloc_page_type() will set PGT_partial on the page. If for some
reason the original operation never restarts, then on domain
destruction, relinquish_memory() will call free_page_type() on the
page.
Unfortunately, alloc_ and free_l1_table() aren't set up to deal with
PGT_partial. When returning a failure, alloc_l1_table() always
de-validates whatever it's validated so far, and free_l1_table()
always devalidates the whole page. This means that if
relinquish_memory() calls free_page_type() on an L1 that didn't
complete due to an L1TF, it will call put_page_from_l1e() on "page
entries" that have never been validated.
For L2+ tables, setting rc to ERESTART causes the rest of the
alloc_lN_table() function to *think* that the entry in question will
have PGT_partial set. This will cause it to set partial_pte = 1. If
relinquish_memory() then calls free_page_type() on one of those pages,
then free_lN_table() will call put_page_from_lNe() on the entry when
it shouldn't.
Rather than indicating -ERESTART, indicate -EINTR. This is the code
to indicate that nothing has changed from when you started the call
(which is effectively how alloc_l1_table() handles errors).
mod_lN_entry() shouldn't have any of these types of problems, so leave
potential changes there for a clean-up patch later.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e6a4cb28f8..8ced185b49 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1110,7 +1110,7 @@ get_page_from_l2e(
int rc;
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l2e(d, l2e) ? -ERESTART : 1;
+ return pv_l1tf_check_l2e(d, l2e) ? -EINTR : 1;
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
@@ -1142,7 +1142,7 @@ get_page_from_l3e(
int rc;
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l3e(d, l3e) ? -ERESTART : 1;
+ return pv_l1tf_check_l3e(d, l3e) ? -EINTR : 1;
if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
{
@@ -1175,7 +1175,7 @@ get_page_from_l4e(
int rc;
if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l4e(d, l4e) ? -ERESTART : 1;
+ return pv_l1tf_check_l4e(d, l4e) ? -EINTR : 1;
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
@@ -1404,7 +1404,7 @@ static int alloc_l1_table(struct page_info *page)
{
if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
{
- ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -ERESTART : 0;
+ ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -EINTR : 0;
if ( ret )
goto out;
}
--
2.23.0
From 6bdddd7980eac0cc883945d823986f24682ca47a Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 02/11] x86/mm: Don't re-set PGT_pinned on a partially
de-validated page
When unpinning pagetables, if an operation is interrupted,
relinquish_memory() re-sets PGT_pinned so that the un-pin will
be picked up again when the hypercall restarts.
This is appropriate when put_page_and_type_preemptible() returns
-EINTR, which indicates that the page is back in its initial state
(i.e., completely validated). However, for -ERESTART, this leads to a
state where a page has both PGT_pinned and PGT_partial set.
This happens to work at the moment, although it's not really a
"canonical" state; but in subsequent patches, where we need to make a
distinction in handling between PGT_validated and PGT_partial pages,
this causes issues.
Move to a "canonical" state by:
- Only re-setting PGT_pinned on -EINTR
- Re-dropping the refcount held by PGT_pinned on -ERESTART
In the latter case, the PGT_partial bit will be cleared further down
with the rest of the other PGT_partial pages.
While here, clean up some trailing whitespace.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/domain.c | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 29f892c04c..8fbecbb169 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -112,7 +112,7 @@ static void play_dead(void)
* this case, heap corruption or #PF can occur (when heap debugging is
* enabled). For example, even printk() can involve tasklet scheduling,
* which touches per-cpu vars.
- *
+ *
* Consider very carefully when adding code to *dead_idle. Most hypervisor
* subsystems are unsafe to call.
*/
@@ -1838,9 +1838,34 @@ static int relinquish_memory(
break;
case -ERESTART:
case -EINTR:
+ /*
+ * -EINTR means PGT_validated has been re-set; re-set
+ * PGT_pinned again so that it gets picked up next time
+ * around.
+ *
+ * -ERESTART, OTOH, means PGT_partial is set instead. Put
+ * it back on the list, but don't set PGT_pinned; the
+ * section below will finish off de-validation. But we do
+ * need to drop the general ref associated with
+ * PGT_pinned, since put_page_and_type_preemptible()
+ * didn't do it.
+ *
+ * NB we can do an ASSERT for PGT_validated, since we
+ * "own" the type ref; but theoretically, the PGT_partial
+ * could be cleared by someone else.
+ */
+ if ( ret == -EINTR )
+ {
+ ASSERT(page->u.inuse.type_info & PGT_validated);
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ }
+ else
+ put_page(page);
+
ret = -ERESTART;
+
+ /* Put the page back on the list and drop the ref we grabbed above */
page_list_add(page, list);
- set_bit(_PGT_pinned, &page->u.inuse.type_info);
put_page(page);
goto out;
default:
@@ -2062,7 +2087,7 @@ void vcpu_kick(struct vcpu *v)
* pending flag. These values may fluctuate (after all, we hold no
* locks) but the key insight is that each change will cause
* evtchn_upcall_pending to be polled.
- *
+ *
* NB2. We save the running flag across the unblock to avoid a needless
* IPI for domains that we IPI'd to unblock.
*/
--
2.23.0
From 20b8a6702c6839bafd252789396b443d4b5c5474 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 04/11] x86/mm: Use flags for _put_page_type rather than a
boolean
This is in mainly in preparation for _put_page_type taking the
partial_flags value in the future. It also makes it easier to read in
the caller (since you see a flag name rather than `true` or `false`).
No functional change intended.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1c4f54e328..e2fba15d86 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1207,7 +1207,7 @@ get_page_from_l4e(
return rc;
}
-static int _put_page_type(struct page_info *page, bool preemptible,
+static int _put_page_type(struct page_info *page, unsigned int flags,
struct page_info *ptpg);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
@@ -1314,7 +1314,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- rc = _put_page_type(pg, true, ptpg);
+ rc = _put_page_type(pg, PTF_preemptible, ptpg);
}
else if ( flags & PTF_defer )
{
@@ -1323,7 +1323,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
}
else
{
- rc = _put_page_type(pg, true, ptpg);
+ rc = _put_page_type(pg, PTF_preemptible, ptpg);
if ( likely(!rc) )
put_page(pg);
}
@@ -1360,7 +1360,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
}
if ( flags & PTF_defer )
@@ -1370,7 +1370,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
return 0;
}
- rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
if ( likely(!rc) )
put_page(pg);
@@ -1391,7 +1391,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
}
if ( flags & PTF_defer )
@@ -1401,7 +1401,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
return 0;
}
- rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
if ( likely(!rc) )
put_page(pg);
}
@@ -2701,10 +2701,11 @@ static int _put_final_page_type(struct page_info *page, unsigned long type,
}
-static int _put_page_type(struct page_info *page, bool preemptible,
+static int _put_page_type(struct page_info *page, unsigned int flags,
struct page_info *ptpg)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ bool preemptible = flags & PTF_preemptible;
ASSERT(current_locked_page_ne_check(page));
@@ -2911,7 +2912,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
if ( unlikely(iommu_ret) )
{
- _put_page_type(page, false, NULL);
+ _put_page_type(page, 0, NULL);
rc = iommu_ret;
goto out;
}
@@ -2938,7 +2939,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
void put_page_type(struct page_info *page)
{
- int rc = _put_page_type(page, false, NULL);
+ int rc = _put_page_type(page, 0, NULL);
ASSERT(rc == 0);
(void)rc;
}
@@ -2955,7 +2956,7 @@ int get_page_type(struct page_info *page, unsigned long type)
int put_page_type_preemptible(struct page_info *page)
{
- return _put_page_type(page, true, NULL);
+ return _put_page_type(page, PTF_preemptible, NULL);
}
int get_page_type_preemptible(struct page_info *page, unsigned long type)
@@ -2972,7 +2973,7 @@ int put_old_guest_table(struct vcpu *v)
if ( !v->arch.old_guest_table )
return 0;
- switch ( rc = _put_page_type(v->arch.old_guest_table, true,
+ switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible,
v->arch.old_guest_ptpg) )
{
case -EINTR:
--
2.23.0
From 7b3f9f9a797459902bebba962e31be5cbfe7b515 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 05/11] x86/mm: Rework get_page_and_type_from_mfn conditional
Make it easier to read by declaring the conditions in which we will
retain the ref, rather than the conditions under which we release it.
The only way (page == current->arch.old_guest_table) can be true is if
preemptible is true; so remove this from the query itself, and add an
ASSERT() to that effect on the opposite path.
No functional change intended.
NB that alloc_lN_table() mishandles the "linear pt failure" situation
described in the comment; this will be addressed in a future patch.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 39 +++++++++++++++++++++++++++++++++++++--
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e2fba15d86..eaf7b14245 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -637,8 +637,43 @@ static int get_page_and_type_from_mfn(
rc = _get_page_type(page, type, preemptible);
- if ( unlikely(rc) && !partial_ref &&
- (!preemptible || page != current->arch.old_guest_table) )
+ /*
+ * Retain the refcount if:
+ * - page is fully validated (rc == 0)
+ * - page is not validated (rc < 0) but:
+ * - We came in with a reference (partial_ref)
+ * - page is partially validated but there's been an error
+ * (page == current->arch.old_guest_table)
+ *
+ * The partial_ref-on-error clause is worth an explanation. There
+ * are two scenarios where partial_ref might be true coming in:
+ * - mfn has been partially demoted as type `type`; i.e. has
+ * PGT_partial set
+ * - mfn has been partially demoted as L(type+1) (i.e., a linear
+ * page; e.g. we're being called from get_page_from_l2e with
+ * type == PGT_l1_table, but the mfn is PGT_l2_table)
+ *
+ * If there's an error, in the first case, _get_page_type will
+ * either return -ERESTART, in which case we want to retain the
+ * ref (as the caller will consider it retained), or -EINVAL, in
+ * which case old_guest_table will be set; in both cases, we need
+ * to retain the ref.
+ *
+ * In the second case, if there's an error, _get_page_type() can
+ * *only* return -EINVAL, and *never* set old_guest_table. In
+ * that case we also want to retain the reference, to allow the
+ * page to continue to be torn down (i.e., PGT_partial cleared)
+ * safely.
+ *
+ * Also note that we shouldn't be able to leave with the reference
+ * count retained unless we succeeded, or the operation was
+ * preemptible.
+ */
+ if ( likely(!rc) || partial_ref )
+ /* nothing */;
+ else if ( page == current->arch.old_guest_table )
+ ASSERT(preemptible);
+ else
put_page(page);
return rc;
--
2.23.0