Commit b56efe8d authored by Leonardo Arena

main/xen: security fixes

- CVE-2019-18425 XSA-298
- CVE-2019-18421 XSA-299
- CVE-2019-18423 XSA-301
- CVE-2019-18424 XSA-302
- CVE-2019-18422 XSA-303
- CVE-2018-12207 XSA-304
- CVE-2019-11135 XSA-305

fixes #10968
parent d21e62f3
@@ -3,7 +3,7 @@
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
pkgver=4.10.4
-pkgrel=0
+pkgrel=1
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
arch="x86_64 armhf aarch64"
@@ -154,6 +154,14 @@ options="!strip"
# - XSA-294
# - XSA-295
# - XSA-296
+# 4.10.4-r1:
+# - CVE-2019-18425 XSA-298
+# - CVE-2019-18421 XSA-299
+# - CVE-2019-18423 XSA-301
+# - CVE-2019-18424 XSA-302
+# - CVE-2019-18422 XSA-303
+# - CVE-2018-12207 XSA-304
+# - CVE-2019-11135 XSA-305
case "$CARCH" in
x86*)
@@ -218,6 +226,33 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
hotplug-Linux-iscsi-block-handle-lun-1.patch
+	xsa298-4.10.patch
+	xsa299-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
+	xsa299-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
+	xsa299-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
+	xsa299-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
+	xsa299-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
+	xsa299-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
+	xsa299-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
+	xsa299-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
+	xsa299-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
+	xsa299-0010-x86-mm-Fix-nested-de-validation-on-error.patch
+	xsa299-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
+	xsa301-4.11-1.patch
+	xsa301-4.11-2.patch
+	xsa301-4.11-3.patch
+	xsa302-0001-IOMMU-add-missing-HVM-check.patch
+	xsa302-0002-passthrough-quarantine-PCI-devices.patch
+	xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
+	xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
+	xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
+	xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
+	xsa304-4.10-1.patch
+	xsa304-4.10-2.patch
+	xsa304-4.10-3.patch
+	xsa305-4.10-1.patch
+	xsa305-4.10-2.patch
xenstored.initd
xenstored.confd
xenconsoled.initd
@@ -471,6 +506,32 @@ e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3a
69dfa60628ca838678862383528654ecbdf4269cbb5c9cfb6b84d976202a8dea85d711aa65a52fa1b477fb0b30604ca70cf1337192d6fb9388a08bbe7fe56077 xenstore_client_transaction_fix.patch
2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch
8c9cfc6afca325df1d8026e21ed03fa8cd2c7e1a21a56cc1968301c5ab634bfe849951899e75d328951d7a41273d1e49a2448edbadec0029ed410c43c0549812 hotplug-Linux-iscsi-block-handle-lun-1.patch
+c81ce3b1f14731061af530861f628e1fd392211f98c4aba9db8354e7aff604902908733ec716d46f679e65e068717dc87694797480f490046701c4e2aecc3a51 xsa298-4.10.patch
+eaeba22b8582a5f7cac727d0c068236a6af375b8b9f9e57d69d97569a6e1b7da15c38b611bc2504a84e044a6cafabc1fed27a134547c629210ebc66750fbce9f xsa299-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
+a027beb481e58b575967212381fd98e992eb28c1e6cd9a207c7c3f22e9aa6f65ca94b73cd02f460fdb2c931c527300bc2bd6dee9f039d1ace3532069ab9fb42d xsa299-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
+6a48835ad80ba6d8c97d09e74303d8c430e1f8a1245bdd4ea9b9301d4d35a5bbb388ef694d8ca9bbf872521123c40ac8f8142e59c2b13efd932948083d98b09f xsa299-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
+a9774b3bece635bb86501f67718cdeeeadfb32c465ef11a41a0f9869b42f879a82c73753c198b5285bb29e8df6531f6467619c4b29b583e0a761f45c2419b521 xsa299-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
+d25dd31942d676c4b4f9db593b1a520ef8e3feaf50dd79313860eb5afd5e41503caca937d5bd0fbc57a02f9d85d52fea3646e0bb1580ff4971c6d194f872b9d1 xsa299-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
+695a3ea0a0c2965e88cf907719aa2ace509d1f4944793eabbe3ace44d94f4f6b8e685695cf668c129d205b6b1ef30f37c13acb736bdf7de3b44c1b60d05c22be xsa299-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
+8bd1fb05bed70aacdebf31755e673c74700d6f5ee1a15a35d950e90d5c34f16b3d0531b56ae74f17203cf87579d2b157c049efea040a2a03c7d0e8adce8498b9 xsa299-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
+45bf263b11abd75e2fa2ee9e757c13de0a99365861d900b82cad0302446762a0ae76b9efbd870887d6353dcf95d565987debf43f80be4c9a0950c88964a3ee6a xsa299-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
+35faf5434ebf4c6166d7f8fd10f9010e3dc8a714d5b9e168f641d420e070222c172060a7a72b8c81b93aa762b1d5286098713b485f86c1f1a679c5c588dd642f xsa299-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
+8512e19397e30b4cca367b1fb936ef615ed5d4656206d16b24d0f44539a6ec5af07d0021a6276b48592a68b0fb7c5d3a3f035c9b3a1b7bfaa82f70204096a745 xsa299-0010-x86-mm-Fix-nested-de-validation-on-error.patch
+81813683d7d83610296c7dfb2f75be7ccf1e332d9abc8fcf741906ddbcaa5b38511a1047c233e34e21437737be2fc343b027f4f73133c4ab823ff879842a5002 xsa299-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
+916dc53eddb225a5c118630553baaa784c14a2a2ddc1f031ea83dbbb0241f0b103e664d3429414236951f6de689ff234be3fb900b83d1e1a4a4227385b32d496 xsa301-4.11-1.patch
+555d6586543f4b8661d1104851073277290ccce17d05279531c685966186948f933706d834ac1dd748678340df0aaa0423d18ea88be9a35bec67685aeb6258ac xsa301-4.11-2.patch
+5cf43aeb65988b7395a2d87bef0587cc172456ebebc2b9b67c231802ebcfb6bc1bdccccaaa09be463f1a79158234cb4003c0cd478990b99f020d812b90acc011 xsa301-4.11-3.patch
+6e918e7e6488d89807df5ff5c73926eb6c2990893c25850c5a55d2944619c6e135855ec57a5f54379c809e1ec854a4b56d1acd1c2bc0b50a06d183b470167d0f xsa302-0001-IOMMU-add-missing-HVM-check.patch
+cda95d99b8a51175b1ca98318ae4488a7b82f43c1e7a4e9903d8f5f9277c08acb759d05f146b8363363f9f1ed45663190fb935726c43fe667301134b88b21692 xsa302-0002-passthrough-quarantine-PCI-devices.patch
+b65de69f7c0097177652fc6fe7c0c12ab44c6bb0a8823b19ee315a574b04f9151a572d518d684fec467b995c9c9756bd5b2d88f7546199c0b807155c5dca43b5 xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
+440869c1d7212820ba0c7d4b35681483897d1dcc4aa2f833af1370ac5bd8995b3d2712c598e6309488b90f37e36ca36db232e5de06242afa017d1c991f5d6af6 xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
+7d56d0576fcd90ce4296e59cd2eae35929ecae6a7fa40e88c2f66f54234083b09be92630f299e5bb24d23b32949e58d49bafa1bed1e73719b73a4c640b86206f xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
+49b540f2585f43685c9f37ea1b6f166a98e71d85e6e0fbf2807b5788b3e71cb35dd71f71b7ad5a6d230ba60706cd11ef4bcecec7c2f250f28fd95dbd50fffc2b xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
+8502fd41000664f74382e2691f0a7ceef5121227532a55ffef3046745fe05461b266c93191f505ce3566b2e932b2f0880510dff714948384215fc48093b8d983 xsa304-4.10-1.patch
+c0149a445a9f6ef4aa0d928ff321afa7ea6f52d96213042f444a9b96912729fa27c5b81c247c56f45922061f2e45649c8ab462d73765de8ca49022b9994ccf05 xsa304-4.10-2.patch
+f7c34c984885f73f51fd3ca0274b7a6b3ca938547b910bb1becc73d7df668b0f9f69d6f402cc3a183a2acff1a9978c2d5775bd2acced4300212568e8ca22d47a xsa304-4.10-3.patch
+eeca8ad1ec1b13b7d1849b94537d24e8f91eff6fb7b2e406a08accb9ec72ddb48360c90b2a250ffbc628970f00de557fcddacbcf09062a59a36a8b6ffcbf1909 xsa305-4.10-1.patch
+6fc52805ef24510aa5092d1bda61d1299b74c8b37fdca0c17e9df62ec16bb9c7343f09b8dd1f4801c4c5db3b3f6f7208c0c35034ef8aa86b08df308e82597892 xsa305-4.10-2.patch
52c43beb2596d645934d0f909f2d21f7587b6898ed5e5e7046799a8ed6d58f7a09c5809e1634fa26152f3fd4f3e7cfa07da7076f01b4a20cc8f5df8b9cb77e50 xenstored.initd
093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0 xenstored.confd
3c86ed48fbee0af4051c65c4a3893f131fa66e47bf083caf20c9b6aa4b63fdead8832f84a58d0e27964bc49ec8397251b34e5be5c212c139f556916dc8da9523 xenconsoled.initd
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/PV: check GDT/LDT limits during emulation
Accesses beyond the LDT limit originating from emulation would trigger
the ASSERT() in pv_map_ldt_shadow_page(). On production builds such
accesses would cause an attempt to promote the touched page (offset from
the present LDT base address) to a segment descriptor page. If this
happens to succeed, guest user mode would be able to elevate its
privileges to that of the guest kernel. This is particularly easy when
there's no LDT at all, in which case the LDT base stored internally to
Xen is simply zero.
Also adjust the ASSERT() that was triggering: it was off by one to
begin with, and for production builds it is better to use
ASSERT_UNREACHABLE() with suitable recovery code afterwards.
This is XSA-298.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
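As background for the new limit check, here is a minimal standalone sketch of
the selector arithmetic it relies on (illustrative only: the helper and its
parameters are hypothetical, while the ">> 3" index / "& 4" table-indicator
decomposition matches the patched code):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * An x86 selector is laid out as | index (13 bits) | TI (1) | RPL (2) |.
     * TI = 1 addresses the LDT, TI = 0 the GDT. The fix rejects call gates
     * whose descriptor slot(s) lie beyond the limit the guest configured:
     * a 32-bit call gate occupies one slot, a 64-bit one two consecutive.
     */
    static bool gate_within_limit(uint16_t gate_sel, bool is_64bit,
                                  unsigned int ldt_ents, unsigned int gdt_ents)
    {
        unsigned int idx = gate_sel >> 3;               /* descriptor index */
        unsigned int limit = (gate_sel & 4) ? ldt_ents  /* TI set: LDT */
                                            : gdt_ents; /* TI clear: GDT */

        return idx + (is_64bit ? 1 : 0) < limit;
    }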
--- a/xen/arch/x86/pv/emul-gate-op.c
+++ b/xen/arch/x86/pv/emul-gate-op.c
@@ -60,7 +60,13 @@ static int read_gate_descriptor(unsigned
(!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
+ (gate_sel >> 3);
if ( (gate_sel < 4) ||
- ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
+ /*
+ * We're interested in call gates only, which occupy a single
+ * seg_desc_t for 32-bit and a consecutive pair of them for 64-bit.
+ */
+ ((gate_sel >> 3) + !is_pv_32bit_vcpu(v) >=
+ (gate_sel & 4 ? v->arch.pv_vcpu.ldt_ents
+ : v->arch.pv_vcpu.gdt_ents)) ||
__get_user(desc, pdesc) )
return 0;
@@ -79,7 +85,7 @@ static int read_gate_descriptor(unsigned
if ( !is_pv_32bit_vcpu(v) )
{
if ( (*ar & 0x1f00) != 0x0c00 ||
- (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
+ /* Limit check done above already. */
__get_user(desc, pdesc + 1) ||
(desc.b & 0x1f00) )
return 0;
--- a/xen/arch/x86/pv/emulate.c
+++ b/xen/arch/x86/pv/emulate.c
@@ -31,7 +31,14 @@ int pv_emul_read_descriptor(unsigned int
{
struct desc_struct desc;
- if ( sel < 4)
+ if ( sel < 4 ||
+ /*
+ * Don't apply the GDT limit here, as the selector may be a Xen
+ * provided one. __get_user() will fail (without taking further
+ * action) for ones falling in the gap between guest populated
+ * and Xen ones.
+ */
+ ((sel & 4) && (sel >> 3) >= v->arch.pv_vcpu.ldt_ents) )
desc.b = desc.a = 0;
else if ( __get_user(desc,
(const struct desc_struct *)(!(sel & 4)
--- a/xen/arch/x86/pv/mm.c
+++ b/xen/arch/x86/pv/mm.c
@@ -98,12 +98,16 @@ bool pv_map_ldt_shadow_page(unsigned int
BUG_ON(unlikely(in_irq()));
/*
- * Hardware limit checking should guarantee this property. NB. This is
+ * Prior limit checking should guarantee this property. NB. This is
* safe as updates to the LDT can only be made by MMUEXT_SET_LDT to the
* current vcpu, and vcpu_reset() will block until this vcpu has been
* descheduled before continuing.
*/
- ASSERT((offset >> 3) <= curr->arch.pv_vcpu.ldt_ents);
+ if ( unlikely((offset >> 3) >= curr->arch.pv_vcpu.ldt_ents) )
+ {
+ ASSERT_UNREACHABLE();
+ return false;
+ }
if ( is_pv_32bit_domain(currd) )
linear = (uint32_t)linear;
From bc266a68aa014af2cc3ed0a1f55723fdeac2e545 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 01/11] x86/mm: L1TF checks don't leave a partial entry
On detection of a potential L1TF issue, most validation code returns
-ERESTART to allow the switch to shadow mode to happen and cause the
original operation to be restarted.
However, in the validation code, the return value -ERESTART has been
repurposed to indicate 1) the function has partially completed
something which needs to be undone, and 2) calling put_page_type()
should cleanly undo it. This causes problems in several places.
For L1 tables, on receiving an -ERESTART return from alloc_l1_table(),
alloc_page_type() will set PGT_partial on the page. If for some
reason the original operation never restarts, then on domain
destruction, relinquish_memory() will call free_page_type() on the
page.
Unfortunately, alloc_ and free_l1_table() aren't set up to deal with
PGT_partial. When returning a failure, alloc_l1_table() always
de-validates whatever it's validated so far, and free_l1_table()
always devalidates the whole page. This means that if
relinquish_memory() calls free_page_type() on an L1 that didn't
complete due to an L1TF, it will call put_page_from_l1e() on "page
entries" that have never been validated.
For L2+ tables, setting rc to -ERESTART causes the rest of the
alloc_lN_table() function to *think* that the entry in question will
have PGT_partial set. This will cause it to set partial_pte = 1. If
relinquish_memory() then calls free_page_type() on one of those pages,
then free_lN_table() will call put_page_from_lNe() on the entry when
it shouldn't.
Rather than indicating -ERESTART, indicate -EINTR. This is the code
to indicate that nothing has changed from when you started the call
(which is effectively how alloc_l1_table() handles errors).
mod_lN_entry() shouldn't have any of these types of problems, so leave
potential changes there for a clean-up patch later.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
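The return-code contract this restores can be shown as a small standalone
sketch (the helper and reduced types are hypothetical; only the -EINTR /
-ERESTART semantics come from the patch):

    #include <errno.h>

    #ifndef ERESTART
    #define ERESTART 85             /* not in POSIX; Linux/Xen define it */
    #endif

    /* Stand-in for Xen's page bookkeeping, reduced to the one flag involved. */
    struct page { unsigned long type_info; };
    #define PGT_partial 0x1UL

    /*
     * -EINTR:    nothing was (de)validated; the caller may simply retry.
     * -ERESTART: partial progress exists; record PGT_partial so that
     *            put_page_type() can undo it cleanly later.
     */
    static int record_validation_result(struct page *pg, int rc)
    {
        if ( rc == -ERESTART )
            pg->type_info |= PGT_partial;   /* teardown must finish this */
        /* Deliberately nothing for -EINTR: the L1TF checks now promise
         * that no entry was partially validated. */
        return rc;
    }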
---
xen/arch/x86/mm.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index ce2c082caf..0cbca48a02 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1152,7 +1152,7 @@ get_page_from_l2e(
int rc;
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l2e(d, l2e) ? -ERESTART : 1;
+ return pv_l1tf_check_l2e(d, l2e) ? -EINTR : 1;
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
@@ -1188,7 +1188,7 @@ get_page_from_l3e(
int rc;
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l3e(d, l3e) ? -ERESTART : 1;
+ return pv_l1tf_check_l3e(d, l3e) ? -EINTR : 1;
if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
{
@@ -1221,7 +1221,7 @@ get_page_from_l4e(
int rc;
if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
- return pv_l1tf_check_l4e(d, l4e) ? -ERESTART : 1;
+ return pv_l1tf_check_l4e(d, l4e) ? -EINTR : 1;
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
@@ -1435,7 +1435,7 @@ static int alloc_l1_table(struct page_info *page)
{
if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
{
- ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -ERESTART : 0;
+ ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -EINTR : 0;
if ( ret )
goto out;
}
--
2.23.0
From fd7bfe9aaee41c589c16c541ec538285dcde1fb2 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 02/11] x86/mm: Don't re-set PGT_pinned on a partially
de-validated page
When unpinning pagetables, if an operation is interrupted,
relinquish_memory() re-sets PGT_pinned so that the un-pin will be
picked up again when the hypercall restarts.
This is appropriate when put_page_and_type_preemptible() returns
-EINTR, which indicates that the page is back in its initial state
(i.e., completely validated). However, for -ERESTART, this leads to a
state where a page has both PGT_pinned and PGT_partial set.
This happens to work at the moment, although it's not really a
"canonical" state; but in subsequent patches, where we need to make a
distinction in handling between PGT_validated and PGT_partial pages,
this causes issues.
Move to a "canonical" state by:
- Only re-setting PGT_pinned on -EINTR
- Re-dropping the refcount held by PGT_pinned on -ERESTART
In the latter case, the PGT_partial bit will be cleared further down
with the rest of the other PGT_partial pages.
While here, clean up some trailing whitespace.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
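Condensed into one standalone helper, the new handling reads roughly as
follows (a sketch with stand-in types restating the hunk below; the helper
itself is hypothetical):

    #include <assert.h>
    #include <errno.h>

    /* Stand-ins for the Xen fields and helpers involved (simplified). */
    struct page { unsigned long type_info; };
    #define PGT_validated 0x1UL
    #define PGT_pinned    0x2UL
    static void put_page(struct page *pg) { (void)pg; /* drop general ref */ }

    static void handle_interrupted_unpin(struct page *pg, int ret)
    {
        if ( ret == -EINTR )
        {
            /* Fully validated again: re-arm the pin so the unpin is
             * retried when the hypercall restarts. */
            assert(pg->type_info & PGT_validated);
            pg->type_info |= PGT_pinned;
        }
        else /* -ERESTART: PGT_partial is set instead */
        {
            /* The PGT_partial teardown will finish de-validation; just
             * drop the general reference PGT_pinned had been holding. */
            put_page(pg);
        }
    }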
---
xen/arch/x86/domain.c | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 91c2b1c21a..897124f05f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -112,7 +112,7 @@ static void play_dead(void)
* this case, heap corruption or #PF can occur (when heap debugging is
* enabled). For example, even printk() can involve tasklet scheduling,
* which touches per-cpu vars.
- *
+ *
* Consider very carefully when adding code to *dead_idle. Most hypervisor
* subsystems are unsafe to call.
*/
@@ -1837,9 +1837,34 @@ static int relinquish_memory(
break;
case -ERESTART:
case -EINTR:
+ /*
+ * -EINTR means PGT_validated has been re-set; re-set
+ * PGT_pinned again so that it gets picked up next time
+ * around.
+ *
+ * -ERESTART, OTOH, means PGT_partial is set instead. Put
+ * it back on the list, but don't set PGT_pinned; the
+ * section below will finish off de-validation. But we do
+ * need to drop the general ref associated with
+ * PGT_pinned, since put_page_and_type_preemptible()
+ * didn't do it.
+ *
+ * NB we can do an ASSERT for PGT_validated, since we
+ * "own" the type ref; but theoretically, the PGT_partial
+ * could be cleared by someone else.
+ */
+ if ( ret == -EINTR )
+ {
+ ASSERT(page->u.inuse.type_info & PGT_validated);
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ }
+ else
+ put_page(page);
+
ret = -ERESTART;
+
+ /* Put the page back on the list and drop the ref we grabbed above */
page_list_add(page, list);
- set_bit(_PGT_pinned, &page->u.inuse.type_info);
put_page(page);
goto out;
default:
@@ -2061,7 +2086,7 @@ void vcpu_kick(struct vcpu *v)
* pending flag. These values may fluctuate (after all, we hold no
* locks) but the key insight is that each change will cause
* evtchn_upcall_pending to be polled.
- *
+ *
* NB2. We save the running flag across the unblock to avoid a needless
* IPI for domains that we IPI'd to unblock.
*/
--
2.23.0
From 255ad8804c79dc874322a7060ae0615305bcb8e8 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 04/11] x86/mm: Use flags for _put_page_type rather than a
boolean
This is mainly in preparation for _put_page_type taking the
partial_flags value in the future. It also makes call sites easier to
read (a flag name conveys more than a bare `true` or `false`).
No functional change intended.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
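The bool-to-flags conversion is a standard C refactoring; a minimal sketch of
its shape (PTF_preemptible is the name used by the patch, everything else
here is illustrative):

    #include <stdbool.h>

    #define PTF_preemptible (1u << 0)
    /* other PTF_* bits (PTF_partial_set, PTF_defer) appear in the
     * surrounding Xen code and can travel through the same word */

    struct page;   /* opaque stand-in for struct page_info */

    static int put_type_sketch(struct page *pg, unsigned int flags)
    {
        bool preemptible = flags & PTF_preemptible;

        (void)pg; (void)preemptible;
        /* ... body identical to the bool version; only the parameter
         * type changed, so future PTF_* bits can be added without
         * touching every call site again ... */
        return 0;
    }

Call sites then read _put_page_type(pg, PTF_preemptible, ptpg) rather than
_put_page_type(pg, true, ptpg), which is the readability gain the message
describes.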
---
xen/arch/x86/mm.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 84ee48ec3f..e3264f8879 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1253,7 +1253,7 @@ get_page_from_l4e(
return rc;
}
-static int _put_page_type(struct page_info *page, bool preemptible,
+static int _put_page_type(struct page_info *page, unsigned int flags,
struct page_info *ptpg);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
@@ -1345,7 +1345,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- rc = _put_page_type(pg, true, ptpg);
+ rc = _put_page_type(pg, PTF_preemptible, ptpg);
}
else if ( flags & PTF_defer )
{
@@ -1354,7 +1354,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
}
else
{
- rc = _put_page_type(pg, true, ptpg);
+ rc = _put_page_type(pg, PTF_preemptible, ptpg);
if ( likely(!rc) )
put_page(pg);
}
@@ -1391,7 +1391,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
}
if ( flags & PTF_defer )
@@ -1401,7 +1401,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
return 0;
}
- rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
if ( likely(!rc) )
put_page(pg);
@@ -1422,7 +1422,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
PTF_partial_set )
{
ASSERT(!(flags & PTF_defer));
- return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
}
if ( flags & PTF_defer )
@@ -1432,7 +1432,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
return 0;
}
- rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+ rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
if ( likely(!rc) )
put_page(pg);
}
@@ -2680,11 +2680,12 @@ static int _put_final_page_type(struct page_info *page, unsigned long type,
}
-static int _put_page_type(struct page_info *page, bool preemptible,
+static int _put_page_type(struct page_info *page, unsigned int flags,
struct page_info *ptpg)
{
unsigned long nx, x, y = page->u.inuse.type_info;
int rc = 0;
+ bool preemptible = flags & PTF_preemptible;
for ( ; ; )
{
@@ -2884,7 +2885,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
if ( unlikely(iommu_ret) )
{
- _put_page_type(page, false, NULL);
+ _put_page_type(page, 0, NULL);
rc = iommu_ret;
goto out;
}
@@ -2911,7 +2912,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
void put_page_type(struct page_info *page)
{
- int rc = _put_page_type(page, false, NULL);
+ int rc = _put_page_type(page, 0, NULL);
ASSERT(rc == 0);
(void)rc;
}
@@ -2927,7 +2928,7 @@ int get_page_type(struct page_info *page, unsigned long type)
int put_page_type_preemptible(struct page_info *page)
{
- return _put_page_type(page, true, NULL);
+ return _put_page_type(page, PTF_preemptible, NULL);
}
int get_page_type_preemptible(struct page_info *page, unsigned long type)
@@ -2943,7 +2944,7 @@ int put_old_guest_table(struct vcpu *v)
if ( !v->arch.old_guest_table )
return 0;
- switch ( rc = _put_page_type(v->arch.old_guest_table, true,
+ switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible,
v->arch.old_guest_ptpg) )
{
case -EINTR:
--
2.23.0
From 36ce2b6e246d41ebaeb994dbf2b4e0e4555893bf Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 05/11] x86/mm: Rework get_page_and_type_from_mfn conditional
Make it easier to read by declaring the conditions in which we will
retain the ref, rather than the conditions under which we release it.
The only way (page == current->arch.old_guest_table) can be true is if
preemptible is true; so remove this from the query itself, and add an
ASSERT() to that effect on the opposite path.
No functional change intended.
NB that the alloc_lN_table() functions mishandle the "linear pt failure"
situation described in the comment; this will be addressed in a future patch.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 39 +++++++++++++++++++++++++++++++++++++--
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e3264f8879..ce7f5b84f3 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -678,8 +678,43 @@ static int get_page_and_type_from_mfn(
rc = __get_page_type(page, type, preemptible);
- if ( unlikely(rc) && !partial_ref &&
- (!preemptible || page != current->arch.old_guest_table) )
+ /*
+ * Retain the refcount if:
+ * - page is fully validated (rc == 0)
+ * - page is not validated (rc < 0) but:
+ * - We came in with a reference (partial_ref)
+ * - page is partially validated but there's been an error
+ * (page == current->arch.old_guest_table)
+ *
+ * The partial_ref-on-error clause is worth an explanation. There
+ * are two scenarios where partial_ref might be true coming in:
+ * - mfn has been partially demoted as type `type`; i.e. has
+ * PGT_partial set
+ * - mfn has been partially demoted as L(type+1) (i.e., a linear
+ * page; e.g. we're being called from get_page_from_l2e with
+ * type == PGT_l1_table, but the mfn is PGT_l2_table)
+ *
+ * If there's an error, in the first case, _get_page_type will
+ * either return -ERESTART, in which case we want to retain the
+ * ref (as the caller will consider it retained), or -EINVAL, in
+ * which case old_guest_table will be set; in both cases, we need
+ * to retain the ref.
+ *
+ * In the second case, if there's an error, _get_page_type() can
+ * *only* return -EINVAL, and *never* set old_guest_table. In
+ * that case we also want to retain the reference, to allow the
+ * page to continue to be torn down (i.e., PGT_partial cleared)
+ * safely.
+ *
+ * Also note that we shouldn't be able to leave with the reference
+ * count retained unless we succeeded, or the operation was
+ * preemptible.
+ */
+ if ( likely(!rc) || partial_ref )
+ /* nothing */;
+ else if ( page == current->arch.old_guest_table )
+ ASSERT(preemptible);
+ else
put_page(page);
return rc;
--
2.23.0
From 180f638fb5047c478ca32b15dd2ba9ba0ce43623 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 06/11] x86/mm: Have alloc_l[23]_table clear partial_flags when
preempting
In order to allow recursive pagetable promotions and demotions to be
interrupted, Xen must keep track of the state of the sub-pages
promoted or demoted. This is stored in two elements in the page