Commit 221fb962 authored by Henrik Riomar, committed by Leonardo Arena

main/xen: add fix for XSA-373

This is CVE-2021-28692
parent 1e89abec
@@ -216,6 +216,7 @@ options="!strip"
# - CVE-2021-28687 XSA-368
# 4.13.3-r1:
# - CVE-2021-28693 XSA-372
# - CVE-2021-28692 XSA-373
case "$CARCH" in
x86*)
@@ -279,6 +280,12 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
0001-xen-arm-Create-dom0less-domUs-earlier.patch
0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
xsa373-4.13-1.patch
xsa373-4.13-2.patch
xsa373-4.13-3.patch
xsa373-4.13-4.patch
xsa373-4.13-5.patch
hotplug-Linux-iscsi-block-handle-lun-1.patch
xenstored.initd
@@ -536,6 +543,11 @@ e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3a
2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch
57bae240ac94fd35e8a2a39a06fdc4178a1cf0782832a77fd768ca3c773d8b27d76692703ac481733874e5a0198ef20d7319ea504c6b7836d4edd0a198adede1 0001-xen-arm-Create-dom0less-domUs-earlier.patch
2b47e612c23c8bb65a2432f93a877f592b75b8de2ae97d5a22ed37588594a38b740f5c3e0694dd7ceff5f949e24ff38113e543038d5ae22e8c1dc142c3e8d1b3 0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
7010225962e7c22d6aa2e14d10e5091b3876a76f195e9725e7f175b108f933ea9ad5a080663d27279ccd20e2d4e344620ec414e17437d971a8f3cb9420520696 xsa373-4.13-1.patch
682476c1e44590268c5f84b96a15a44942ec73a54748264b2879ac7ffdd36336db0fa5b51659de3368c9bc6d12e8ecc551761d04f08301e5055d117ae7430475 xsa373-4.13-2.patch
bb04c86c57058b674237d6d81b8a5a600e39e6c2144ae72b7312ee7e72d4305c5fa4b8d5194a0aecd5631e66fcd2165208a821a1fb7034c0c413ae1b1a5525d4 xsa373-4.13-3.patch
1c93e62bfeb8ed0d5fe6db10baebc00cf54f7a6e2255f53e2770220db86c69fe46dd2fac17502d9da2109a60c93d8703b9bb618977cfe0e9919659f133f87c8d xsa373-4.13-4.patch
8fb77d16b60efa4307c0008c8773a9d5341f1b0577c6de46fe6e5630a7243c7b2eb55089a1ce778e4ed03ebf29fad69042746121b50cb953016e95a60549a728 xsa373-4.13-5.patch
8c9cfc6afca325df1d8026e21ed03fa8cd2c7e1a21a56cc1968301c5ab634bfe849951899e75d328951d7a41273d1e49a2448edbadec0029ed410c43c0549812 hotplug-Linux-iscsi-block-handle-lun-1.patch
52c43beb2596d645934d0f909f2d21f7587b6898ed5e5e7046799a8ed6d58f7a09c5809e1634fa26152f3fd4f3e7cfa07da7076f01b4a20cc8f5df8b9cb77e50 xenstored.initd
093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0 xenstored.confd
From: Jan Beulich <jbeulich@suse.com>
Subject: VT-d: size qinval queue dynamically
With the present synchronous model, we need two slots for every
operation (the operation itself and a wait descriptor). There can be
one such pair of requests pending per CPU. To ensure that under all
normal circumstances a slot is always available when one is requested,
size the queue ring according to the number of present CPUs.
This is part of XSA-373 / CVE-2021-28692.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
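As a side note (not part of the patch itself), the sizing arithmetic introduced below can be sanity-checked with a small stand-alone sketch. The constants are stubbed out (4K pages, 16-byte qinval entries), order_from_bytes() only mimics Xen's get_order_from_bytes(), and the 128-CPU count is an assumed example value.

#include <stdio.h>

#define PAGE_SHIFT         12
#define QINVAL_ENTRY_ORDER (PAGE_SHIFT - 4)   /* 16-byte entries -> 256 per page */

/* Smallest page order covering 'bytes' (mimics Xen's get_order_from_bytes()). */
static unsigned int order_from_bytes(unsigned long bytes)
{
    unsigned int order = 0;

    while ((1ul << (order + PAGE_SHIFT)) < bytes)
        ++order;

    return order;
}

int main(void)
{
    unsigned int cpus = 128;                 /* assumed num_present_cpus() */
    unsigned int slots = cpus * 2 + 1;       /* op + wait per CPU, plus one slack slot */
    unsigned int order = order_from_bytes((unsigned long)slots <<
                                          (PAGE_SHIFT - QINVAL_ENTRY_ORDER));
    unsigned int entries = 1u << (order + QINVAL_ENTRY_ORDER);

    /* 128 CPUs -> 257 slots -> 4112 bytes -> order 1 (2 pages) -> 512 entries */
    printf("%u CPUs -> %u slots needed -> page order %u -> %u-entry ring\n",
           cpus, slots, order, entries);

    return 0;
}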
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -450,17 +450,9 @@ struct qinval_entry {
}q;
};
-/* Order of queue invalidation pages(max is 8) */
-#define QINVAL_PAGE_ORDER 2
-
-#define QINVAL_ARCH_PAGE_ORDER (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
-#define QINVAL_ARCH_PAGE_NR ( QINVAL_ARCH_PAGE_ORDER < 0 ? \
- 1 : \
- 1 << QINVAL_ARCH_PAGE_ORDER )
-
/* Each entry is 16 bytes, so 2^8 entries per page */
#define QINVAL_ENTRY_ORDER ( PAGE_SHIFT - 4 )
-#define QINVAL_ENTRY_NR (1 << (QINVAL_PAGE_ORDER + 8))
+#define QINVAL_MAX_ENTRY_NR (1u << (7 + QINVAL_ENTRY_ORDER))
/* Status data flag */
#define QINVAL_STAT_INIT 0
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -31,6 +31,9 @@
#define VTD_QI_TIMEOUT 1
+static unsigned int __read_mostly qi_pg_order;
+static unsigned int __read_mostly qi_entry_nr;
+
static int __must_check invalidate_sync(struct vtd_iommu *iommu);
static void print_qi_regs(struct vtd_iommu *iommu)
@@ -55,7 +58,7 @@ static unsigned int qinval_next_index(st
tail >>= QINVAL_INDEX_SHIFT;
/* (tail+1 == head) indicates a full queue, wait for HW */
- while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
+ while ( ((tail + 1) & (qi_entry_nr - 1)) ==
( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
cpu_relax();
@@ -68,7 +71,7 @@ static void qinval_update_qtail(struct v
/* Need hold register lock when update tail */
ASSERT( spin_is_locked(&iommu->register_lock) );
- val = (index + 1) % QINVAL_ENTRY_NR;
+ val = (index + 1) & (qi_entry_nr - 1);
dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
}
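The ring-wrap arithmetic above switches from a modulo to a bit mask; the two are only interchangeable because qi_entry_nr is always computed as a power of two (1u << ...). A minimal illustration, with hypothetical values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t entries = 512;   /* any power of two, e.g. the qi_entry_nr above */
    uint32_t index = 511;     /* arbitrary tail index */

    /* For power-of-two ring sizes, masking and modulo agree on the wrap. */
    assert(((index + 1) & (entries - 1)) == (index + 1) % entries);

    return 0;
}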
@@ -403,8 +406,28 @@ int enable_qinval(struct vtd_iommu *iomm
if ( iommu->qinval_maddr == 0 )
{
- iommu->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
- iommu->node);
+ if ( !qi_entry_nr )
+ {
+ /*
+ * With the present synchronous model, we need two slots for every
+ * operation (the operation itself and a wait descriptor). There
+ * can be one such pair of requests pending per CPU. One extra
+ * entry is needed as the ring is considered full when there's
+ * only one entry left.
+ */
+ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= QINVAL_MAX_ENTRY_NR);
+ qi_pg_order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
+ (PAGE_SHIFT -
+ QINVAL_ENTRY_ORDER));
+ qi_entry_nr = 1u << (qi_pg_order + QINVAL_ENTRY_ORDER);
+
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "QI: using %u-entry ring(s)\n", qi_entry_nr);
+ }
+
+ iommu->qinval_maddr =
+ alloc_pgtable_maddr(qi_entry_nr >> QINVAL_ENTRY_ORDER,
+ iommu->node);
if ( iommu->qinval_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
@@ -418,15 +441,16 @@ int enable_qinval(struct vtd_iommu *iomm
spin_lock_irqsave(&iommu->register_lock, flags);
- /* Setup Invalidation Queue Address(IQA) register with the
- * address of the page we just allocated. QS field at
- * bits[2:0] to indicate size of queue is one 4KB page.
- * That's 256 entries. Queued Head (IQH) and Queue Tail (IQT)
- * registers are automatically reset to 0 with write
- * to IQA register.
+ /*
+ * Setup Invalidation Queue Address (IQA) register with the address of the
+ * pages we just allocated. The QS field at bits[2:0] indicates the size
+ * (page order) of the queue.
+ *
+ * Queued Head (IQH) and Queue Tail (IQT) registers are automatically
+ * reset to 0 with write to IQA register.
*/
dmar_writeq(iommu->reg, DMAR_IQA_REG,
- iommu->qinval_maddr | QINVAL_PAGE_ORDER);
+ iommu->qinval_maddr | qi_pg_order);
dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
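The IQA write above now encodes the queue size as a page order in the QS field (bits [2:0]) instead of the fixed QINVAL_PAGE_ORDER. A tiny sketch of that encoding, using assumed example values (not real hardware state), continuing the 2-page/512-entry case from the earlier sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t qinval_maddr = 0x12345000;  /* hypothetical page-aligned ring base */
    unsigned int qi_pg_order = 1;        /* 2 pages -> 512 entries */

    /* QS lives in bits [2:0]; the base is page aligned, so OR-ing is safe. */
    uint64_t iqa = qinval_maddr | qi_pg_order;

    printf("IQA = %#llx\n", (unsigned long long)iqa);
    return 0;
}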
From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: size command buffer dynamically
With the present synchronous model, we need two slots for every
operation (the operation itself and a wait command). There can be one
such pair of commands pending per CPU. To ensure that under all normal
circumstances a slot is always available when one is requested, size the
command ring according to the number of present CPUs.
This is part of XSA-373 / CVE-2021-28692.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
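The AMD-side arithmetic mirrors the VT-d change: the ring is sized from the present-CPU count, and the value fed into the command-buffer length field (see the register_iommu_cmd_buffer hunk below) becomes the page order plus the entries-per-page order. A stand-alone sketch with stubbed constants and an assumed 128-CPU host; order_from_bytes() again only mimics Xen's get_order_from_bytes():

#include <stdio.h>

#define PAGE_SHIFT                   12
#define IOMMU_CMD_BUFFER_ENTRY_ORDER 4   /* 16-byte command entries */

/* Smallest page order covering 'bytes' (mimics Xen's get_order_from_bytes()). */
static unsigned int order_from_bytes(unsigned long bytes)
{
    unsigned int order = 0;

    while ((1ul << (order + PAGE_SHIFT)) < bytes)
        ++order;

    return order;
}

int main(void)
{
    unsigned int cpus = 128;            /* assumed num_present_cpus() */
    unsigned int order = order_from_bytes((cpus * 2 + 1ul) <<
                                          IOMMU_CMD_BUFFER_ENTRY_ORDER);
    unsigned int nr_ents = 1u << (order + PAGE_SHIFT -
                                  IOMMU_CMD_BUFFER_ENTRY_ORDER);
    unsigned int power_of2_entries = order + PAGE_SHIFT -
                                     IOMMU_CMD_BUFFER_ENTRY_ORDER;

    /* 128 CPUs -> order 1 -> 512 entries -> length field value 9 */
    printf("%u CPUs -> order %u -> %u entries -> length field %u\n",
           cpus, order, nr_ents, power_of2_entries);

    return 0;
}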
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -35,8 +35,8 @@ static int queue_iommu_command(struct am
if ( head != tail )
{
memcpy(iommu->cmd_buffer.buffer +
- (iommu->cmd_buffer.tail * IOMMU_CMD_BUFFER_ENTRY_SIZE),
- cmd, IOMMU_CMD_BUFFER_ENTRY_SIZE);
+ (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
+ cmd, sizeof(cmd_entry_t));
iommu->cmd_buffer.tail = tail;
return 1;
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -125,7 +125,7 @@ static void register_iommu_cmd_buffer_in
writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET);
power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.alloc_size) +
- IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE;
+ PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER;
entry = 0;
iommu_set_addr_hi_to_reg(&entry, addr_hi);
@@ -1050,9 +1050,31 @@ static void *__init allocate_ring_buffer
static void * __init allocate_cmd_buffer(struct amd_iommu *iommu)
{
/* allocate 'command buffer' in power of 2 increments of 4K */
+ static unsigned int __read_mostly nr_ents;
+
+ if ( !nr_ents )
+ {
+ unsigned int order;
+
+ /*
+ * With the present synchronous model, we need two slots for every
+ * operation (the operation itself and a wait command). There can be
+ * one such pair of requests pending per CPU. One extra entry is
+ * needed as the ring is considered full when there's only one entry
+ * left.
+ */
+ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= IOMMU_CMD_BUFFER_MAX_ENTRIES);
+ order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
+ IOMMU_CMD_BUFFER_ENTRY_ORDER);
+ nr_ents = 1u << (order + PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER);
+
+ AMD_IOMMU_DEBUG("using %u-entry cmd ring(s)\n", nr_ents);
+ }
+
+ BUILD_BUG_ON(sizeof(cmd_entry_t) != (1u << IOMMU_CMD_BUFFER_ENTRY_ORDER));
+
return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t),
- IOMMU_CMD_BUFFER_DEFAULT_ENTRIES,
- "Command Buffer", false);
+ nr_ents, "Command Buffer", false);
}
static void * __init allocate_event_log(struct amd_iommu *iommu)
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -20,9 +20,6 @@
#ifndef _ASM_X86_64_AMD_IOMMU_DEFS_H
#define _ASM_X86_64_AMD_IOMMU_DEFS_H
-/* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
-#define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES 512
-
/* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
#define IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512
@@ -168,8 +165,8 @@ struct amd_iommu_dte {
#define IOMMU_CMD_BUFFER_LENGTH_MASK 0x0F000000
#define IOMMU_CMD_BUFFER_LENGTH_SHIFT 24
-#define IOMMU_CMD_BUFFER_ENTRY_SIZE 16
-#define IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE 8
+#define IOMMU_CMD_BUFFER_ENTRY_ORDER 4
+#define IOMMU_CMD_BUFFER_MAX_ENTRIES (1u << 15)
#define IOMMU_CMD_OPCODE_MASK 0xF0000000
#define IOMMU_CMD_OPCODE_SHIFT 28
From: Jan Beulich <jbeulich@suse.com>
Subject: VT-d: eliminate flush related timeouts
Leaving an in-progress operation pending when it appears to take too
long is problematic: If e.g. a QI command completed later, the write to
the "poll slot" may instead be understood to signal a subsequently
started command's completion. Also our accounting of the timeout period
was actually wrong: We included the time it took for the command to
actually make it to the front of the queue, which could be heavily
affected by guests other than the one for which the flush is being
performed.
Do away with all timeout detection on all flush related code paths.
Log excessively long processing times (with a progressive threshold) to
have some indication of problems in this area.
Additionally log (once) if qinval_next_index() didn't immediately find
an available slot. Together with the earlier change sizing the queue(s)
dynamically, we should now have a guarantee that with our fully
synchronous model any demand for slots can actually be satisfied.
This is part of XSA-373 / CVE-2021-28692.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
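Both this patch and the later AMD one replace hard timeouts with a "progressive threshold": a slow flush is only logged, and the threshold used for the next warning grows via threshold |= threshold << 1 (1, 3, 7, 15, ...). A small sketch of how the warning window evolves; the 100 ms base matches the non-dev-IOTLB wait-descriptor path below, and each loop iteration models another flush that again exceeds its window:

#include <stdio.h>

int main(void)
{
    unsigned int threshold = 1;   /* a static in the patch, persisting across flushes */
    unsigned int base_ms = 100;   /* assumed base: QI wait path when not dev-IOTLB */

    for (int flush = 1; flush <= 5; flush++) {
        printf("flush %d: warning after %u ms\n", flush, base_ms * threshold);
        threshold |= threshold << 1;   /* 1 -> 3 -> 7 -> 15 -> 31 ... */
    }

    return 0;
}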
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -127,6 +127,34 @@ do {
} \
} while (0)
+#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts) \
+do { \
+ static unsigned int __read_mostly threshold = 1; \
+ s_time_t start = NOW(); \
+ s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \
+ \
+ for ( ; ; ) \
+ { \
+ sts = op(iommu->reg, offset); \
+ if ( cond ) \
+ break; \
+ if ( timeout && NOW() > timeout ) \
+ { \
+ threshold |= threshold << 1; \
+ printk(XENLOG_WARNING VTDPREFIX \
+ " IOMMU#%u: %s flush taking too long\n", \
+ iommu->index, what); \
+ timeout = 0; \
+ } \
+ cpu_relax(); \
+ } \
+ \
+ if ( !timeout ) \
+ printk(XENLOG_WARNING VTDPREFIX \
+ " IOMMU#%u: %s flush took %lums\n", \
+ iommu->index, what, (NOW() - start) / 10000000); \
+} while ( false )
+
int vtd_hw_check(void);
void disable_pmr(struct vtd_iommu *iommu);
int is_igd_drhd(struct acpi_drhd_unit *drhd);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -320,8 +320,8 @@ static void iommu_flush_write_buffer(str
dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
/* Make sure hardware complete it */
- IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
- !(val & DMA_GSTS_WBFS), val);
+ IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl,
+ !(val & DMA_GSTS_WBFS), val);
spin_unlock_irqrestore(&iommu->register_lock, flags);
}
@@ -370,8 +370,8 @@ int vtd_flush_context_reg(struct vtd_iom
dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
/* Make sure hardware complete it */
- IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
- !(val & DMA_CCMD_ICC), val);
+ IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq,
+ !(val & DMA_CCMD_ICC), val);
spin_unlock_irqrestore(&iommu->register_lock, flags);
/* flush context entry will implicitly flush write buffer */
@@ -448,8 +448,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu
dmar_writeq(iommu->reg, tlb_offset + 8, val);
/* Make sure hardware complete it */
- IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
- !(val & DMA_TLB_IVT), val);
+ IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq,
+ !(val & DMA_TLB_IVT), val);
spin_unlock_irqrestore(&iommu->register_lock, flags);
/* check IOTLB invalidation granularity */
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -29,8 +29,6 @@
#include "extern.h"
#include "../ats.h"
-#define VTD_QI_TIMEOUT 1
-
static unsigned int __read_mostly qi_pg_order;
static unsigned int __read_mostly qi_entry_nr;
@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st
/* (tail+1 == head) indicates a full queue, wait for HW */
while ( ((tail + 1) & (qi_entry_nr - 1)) ==
( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+ {
+ printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n",
+ iommu->index);
cpu_relax();
+ }
return tail;
}

@@ -180,23 +182,32 @@ static int __must_check queue_invalidate
/* Now we don't support interrupt method */
if ( sw )
{
- s_time_t timeout;
-
- /* In case all wait descriptor writes to same addr with same data */
- timeout = NOW() + MILLISECS(flush_dev_iotlb ?
- iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);
+ static unsigned int __read_mostly threshold = 1;
+ s_time_t start = NOW();
+ s_time_t timeout = start + (flush_dev_iotlb
+ ? iommu_dev_iotlb_timeout
+ : 100) * MILLISECS(threshold);
while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE )
{
- if ( NOW() > timeout )
+ if ( timeout && NOW() > timeout )
{
- print_qi_regs(iommu);
+ threshold |= threshold << 1;
printk(XENLOG_WARNING VTDPREFIX
- " Queue invalidate wait descriptor timed out\n");
- return -ETIMEDOUT;
+ " IOMMU#%u: QI%s wait descriptor taking too long\n",
+ iommu->index, flush_dev_iotlb ? " dev" : "");
+ print_qi_regs(iommu);
+ timeout = 0;
}
cpu_relax();
}
+
+ if ( !timeout )
+ printk(XENLOG_WARNING VTDPREFIX
+ " IOMMU#%u: QI%s wait descriptor took %lums\n",
+ iommu->index, flush_dev_iotlb ? " dev" : "",
+ (NOW() - start) / 10000000);
+
return 0;
}
From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: wait for command slot to be available
No caller cared about send_iommu_command() indicating unavailability of
a slot. Hence if a sufficient number of prior commands timed out, we did
blindly assume that the requested command was submitted to the IOMMU
when really it wasn't. This could mean either a hanging system (waiting
for a command to complete that was never seen by the IOMMU) or blindly
propagating success back to callers, making them believe they're fine
to e.g. free previously unmapped pages.
Fold the three involved functions into one, add spin waiting for an
available slot along the lines of VT-d's qinval_next_index(), and as a
consequence drop all error indicator return types/values.
This is part of XSA-373 / CVE-2021-28692.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
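The folded send_iommu_command() below spins until the head pointer moves away from the slot the new tail would occupy, keeping one slot of slack so that head == tail still means "empty". The following is a minimal, self-contained model of that producer logic, not the patch's exact code; the IOMMU acting as consumer is simulated by a plain variable that never advances:

#include <stdio.h>

#define ENTRIES 8                       /* assumed small ring for illustration */

static unsigned int head;               /* advanced by the consumer (the IOMMU) */
static unsigned int tail;               /* advanced by the producer (the driver) */

/* Returns 0 if the command was queued, 1 if the producer would have to spin. */
static int try_queue(void)
{
    unsigned int next = (tail + 1) % ENTRIES;

    if (next == head)                   /* full: only the slack slot is left */
        return 1;                       /* the real code cpu_relax()es here  */

    /* ...the 16-byte command would be memcpy()ed into slot 'tail' here... */
    tail = next;
    return 0;
}

int main(void)
{
    int queued = 0;

    while (!try_queue())                /* fill the ring without consuming */
        queued++;

    /* With 8 entries, 7 commands fit before the ring reports full. */
    printf("queued %d commands before the ring reported full\n", queued);
    return 0;
}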
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -22,48 +22,36 @@
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"
-static int queue_iommu_command(struct amd_iommu *iommu, u32 cmd[])
+static void send_iommu_command(struct amd_iommu *iommu,
+ const uint32_t cmd[4])
{
- uint32_t tail, head;
+ uint32_t tail;
tail = iommu->cmd_buffer.tail;
if ( ++tail == iommu->cmd_buffer.entries )
tail = 0;
- head = iommu_get_rb_pointer(readl(iommu->mmio_base +
- IOMMU_CMD_BUFFER_HEAD_OFFSET));
- if ( head != tail )
+ while ( tail == iommu_get_rb_pointer(readl(iommu->mmio_base +
+ IOMMU_CMD_BUFFER_HEAD_OFFSET)) )
{
- memcpy(iommu->cmd_buffer.buffer +
- (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
- cmd, sizeof(cmd_entry_t));
-
- iommu->cmd_buffer.tail = tail;
- return 1;
+ printk_once(XENLOG_ERR
+ "AMD IOMMU %04x:%02x:%02x.%u: no cmd slot available\n",
+ iommu->seg, PCI_BUS(iommu->bdf),
+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
+ cpu_relax();
}
- return 0;
-}
+ memcpy(iommu->cmd_buffer.buffer +
+ (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
+ cmd, sizeof(cmd_entry_t));
-static void commit_iommu_command_buffer(struct amd_iommu *iommu)
-{
- u32 tail = 0;
+ iommu->cmd_buffer.tail = tail;
+ tail = 0;
iommu_set_rb_pointer(&tail, iommu->cmd_buffer.tail);
writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET);
}
-int send_iommu_command(struct amd_iommu *iommu, u32 cmd[])
-{
- if ( queue_iommu_command(iommu, cmd) )
- {
- commit_iommu_command_buffer(iommu);
- return 1;
- }
-
- return 0;
-}
-
static void flush_command_buffer(struct amd_iommu *iommu)
{
u32 cmd[4], status;
From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: drop command completion timeout
First and foremost - such timeouts were not signaled to callers, making
them believe they're fine to e.g. free previously unmapped pages.
Mirror VT-d's behavior: A fixed number of loop iterations is not a
suitable way to detect timeouts in a manner independent of the
environment (CPU and bus speeds) anyway. Furthermore, leaving an
in-progress operation
pending when it appears to take too long is problematic: If a command
completed later, the signaling of its completion may instead be
understood to signal a subsequently started command's completion.
Log excessively long processing times (with a progressive threshold) to
have some indication of problems in this area. Allow callers to specify
a non-default timeout bias for this logging, using the same values as
VT-d does, which in particular means a (by default) much larger value
for device IO TLB invalidation.
This is part of XSA-373 / CVE-2021-28692.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
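The new timeout_base argument lets device-IOTLB flushes use a much larger bias, while every other caller passes 0 and falls back to a 100 ms default, scaled by the same progressive threshold as on the VT-d side. A small sketch of the window computation; the 1000 ms value for iommu_dev_iotlb_timeout is an assumed example, not taken from this patch:

#include <stdio.h>

/* Assumed example value for Xen's iommu_dev_iotlb_timeout tunable. */
static unsigned int iommu_dev_iotlb_timeout = 1000;

/* Mirrors "(timeout_base ?: 100) * MILLISECS(threshold)", expressed in ms. */
static unsigned int window_ms(unsigned int timeout_base, unsigned int threshold)
{
    return (timeout_base ? timeout_base : 100) * threshold;
}

int main(void)
{
    unsigned int threshold = 1;

    printf("plain flush: warn after %u ms\n", window_ms(0, threshold));
    printf("dev-IOTLB:   warn after %u ms\n",
           window_ms(iommu_dev_iotlb_timeout, threshold));

    threshold |= threshold << 1;        /* after one warning: threshold = 3 */
    printf("plain flush (after one warning): %u ms\n", window_ms(0, threshold));

    return 0;
}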
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -52,10 +52,12 @@ static void send_iommu_command(struct am
writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET);
}
-static void flush_command_buffer(struct amd_iommu *iommu)
+static void flush_command_buffer(struct amd_iommu *iommu,
+ unsigned int timeout_base)
{
- u32 cmd[4], status;
- int loop_count, comp_wait;
+ uint32_t cmd[4];
+ s_time_t start, timeout;
+ static unsigned int __read_mostly threshold = 1;
/* RW1C 'ComWaitInt' in status register */
writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
@@ -71,24 +73,31 @@ static void flush_command_buffer(struct
IOMMU_COMP_WAIT_I_FLAG_SHIFT, &cmd[0]);
send_iommu_command(iommu, cmd);
- /* Make loop_count long enough for polling completion wait bit */
- loop_count = 1000;
- do {
- status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
- comp_wait = get_field_from_reg_u32(status,
- IOMMU_STATUS_COMP_WAIT_INT_MASK,
- IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
- --loop_count;
- } while ( !comp_wait && loop_count );
-
- if ( comp_wait )
+ start = NOW();
+ timeout = start + (timeout_base ?: 100) * MILLISECS(threshold);
+ while ( !(readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET) &
+ IOMMU_STATUS_COMP_WAIT_INT_MASK) )
{
- /* RW1C 'ComWaitInt' in status register */
- writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
- iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
- return;
+ if ( timeout && NOW() > timeout )
+ {
+ threshold |= threshold << 1;
+ printk(XENLOG_WARNING
+ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait taking too long\n",
+ iommu->seg, PCI_BUS(iommu->bdf),
+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
+ timeout_base ? "iotlb " : "");
+ timeout = 0;
+ }
+ cpu_relax();
}
- AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
+
+ if ( !timeout )
+ printk(XENLOG_WARNING
+ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait took %lums\n",
+ iommu->seg, PCI_BUS(iommu->bdf),
+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
+ timeout_base ? "iotlb " : "",
+ (NOW() - start) / 10000000);
}
/* Build low level iommu command messages */
@@ -300,7 +309,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
/* send INVALIDATE_IOTLB_PAGES command */
spin_lock_irqsave(&iommu->lock, flags);
invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr, req_id, order);
- flush_command_buffer(iommu);
+ flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -337,7 +346,7 @@ static void _amd_iommu_flush_pages(struc
{
spin_lock_irqsave(&iommu->lock, flags);
invalidate_iommu_pages(iommu, daddr, dom_id, order);
- flush_command_buffer(iommu);
+ flush_command_buffer(iommu, 0);
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -361,7 +370,7 @@ void amd_iommu_flush_device(struct amd_i
ASSERT( spin_is_locked(&iommu->lock) );
invalidate_dev_table_entry(iommu, bdf);
- flush_command_buffer(iommu);
+ flush_command_buffer(iommu, 0);
}
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
@@ -369,7 +378,7 @@ void amd_iommu_flush_intremap(struct amd
ASSERT( spin_is_locked(&iommu->lock) );
invalidate_interrupt_table(iommu, bdf);
- flush_command_buffer(iommu);
+ flush_command_buffer(iommu, 0);
}
void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
@@ -377,7 +386,7 @@ void amd_iommu_flush_all_caches(struct a
ASSERT( spin_is_locked(&iommu->lock) );
invalidate_iommu_all(iommu);
- flush_command_buffer(iommu);
+ flush_command_buffer(iommu, 0);
}
void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[])
@@ -387,7 +396,8 @@ void amd_iommu_send_guest_cmd(struct amd
spin_lock_irqsave(&iommu->lock, flags);
send_iommu_command(iommu, cmd);
- flush_command_buffer(iommu);
+ /* TBD: Timeout selection may require peeking into cmd[]. */
+ flush_command_buffer(iommu, 0);
spin_unlock_irqrestore(&iommu->lock, flags);