Replace the VT-d iommu_domain implementation of the second stage and
first stage page tables with the iommupt VTDSS and x86_64 page
tables. The x86_64 format is shared with the AMD driver.
There are a couple of notable things in VT-d:
- Like AMD, the second stage format is not sign extended; unlike AMD, it
  cannot decode a full 64 bits. The first stage format is a normal
  sign-extended x86 page table.
- The HW caps indicate how many levels, how many address bits, and which
  leaf page sizes are supported in HW. As before, the highest number of
  levels that can translate the entire supported address width is used
  (see the level-count sketch after this list). The supported page sizes
  are derived directly from the dedicated first/second stage cap bits.
- VT-d requires flushing 'write buffers'. This logic is left unchanged;
  the write buffer is flushed on any gather flush or through
  iotlb_sync_map.
- Like ARM, VT-d has an optional non-coherent page table walker that
  requires cache flushing. This is supported through
  PT_FEAT_DMA_INCOHERENT, the same as ARM; however, x86 can't use the DMA
  API for flushing and must call the arch function clflush_cache_range()
  (see the flush sketch after this list).
- PT_FEAT_DYNAMIC_TOP can probably be supported on VT-d someday for the
  second stage, when it uses 128-bit atomic stores for the HW context
  structures.
- PT_FEAT_VTDSS_FORCE_WRITEABLE is used to work around ERRATA_772415_SPR17
- The kernel command line parameter "sp_off" disables all page sizes
  except 4k.
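
For reference, a worked sketch of the level-count arithmetic, assuming 4k
pages and the usual 9 bits translated per level; pt_levels_for_aw() is a
hypothetical helper written for illustration, not driver code:

#include <linux/math.h>		/* DIV_ROUND_UP() */

/*
 * With a 12-bit page offset and 9 bits per level, an N-level table
 * covers 12 + 9 * N address bits: 3 levels -> 39, 4 -> 48, 5 -> 57.
 */
static unsigned int pt_levels_for_aw(unsigned int address_width)
{
	return DIV_ROUND_UP(address_width - 12, 9);
}

For example, pt_levels_for_aw(48) == 4.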
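
And a minimal sketch of what the non-coherent walker support amounts to on
x86: after PTEs are written, the CPU cache lines backing them must be
written back so the HW walker can observe them. flush_ptes_for_hw() and
the 'incoherent' flag are illustrative assumptions; clflush_cache_range()
is the real arch interface:

#include <linux/types.h>
#include <asm/cacheflush.h>	/* clflush_cache_range() */

/*
 * Sketch: write back the cache lines covering freshly written PTEs so a
 * non-coherent IOMMU walker observes them. 'incoherent' would come from
 * the HW coherency capability; the helper name is hypothetical.
 */
static void flush_ptes_for_hw(void *ptes, unsigned int size, bool incoherent)
{
	if (incoherent)
		clflush_cache_range(ptes, size);
}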
Remove all the unused iommu_domain page table code. The debugfs paths have
their own independent page table walker that is left alone for now.
This corrects a race with the non-coherent walker that the ARM
implementations have fixed:
        CPU 0                                CPU 1
  pfn_to_dma_pte()                     pfn_to_dma_pte()
    pte = &parent[offset];
    if (!dma_pte_present(pte)) {
      try_cmpxchg64(&pte->val)
                                         pte = &parent[offset];
                                         .. dma_pte_present(pte) ..
                                         [...]
                                         // iommu_map() completes
                                         // Device does DMA
    domain_flush_cache(pte)
The CPU 1 mapping operation shares a page table level with the CPU 0
mapping operation. CPU 0 installed a new page table level but has not
flushed it yet. CPU 1 returns from iommu_map() and the device does DMA.
The non-coherent walker fails to see the new table level installed by
CPU 0 and fails the DMA with non-present.
The iommupt PT_FEAT_DMA_INCOHERENT implementation uses the ARM design of
storing a flag when CPU 0 completes the flush. If the flag is not set,
CPU 1 will also flush to ensure the HW can fully walk to the PTE being
installed (sketched below).
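
A minimal sketch of that flag scheme. PTE_SW_FLUSHED, flush_entry() and
install_table_level() are hypothetical names invented here, not the
iommupt or ARM code; only try_cmpxchg64() and clflush_cache_range() are
real kernel interfaces:

#include <linux/atomic.h>
#include <linux/bits.h>
#include <asm/cacheflush.h>

#define PTE_SW_FLUSHED	BIT_ULL(1)	/* imaginary software "flush done" bit */

static void flush_entry(u64 *entryp)
{
	clflush_cache_range(entryp, sizeof(*entryp));
}

static void install_table_level(u64 *parentp, u64 new_entry)
{
	u64 old = 0;

	if (try_cmpxchg64(parentp, &old, new_entry)) {
		/* Installer (CPU 0): flush the entry, then record that fact */
		flush_entry(parentp);
		WRITE_ONCE(*parentp, new_entry | PTE_SW_FLUSHED);
	} else if (!(old & PTE_SW_FLUSHED)) {
		/* Loser (CPU 1): the installer may not have flushed yet */
		flush_entry(parentp);
	}
}

Because the flag is only set after the installer's flush, a second walker
that finds the entry present but unflagged knows it cannot rely on the HW
seeing the new level and performs its own flush first.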
Cc: Tina Zhang <tina.zhang@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
// SPDX-License-Identifier: GPL-2.0
/*
 * nested.c - nested mode translation support
 *
 * Copyright (C) 2023 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 *         Jacob Pan <jacob.jun.pan@linux.intel.com>
 *         Yi Liu <yi.l.liu@intel.com>
 */

#define pr_fmt(fmt) "DMAR: " fmt

#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>

#include "iommu.h"
#include "pasid.h"

static int intel_nested_attach_dev(struct iommu_domain *domain,
				   struct device *dev, struct iommu_domain *old)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;
	int ret = 0;

	device_block_translation(dev);

	/*
	 * Stage-1 domain cannot work alone, it is nested on a s2_domain.
	 * The s2_domain will be used in nested translation, hence needs
	 * to ensure the s2_domain is compatible with this IOMMU.
	 */
	ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
	if (ret) {
		dev_err_ratelimited(dev, "s2 domain is not compatible\n");
		return ret;
	}

	ret = domain_attach_iommu(dmar_domain, iommu);
	if (ret) {
		dev_err_ratelimited(dev, "Failed to attach domain to iommu\n");
		return ret;
	}

	ret = cache_tag_assign_domain(dmar_domain, dev, IOMMU_NO_PASID);
	if (ret)
		goto detach_iommu;

	ret = iopf_for_domain_set(domain, dev);
	if (ret)
		goto unassign_tag;

	ret = intel_pasid_setup_nested(iommu, dev,
				       IOMMU_NO_PASID, dmar_domain);
	if (ret)
		goto disable_iopf;

	info->domain = dmar_domain;
	info->domain_attached = true;
	spin_lock_irqsave(&dmar_domain->lock, flags);
	list_add(&info->link, &dmar_domain->devices);
	spin_unlock_irqrestore(&dmar_domain->lock, flags);

	return 0;
disable_iopf:
	iopf_for_domain_remove(domain, dev);
unassign_tag:
	cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID);
detach_iommu:
	domain_detach_iommu(dmar_domain, iommu);

	return ret;
}

static void intel_nested_domain_free(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dmar_domain *s2_domain = dmar_domain->s2_domain;

	spin_lock(&s2_domain->s1_lock);
	list_del(&dmar_domain->s2_link);
	spin_unlock(&s2_domain->s1_lock);
	kfree(dmar_domain->qi_batch);
	kfree(dmar_domain);
}

static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
					      struct iommu_user_data_array *array)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
	u32 index, processed = 0;
	int ret = 0;

	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
		ret = -EINVAL;
		goto out;
	}

	for (index = 0; index < array->entry_num; index++) {
		ret = iommu_copy_struct_from_user_array(&inv_entry, array,
							IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
							index, __reserved);
		if (ret)
			break;

		if ((inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) ||
		    inv_entry.__reserved) {
			ret = -EOPNOTSUPP;
			break;
		}

		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
			ret = -EINVAL;
			break;
		}

		cache_tag_flush_range(dmar_domain, inv_entry.addr,
				      inv_entry.addr + nrpages_to_size(inv_entry.npages) - 1,
				      inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF);
		processed++;
	}

out:
	array->entry_num = processed;
	return ret;
}

static int domain_setup_nested(struct intel_iommu *iommu,
			       struct dmar_domain *domain,
			       struct device *dev, ioasid_t pasid,
			       struct iommu_domain *old)
{
	if (!old)
		return intel_pasid_setup_nested(iommu, dev, pasid, domain);
	return intel_pasid_replace_nested(iommu, dev, pasid,
					  iommu_domain_did(old, iommu),
					  domain);
}

static int intel_nested_set_dev_pasid(struct iommu_domain *domain,
				      struct device *dev, ioasid_t pasid,
				      struct iommu_domain *old)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu = info->iommu;
	struct dev_pasid_info *dev_pasid;
	int ret;

	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
		return -EOPNOTSUPP;

	if (context_copied(iommu, info->bus, info->devfn))
		return -EBUSY;

	ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
	if (ret)
		return ret;

	dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
	if (IS_ERR(dev_pasid))
		return PTR_ERR(dev_pasid);

	ret = iopf_for_domain_replace(domain, old, dev);
	if (ret)
		goto out_remove_dev_pasid;

	ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old);
	if (ret)
		goto out_unwind_iopf;

	domain_remove_dev_pasid(old, dev, pasid);

	return 0;

out_unwind_iopf:
	iopf_for_domain_replace(old, domain, dev);
out_remove_dev_pasid:
	domain_remove_dev_pasid(domain, dev, pasid);
	return ret;
}

static const struct iommu_domain_ops intel_nested_domain_ops = {
	.attach_dev		= intel_nested_attach_dev,
	.set_dev_pasid		= intel_nested_set_dev_pasid,
	.free			= intel_nested_domain_free,
	.cache_invalidate_user	= intel_nested_cache_invalidate_user,
};

struct iommu_domain *
intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
				u32 flags,
				const struct iommu_user_data *user_data)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct dmar_domain *s2_domain = to_dmar_domain(parent);
	struct intel_iommu *iommu = info->iommu;
	struct iommu_hwpt_vtd_s1 vtd;
	struct dmar_domain *domain;
	int ret;

	if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID)
		return ERR_PTR(-EOPNOTSUPP);

	/* Must be nested domain */
	if (user_data->type != IOMMU_HWPT_DATA_VTD_S1)
		return ERR_PTR(-EOPNOTSUPP);
	if (!intel_domain_is_ss_paging(s2_domain) || !s2_domain->nested_parent)
		return ERR_PTR(-EINVAL);

	ret = iommu_copy_struct_from_user(&vtd, user_data,
					  IOMMU_HWPT_DATA_VTD_S1, __reserved);
	if (ret)
		return ERR_PTR(ret);

	domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
	if (!domain)
		return ERR_PTR(-ENOMEM);

	domain->s2_domain = s2_domain;
	domain->s1_cfg = vtd;
	domain->domain.ops = &intel_nested_domain_ops;
	domain->domain.type = IOMMU_DOMAIN_NESTED;
	INIT_LIST_HEAD(&domain->devices);
	INIT_LIST_HEAD(&domain->dev_pasids);
	INIT_LIST_HEAD(&domain->cache_tags);
	spin_lock_init(&domain->lock);
	spin_lock_init(&domain->cache_lock);
	xa_init(&domain->iommu_array);

	spin_lock(&s2_domain->s1_lock);
	list_add(&domain->s2_link, &s2_domain->s1_domains);
	spin_unlock(&s2_domain->s1_lock);

	return &domain->domain;
}