Files
linux/arch/x86/pci/acpi.c
Hans de Goede fa6dae5d82 x86/PCI: Add kernel cmdline options to use/ignore E820 reserved regions
Some firmware supplies PCI host bridge _CRS that includes address space
unusable by PCI devices, e.g., space occupied by host bridge registers or
used by hidden PCI devices.

To avoid this unusable space, Linux currently excludes E820 reserved
regions from _CRS windows; see 4dc2287c18 ("x86: avoid E820 regions when
allocating address space").

However, this use of E820 reserved regions to clip things out of _CRS is
not supported by ACPI, UEFI, or PCI Firmware specs, and some systems have
E820 reserved regions that cover the entire memory window from _CRS.
4dc2287c18 clips the entire window, leaving no space for hot-added or
uninitialized PCI devices.

For example, from a Lenovo IdeaPad 3 15IIL 81WE:

  BIOS-e820: [mem 0x4bc50000-0xcfffffff] reserved
  pci_bus 0000:00: root bus resource [mem 0x65400000-0xbfffffff window]
  pci 0000:00:15.0: BAR 0: [mem 0x00000000-0x00000fff 64bit]
  pci 0000:00:15.0: BAR 0: no space for [mem size 0x00001000 64bit]

Future patches will add quirks to enable/disable E820 clipping
automatically.

Add a "pci=no_e820" kernel command line option to disable clipping with
E820 reserved regions.  Also add a matching "pci=use_e820" option to enable
clipping with E820 reserved regions if that has been disabled by default by
further patches in this patch-set.

Both options taint the kernel because they are intended for debugging and
workaround purposes until a quirk can set them automatically.

[bhelgaas: commit log, add printk]
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899 Lenovo IdeaPad 3
Link: https://lore.kernel.org/r/20220519152150.6135-2-hdegoede@redhat.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Benoit Grégoire <benoitg@coeus.ca>
Cc: Hui Wang <hui.wang@canonical.com>
2022-05-19 14:26:55 -05:00

445 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/pci-acpi.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>
#include <asm/e820/api.h>
struct pci_root_info {
struct acpi_pci_root_info common;
struct pci_sysdata sd;
#ifdef CONFIG_PCI_MMCONFIG
bool mcfg_added;
u8 start_bus;
u8 end_bus;
#endif
};
static bool pci_use_e820 = true;
static bool pci_use_crs = true;
static bool pci_ignore_seg;
static int __init set_use_crs(const struct dmi_system_id *id)
{
pci_use_crs = true;
return 0;
}
static int __init set_nouse_crs(const struct dmi_system_id *id)
{
pci_use_crs = false;
return 0;
}
static int __init set_ignore_seg(const struct dmi_system_id *id)
{
printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident);
pci_ignore_seg = true;
return 0;
}
static const struct dmi_system_id pci_crs_quirks[] __initconst = {
/* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
{
.callback = set_use_crs,
.ident = "IBM System x3800",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
},
},
/* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */
/* 2006 AMD HT/VIA system with two host bridges */
{
.callback = set_use_crs,
.ident = "ASRock ALiveSATA2-GLAN",
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
},
},
/* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */
/* 2006 AMD HT/VIA system with two host bridges */
{
.callback = set_use_crs,
.ident = "ASUS M2V-MX SE",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"),
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
/* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
{
.callback = set_use_crs,
.ident = "MSI MS-7253",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
},
},
/* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */
/* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */
{
.callback = set_use_crs,
.ident = "Foxconn K8M890-8237A",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"),
DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"),
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
},
},
/* Now for the blacklist.. */
/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
{
.callback = set_nouse_crs,
.ident = "Dell Studio 1557",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"),
DMI_MATCH(DMI_BIOS_VERSION, "A09"),
},
},
/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
{
.callback = set_nouse_crs,
.ident = "Thinkpad SL510",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "2847DFG"),
DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"),
},
},
/* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */
{
.callback = set_nouse_crs,
.ident = "Supermicro X8DTH",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"),
DMI_MATCH(DMI_BIOS_VERSION, "2.0a"),
},
},
/* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */
{
.callback = set_ignore_seg,
.ident = "HP xw9300",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"),
},
},
{}
};
void __init pci_acpi_crs_quirks(void)
{
int year = dmi_get_bios_year();
if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff)
pci_use_crs = false;
dmi_check_system(pci_crs_quirks);
/*
* If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
* takes precedence over anything we figured out above.
*/
if (pci_probe & PCI_ROOT_NO_CRS)
pci_use_crs = false;
else if (pci_probe & PCI_USE__CRS)
pci_use_crs = true;
printk(KERN_INFO "PCI: %s host bridge windows from ACPI; "
"if necessary, use \"pci=%s\" and report a bug\n",
pci_use_crs ? "Using" : "Ignoring",
pci_use_crs ? "nocrs" : "use_crs");
/* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */
if (pci_probe & PCI_NO_E820)
pci_use_e820 = false;
else if (pci_probe & PCI_USE_E820)
pci_use_e820 = true;
printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n",
pci_use_e820 ? "Using" : "Ignoring");
if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n");
}
#ifdef CONFIG_PCI_MMCONFIG
static int check_segment(u16 seg, struct device *dev, char *estr)
{
if (seg) {
dev_err(dev,
"%s can't access PCI configuration "
"space under this host bridge.\n",
estr);
return -EIO;
}
/*
* Failure in adding MMCFG information is not fatal,
* just can't access extended configuration space of
* devices under this host bridge.
*/
dev_warn(dev,
"%s can't access extended PCI configuration "
"space under this bridge.\n",
estr);
return 0;
}
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
int result, seg;
struct pci_root_info *info;
struct acpi_pci_root *root = ci->root;
struct device *dev = &ci->bridge->dev;
info = container_of(ci, struct pci_root_info, common);
info->start_bus = (u8)root->secondary.start;
info->end_bus = (u8)root->secondary.end;
info->mcfg_added = false;
seg = info->sd.domain;
/* return success if MMCFG is not in use */
if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg)
return 0;
if (!(pci_probe & PCI_PROBE_MMCONF))
return check_segment(seg, dev, "MMCONFIG is disabled,");
result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus,
root->mcfg_addr);
if (result == 0) {
/* enable MMCFG if it hasn't been enabled yet */
if (raw_pci_ext_ops == NULL)
raw_pci_ext_ops = &pci_mmcfg;
info->mcfg_added = true;
} else if (result != -EEXIST)
return check_segment(seg, dev,
"fail to add MMCONFIG information,");
return 0;
}
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
struct pci_root_info *info;
info = container_of(ci, struct pci_root_info, common);
if (info->mcfg_added) {
pci_mmconfig_delete(info->sd.domain,
info->start_bus, info->end_bus);
info->mcfg_added = false;
}
}
#else
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
return 0;
}
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
}
#endif
static int pci_acpi_root_get_node(struct acpi_pci_root *root)
{
int busnum = root->secondary.start;
struct acpi_device *device = root->device;
int node = acpi_get_node(device->handle);
if (node == NUMA_NO_NODE) {
node = x86_pci_root_bus_node(busnum);
if (node != 0 && node != NUMA_NO_NODE)
dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
node);
}
if (node != NUMA_NO_NODE && !node_online(node))
node = NUMA_NO_NODE;
return node;
}
static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci)
{
return setup_mcfg_map(ci);
}
static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci)
{
teardown_mcfg_map(ci);
kfree(container_of(ci, struct pci_root_info, common));
}
/*
* An IO port or MMIO resource assigned to a PCI host bridge may be
* consumed by the host bridge itself or available to its child
* bus/devices. The ACPI specification defines a bit (Producer/Consumer)
* to tell whether the resource is consumed by the host bridge itself,
* but firmware hasn't used that bit consistently, so we can't rely on it.
*
* On x86 and IA64 platforms, all IO port and MMIO resources are assumed
* to be available to child bus/devices except one special case:
* IO port [0xCF8-0xCFF] is consumed by the host bridge itself
* to access PCI configuration space.
*
* So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
*/
static bool resource_is_pcicfg_ioport(struct resource *res)
{
return (res->flags & IORESOURCE_IO) &&
res->start == 0xCF8 && res->end == 0xCFF;
}
static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
{
struct acpi_device *device = ci->bridge;
int busnum = ci->root->secondary.start;
struct resource_entry *entry, *tmp;
int status;
status = acpi_pci_probe_root_resources(ci);
if (pci_use_e820) {
resource_list_for_each_entry(entry, &ci->resources)
remove_e820_regions(&device->dev, entry->res);
}
if (pci_use_crs) {
resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
if (resource_is_pcicfg_ioport(entry->res))
resource_list_destroy_entry(entry);
return status;
}
resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
dev_printk(KERN_DEBUG, &device->dev,
"host bridge window %pR (ignored)\n", entry->res);
resource_list_destroy_entry(entry);
}
x86_pci_root_bus_resources(busnum, &ci->resources);
return 0;
}
static struct acpi_pci_root_ops acpi_pci_root_ops = {
.pci_ops = &pci_root_ops,
.init_info = pci_acpi_root_init_info,
.release_info = pci_acpi_root_release_info,
.prepare_resources = pci_acpi_root_prepare_resources,
};
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
int domain = root->segment;
int busnum = root->secondary.start;
int node = pci_acpi_root_get_node(root);
struct pci_bus *bus;
if (pci_ignore_seg)
root->segment = domain = 0;
if (domain && !pci_domains_supported) {
printk(KERN_WARNING "pci_bus %04x:%02x: "
"ignored (multiple domains not supported)\n",
domain, busnum);
return NULL;
}
bus = pci_find_bus(domain, busnum);
if (bus) {
/*
* If the desired bus has been scanned already, replace
* its bus->sysdata.
*/
struct pci_sysdata sd = {
.domain = domain,
.node = node,
.companion = root->device
};
memcpy(bus->sysdata, &sd, sizeof(sd));
} else {
struct pci_root_info *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
dev_err(&root->device->dev,
"pci_bus %04x:%02x: ignored (out of memory)\n",
domain, busnum);
else {
info->sd.domain = domain;
info->sd.node = node;
info->sd.companion = root->device;
bus = acpi_pci_root_create(root, &acpi_pci_root_ops,
&info->common, &info->sd);
}
}
/* After the PCI-E bus has been walked and all devices discovered,
* configure any settings of the fabric that might be necessary.
*/
if (bus) {
struct pci_bus *child;
list_for_each_entry(child, &bus->children, node)
pcie_bus_configure_settings(child);
}
return bus;
}
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
/*
* We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
* here, pci_create_root_bus() has been called by someone else and
* sysdata is likely to be different from what we expect. Let it go in
* that case.
*/
if (!bridge->dev.parent) {
struct pci_sysdata *sd = bridge->bus->sysdata;
ACPI_COMPANION_SET(&bridge->dev, sd->companion);
}
return 0;
}
int __init pci_acpi_init(void)
{
struct pci_dev *dev = NULL;
if (acpi_noirq)
return -ENODEV;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
acpi_irq_penalty_init();
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
x86_init.pci.init_irq = x86_init_noop;
if (pci_routeirq) {
/*
* PCI IRQ routing is set up by pci_enable_device(), but we
* also do it here in case there are still broken drivers that
* don't use pci_enable_device().
*/
printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
for_each_pci_dev(dev)
acpi_pci_irq_enable(dev);
}
return 0;
}