Merge branch 'for-6.19/cxl-misc' into cxl-for-next
Misc patches for CXL 6.19:

- Remove incorrect page-allocator quirk section in documentation.
- Remove unused devm_cxl_port_enumerate_dports() function.
- Fix typo in cdat.c code comment.
- Replace use of system_wq with system_percpu_wq.
- Add locked decoder support.
- Return when generic target updated.
- Rename region_res_match_cxl_range() to spa_maps_hpa().
- Clarify comment in spa_maps_hpa().
@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
 will fallback to allocate from :code:`ZONE_NORMAL`.
 
-Zone and Node Quirks
-====================
-Let's consider a configuration where the local DRAM capacity is largely onlined
-into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
-CXL capacity has the opposite configuration - all onlined in
-:code:`ZONE_MOVABLE`.
-
-Under the default allocation policy, the page allocator will completely skip
-:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of
-Linux v6.15, the page allocator does (approximately) the following: ::
-
-  for (each zone in local_node):
-
-    for (each node in fallback_order):
-
-      attempt_allocation(gfp_flags);
-
-Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
-functionally unreachable for direct allocation. As a result, the only way
-for CXL capacity to be used is via `demotion` in the reclaim path.
-
-This configuration also means that if the DRAM ndoe has :code:`ZONE_MOVABLE`
-capacity - when that capacity is depleted, the page allocator will actually
-prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.
-
-We may wish to invert this priority in future Linux versions.
-
-If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
-when the DRAM nodes are depleted. See the reclaim section for more details.
-
 CGroups and CPUSets
 ===================
 Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined
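For context only: the standalone C sketch below mirrors the pseudocode in the section being removed above. It is not the real page allocator (the section is deleted precisely because its description is considered incorrect), and every identifier in it is made up for illustration. Under that model, a ZONE_NORMAL request never reaches a CXL node that only has ZONE_MOVABLE capacity:

/*
 * Toy model of the fallback loop from the removed pseudocode.  Not the
 * real page allocator; names and layout are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

enum zone_type { ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

struct node {
        const char *name;
        bool has_zone[MAX_NR_ZONES];
};

int main(void)
{
        /* Node 0: local DRAM, ZONE_NORMAL only.  Node 1: CXL, ZONE_MOVABLE only. */
        struct node nodes[2] = {
                { "dram", { [ZONE_NORMAL]  = true } },
                { "cxl",  { [ZONE_MOVABLE] = true } },
        };
        int fallback_order[] = { 0, 1 };        /* local node first, then CXL */
        enum zone_type request = ZONE_NORMAL;   /* e.g. a GFP_KERNEL-style request */

        /* "for (each zone in local_node): for (each node in fallback_order): ..." */
        for (enum zone_type z = 0; z < MAX_NR_ZONES; z++) {
                if (!nodes[0].has_zone[z] || z > request)
                        continue;       /* local node lacks the zone, or zone not allowed */
                for (int i = 0; i < 2; i++) {
                        if (nodes[fallback_order[i]].has_zone[z])
                                printf("try node %s, zone %d\n",
                                       nodes[fallback_order[i]].name, z);
                }
        }
        /* Prints only "try node dram, zone 0" -- the CXL node is never tried. */
        return 0;
}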
@@ -888,12 +888,13 @@ static void hmat_register_target(struct memory_target *target)
          * Register generic port perf numbers. The nid may not be
          * initialized and is still NUMA_NO_NODE.
          */
-        mutex_lock(&target_lock);
-        if (*(u16 *)target->gen_port_device_handle) {
-                hmat_update_generic_target(target);
-                target->registered = true;
-        }
-        mutex_unlock(&target_lock);
+        scoped_guard(mutex, &target_lock) {
+                if (*(u16 *)target->gen_port_device_handle) {
+                        hmat_update_generic_target(target);
+                        target->registered = true;
+                        return;
+                }
+        }
 
         /*
          * Skip offline nodes. This can happen when memory
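The hunk above swaps an explicit mutex_lock()/mutex_unlock() pair for scoped_guard() from the kernel's linux/cleanup.h, so the lock is released however the block is exited, including through the newly added early return. Below is a rough userspace analogue assuming GCC/Clang's cleanup attribute; MUTEX_SCOPED_GUARD() and guard_unlock() are invented for this sketch and are not kernel or pthread APIs:

/* Build with: cc -pthread guard.c */
#include <pthread.h>
#include <stdio.h>

static void guard_unlock(pthread_mutex_t **m)
{
        if (*m)
                pthread_mutex_unlock(*m);
}

/*
 * Run the block that follows with @m held; the cleanup attribute releases
 * the lock on any exit from the block, including a return.
 */
#define MUTEX_SCOPED_GUARD(m)                                           \
        for (pthread_mutex_t *_guard __attribute__((cleanup(guard_unlock))) = \
                        (pthread_mutex_lock(m), (m)), *_once = (m);     \
             _once; _once = NULL)

static pthread_mutex_t target_lock = PTHREAD_MUTEX_INITIALIZER;
static int registered;

static void register_target(int handle)
{
        MUTEX_SCOPED_GUARD(&target_lock) {
                if (handle) {
                        registered = 1;
                        return; /* lock still released by the cleanup handler */
                }
        }
        /* Reached only when handle was 0; the lock is already dropped here. */
        printf("no generic port handle, registered=%d\n", registered);
}

int main(void)
{
        register_target(1);
        register_target(0);
        return 0;
}

This is the same shape the kernel macro gives the hmat.c code: the early "return" after updating the generic target no longer needs a matching unlock.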
@@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
                 cxl_coordinates_combine(coords, coords, ctx->coord);
 
                 /*
-                 * Take the min of the calculated bandwdith and the upstream
+                 * Take the min of the calculated bandwidth and the upstream
                  * switch SSLBIS bandwidth if there's a parent switch
                  */
                 if (!is_root)
@@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
 /**
  * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
  * @cxlr: The region being operated on
- * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
+ * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
  */
 static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
                                         struct xarray *input_xa)
@@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
         if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
                 return;
 
+        if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+                return;
+
         if (port->commit_end == id)
                 cxl_port_commit_reap(cxld);
         else
@@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
 }
 EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
 
-struct cxl_walk_context {
-        struct pci_bus *bus;
-        struct cxl_port *port;
-        int type;
-        int error;
-        int count;
-};
-
-static int match_add_dports(struct pci_dev *pdev, void *data)
-{
-        struct cxl_walk_context *ctx = data;
-        struct cxl_port *port = ctx->port;
-        int type = pci_pcie_type(pdev);
-        struct cxl_register_map map;
-        struct cxl_dport *dport;
-        u32 lnkcap, port_num;
-        int rc;
-
-        if (pdev->bus != ctx->bus)
-                return 0;
-        if (!pci_is_pcie(pdev))
-                return 0;
-        if (type != ctx->type)
-                return 0;
-        if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-                                  &lnkcap))
-                return 0;
-
-        rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
-        if (rc)
-                dev_dbg(&port->dev, "failed to find component registers\n");
-
-        port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-        dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
-        if (IS_ERR(dport)) {
-                ctx->error = PTR_ERR(dport);
-                return PTR_ERR(dport);
-        }
-        ctx->count++;
-
-        return 0;
-}
-
-/**
- * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
- * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
- *
- * Returns a positive number of dports enumerated or a negative error
- * code.
- */
-int devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-        struct pci_bus *bus = cxl_port_to_pci_bus(port);
-        struct cxl_walk_context ctx;
-        int type;
-
-        if (!bus)
-                return -ENXIO;
-
-        if (pci_is_root_bus(bus))
-                type = PCI_EXP_TYPE_ROOT_PORT;
-        else
-                type = PCI_EXP_TYPE_DOWNSTREAM;
-
-        ctx = (struct cxl_walk_context) {
-                .port = port,
-                .bus = bus,
-                .type = type,
-        };
-        pci_walk_bus(bus, match_add_dports, &ctx);
-
-        if (ctx.count == 0)
-                return -ENODEV;
-        if (ctx.error)
-                return ctx.error;
-        return ctx.count;
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
-
 static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
 {
         struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
         return 0;
 }
 
+struct cxl_walk_context {
+        struct pci_bus *bus;
+        struct cxl_port *port;
+        int type;
+        int error;
+        int count;
+};
+
 static int count_dports(struct pci_dev *pdev, void *data)
 {
         struct cxl_walk_context *ctx = data;
@@ -245,6 +245,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
         struct cxl_region_params *p = &cxlr->params;
         int i;
 
+        if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+                return;
+
         /*
          * Before region teardown attempt to flush, evict any data cached for
          * this region, or scream loudly about missing arch / platform support
@@ -419,6 +422,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
                 return len;
         }
 
+        if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+                return -EPERM;
+
         rc = queue_reset(cxlr);
         if (rc)
                 return rc;
@@ -838,16 +844,16 @@ static int match_free_decoder(struct device *dev, const void *data)
         return 1;
 }
 
-static bool region_res_match_cxl_range(const struct cxl_region_params *p,
-                                       const struct range *range)
+static bool spa_maps_hpa(const struct cxl_region_params *p,
+                         const struct range *range)
 {
         if (!p->res)
                 return false;
 
         /*
-         * If an extended linear cache region then the CXL range is assumed
-         * to be fronted by the DRAM range in current known implementation.
-         * This assumption will be made until a variant implementation exists.
+         * The extended linear cache region is constructed by a 1:1 ratio
+         * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
+         * CXL decoders at the high end of the SPA range.
          */
         return p->res->start + p->cache_size == range->start &&
                p->res->end == range->end;
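The check above (p->res->start + p->cache_size == range->start && p->res->end == range->end) is exactly the layout the clarified comment describes: the region's SPA window is DRAM "cache" at the bottom and an equal amount of CXL-decoded HPA at the top, so a decoder matches only if its HPA range lines up with the upper half. For a region without an extended linear cache, cache_size is 0 and the decoder must cover the whole window. A standalone sketch of the arithmetic with made-up addresses and simplified stand-in structs (not the real cxl_region_params or range definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };
struct region_params { struct range res; uint64_t cache_size; };

static bool spa_maps_hpa(const struct region_params *p, const struct range *hpa)
{
        return p->res.start + p->cache_size == hpa->start &&
               p->res.end == hpa->end;
}

int main(void)
{
        /*
         * Hypothetical extended linear cache region: 64GB of SPA, the low
         * 32GB fronted by DRAM (the "cache"), the high 32GB backed by CXL.
         */
        struct region_params p = {
                .res = { .start = 0x1000000000ULL,                   /* 64GB SPA window */
                         .end   = 0x1000000000ULL + (64ULL << 30) - 1 },
                .cache_size = 32ULL << 30,                           /* DRAM half */
        };
        /* The decoder's HPA range must cover exactly the CXL (upper) half. */
        struct range hpa = {
                .start = 0x1000000000ULL + (32ULL << 30),
                .end   = 0x1000000000ULL + (64ULL << 30) - 1,
        };

        printf("decoder maps region: %s\n", spa_maps_hpa(&p, &hpa) ? "yes" : "no");
        return 0;
}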
@@ -865,7 +871,7 @@ static int match_auto_decoder(struct device *dev, const void *data)
         cxld = to_cxl_decoder(dev);
         r = &cxld->hpa_range;
 
-        if (region_res_match_cxl_range(p, r))
+        if (spa_maps_hpa(p, r))
                 return 1;
 
         return 0;
@@ -1059,6 +1065,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
         return 0;
 }
 
+static void cxl_region_set_lock(struct cxl_region *cxlr,
+                                struct cxl_decoder *cxld)
+{
+        if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+                return;
+
+        set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
+        clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
+}
+
 /**
  * cxl_port_attach_region() - track a region's interest in a port by endpoint
  * @port: port to add a new region reference 'struct cxl_region_ref'
@@ -1170,6 +1186,8 @@ static int cxl_port_attach_region(struct cxl_port *port,
                 }
         }
 
+        cxl_region_set_lock(cxlr, cxld);
+
         rc = cxl_rr_ep_add(cxl_rr, cxled);
         if (rc) {
                 dev_dbg(&cxlr->dev,
@@ -1465,7 +1483,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
                 if (cxld->interleave_ways != iw ||
                     (iw > 1 && cxld->interleave_granularity != ig) ||
-                    !region_res_match_cxl_range(p, &cxld->hpa_range) ||
+                    !spa_maps_hpa(p, &cxld->hpa_range) ||
                     ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
                         dev_err(&cxlr->dev,
                                 "%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -2439,6 +2457,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
         dev->bus = &cxl_bus_type;
         dev->type = &cxl_region_type;
         cxlr->id = id;
+        cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
 
         return cxlr;
 }
@@ -3398,7 +3417,7 @@ static int match_region_by_range(struct device *dev, const void *data)
         p = &cxlr->params;
 
         guard(rwsem_read)(&cxl_rwsem.region);
-        return region_res_match_cxl_range(p, r);
+        return spa_maps_hpa(p, r);
 }
 
 static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
@@ -517,6 +517,14 @@ enum cxl_partition_mode {
  */
 #define CXL_REGION_F_NEEDS_RESET 1
 
+/*
+ * Indicate whether this region is locked due to 1 or more decoders that have
+ * been locked. The approach of all or nothing is taken with regard to the
+ * locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is
+ * set.
+ */
+#define CXL_REGION_F_LOCK 2
+
 /**
  * struct cxl_region - CXL region
  * @dev: This region's device
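Pulling the locked-decoder pieces of this merge together: cxl_region_set_lock() (added in the region.c hunks above) propagates a locked decoder into CXL_REGION_F_LOCK and clears CXL_REGION_F_NEEDS_RESET, and the reset and commit paths then refuse to tear the region down. A toy userspace model of that flag interplay; the helpers below are simplified stand-ins for the kernel's set_bit()/clear_bit()/test_bit(), not the actual implementation:

#include <stdbool.h>
#include <stdio.h>

#define REGION_F_NEEDS_RESET    1
#define REGION_F_LOCK           2

struct region { unsigned long flags; };

static bool test_flag(struct region *r, int bit)  { return r->flags & (1UL << bit); }
static void set_flag(struct region *r, int bit)   { r->flags |= 1UL << bit; }
static void clear_flag(struct region *r, int bit) { r->flags &= ~(1UL << bit); }

/* A single locked decoder locks the whole region (all or nothing). */
static void region_set_lock(struct region *r, bool decoder_locked)
{
        if (!decoder_locked)
                return;
        set_flag(r, REGION_F_LOCK);
        clear_flag(r, REGION_F_NEEDS_RESET);
}

static int region_decode_reset(struct region *r)
{
        if (test_flag(r, REGION_F_LOCK))
                return -1;      /* mirrors the early-return / -EPERM paths */
        clear_flag(r, REGION_F_NEEDS_RESET);
        return 0;
}

int main(void)
{
        struct region r = { 0 };

        set_flag(&r, REGION_F_NEEDS_RESET);
        region_set_lock(&r, true);
        printf("reset allowed: %s\n", region_decode_reset(&r) ? "no" : "yes");
        return 0;
}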
@@ -127,7 +127,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
         return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
 }
 
-int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
 void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
@@ -136,7 +136,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
         if (opcode == CXL_MBOX_OP_SANITIZE) {
                 mutex_lock(&cxl_mbox->mbox_mutex);
                 if (mds->security.sanitize_node)
-                        mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
+                        mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
                 mutex_unlock(&cxl_mbox->mbox_mutex);
         } else {
                 /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
@@ -4,7 +4,6 @@ ldflags-y += --wrap=is_acpi_device_node
 ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
 ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
 ldflags-y += --wrap=cxl_await_media_ready
 ldflags-y += --wrap=devm_cxl_add_rch_dport
 ldflags-y += --wrap=cxl_rcd_component_reg_phys
@@ -995,37 +995,6 @@ static int get_port_array(struct cxl_port *port,
         return 0;
 }
 
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-        struct platform_device **array;
-        int i, array_size;
-        int rc;
-
-        rc = get_port_array(port, &array, &array_size);
-        if (rc)
-                return rc;
-
-        for (i = 0; i < array_size; i++) {
-                struct platform_device *pdev = array[i];
-                struct cxl_dport *dport;
-
-                if (pdev->dev.parent != port->uport_dev) {
-                        dev_dbg(&port->dev, "%s: mismatch parent %s\n",
-                                dev_name(port->uport_dev),
-                                dev_name(pdev->dev.parent));
-                        continue;
-                }
-
-                dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
-                                           CXL_RESOURCE_NONE);
-
-                if (IS_ERR(dport))
-                        return PTR_ERR(dport);
-        }
-
-        return 0;
-}
-
 static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
                                                    struct device *dport_dev)
 {
@@ -1114,7 +1083,6 @@ static struct cxl_mock_ops cxl_mock_ops = {
         .acpi_pci_find_root = mock_acpi_pci_find_root,
         .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
         .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
-        .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
         .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
         .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
         .list = LIST_HEAD_INIT(cxl_mock_ops.list),
@@ -172,21 +172,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
 
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-        int rc, index;
-        struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-        if (ops && ops->is_mock_port(port->uport_dev))
-                rc = ops->devm_cxl_port_enumerate_dports(port);
-        else
-                rc = devm_cxl_port_enumerate_dports(port);
-        put_cxl_mock_ops(index);
-
-        return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
-
 int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 {
         int rc, index;
@@ -19,7 +19,6 @@ struct cxl_mock_ops {
         bool (*is_mock_bus)(struct pci_bus *bus);
         bool (*is_mock_port)(struct device *dev);
         bool (*is_mock_dev)(struct device *dev);
-        int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
         int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
         int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
         void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);