Merge branch 'for-6.19/cxl-misc' into cxl-for-next

Misc patches for CXL 6.19:
- Remove incorrect page-allocator quirk section in documentation
- Remove unused devm_cxl_port_enumerate_dports() function
- Fix typo in cdat.c code comment
- Replace use of system_wq with system_percpu_wq
- Add locked decoder support
- Return when the generic target is updated
- Rename region_res_match_cxl_range() to spa_maps_hpa()
- Clarify comment in spa_maps_hpa()
Dave Jiang
2025-11-13 08:42:46 -07:00
13 changed files with 55 additions and 176 deletions


@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
will fallback to allocate from :code:`ZONE_NORMAL`.
Zone and Node Quirks
====================

Let's consider a configuration where the local DRAM capacity is largely onlined
into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
CXL capacity has the opposite configuration - all onlined in
:code:`ZONE_MOVABLE`.

Under the default allocation policy, the page allocator will completely skip
:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of
Linux v6.15, the page allocator does (approximately) the following: ::

  for (each zone in local_node):
      for (each node in fallback_order):
          attempt_allocation(gfp_flags);

Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
functionally unreachable for direct allocation. As a result, the only way
for CXL capacity to be used is via `demotion` in the reclaim path.

This configuration also means that if the DRAM node has :code:`ZONE_MOVABLE`
capacity - when that capacity is depleted, the page allocator will actually
prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.

We may wish to invert this priority in future Linux versions.

If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
when the DRAM nodes are depleted. See the reclaim section for more details.

CGroups and CPUSets
===================

Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined


@@ -888,12 +888,13 @@ static void hmat_register_target(struct memory_target *target)
* Register generic port perf numbers. The nid may not be
* initialized and is still NUMA_NO_NODE.
*/
-	mutex_lock(&target_lock);
-	if (*(u16 *)target->gen_port_device_handle) {
-		hmat_update_generic_target(target);
-		target->registered = true;
+	scoped_guard(mutex, &target_lock) {
+		if (*(u16 *)target->gen_port_device_handle) {
+			hmat_update_generic_target(target);
+			target->registered = true;
+			return;
+		}
	}
-	mutex_unlock(&target_lock);
/*
* Skip offline nodes. This can happen when memory
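
The conversion above leans on scoped_guard() from <linux/cleanup.h>: the lock is taken on entry to the braced scope and released on every exit path, which is what makes the new early return safe once the generic target has been updated. A minimal sketch of the pattern, with illustrative names (example_lock, example_ready, example_register are not from the patch):

#include <linux/cleanup.h>
#include <linux/mutex.h>

/* Illustrative stand-ins; not part of the HMAT change. */
static DEFINE_MUTEX(example_lock);
static bool example_ready;

static void example_register(void)
{
	scoped_guard(mutex, &example_lock) {
		if (example_ready)
			return;	/* example_lock is released on scope exit */
	}

	/* Not ready: carry on with registration, example_lock already dropped. */
}

Unlike the explicit mutex_lock()/mutex_unlock() pair it replaces, no path here can return with the lock still held.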


@@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
cxl_coordinates_combine(coords, coords, ctx->coord);
/*
-	 * Take the min of the calculated bandwdith and the upstream
+	 * Take the min of the calculated bandwidth and the upstream
* switch SSLBIS bandwidth if there's a parent switch
*/
if (!is_root)
@@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
/**
* cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
* @cxlr: The region being operated on
- * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
+ * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
*/
static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
struct xarray *input_xa)


@@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
return;
if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
return;
if (port->commit_end == id)
cxl_port_commit_reap(cxld);
else


@@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
struct cxl_walk_context {
struct pci_bus *bus;
struct cxl_port *port;
int type;
int error;
int count;
};
static int match_add_dports(struct pci_dev *pdev, void *data)
{
struct cxl_walk_context *ctx = data;
struct cxl_port *port = ctx->port;
int type = pci_pcie_type(pdev);
struct cxl_register_map map;
struct cxl_dport *dport;
u32 lnkcap, port_num;
int rc;
if (pdev->bus != ctx->bus)
return 0;
if (!pci_is_pcie(pdev))
return 0;
if (type != ctx->type)
return 0;
if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
&lnkcap))
return 0;
rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
if (rc)
dev_dbg(&port->dev, "failed to find component registers\n");
port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
if (IS_ERR(dport)) {
ctx->error = PTR_ERR(dport);
return PTR_ERR(dport);
}
ctx->count++;
return 0;
}
/**
* devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
* @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
*
* Returns a positive number of dports enumerated or a negative error
* code.
*/
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
struct pci_bus *bus = cxl_port_to_pci_bus(port);
struct cxl_walk_context ctx;
int type;
if (!bus)
return -ENXIO;
if (pci_is_root_bus(bus))
type = PCI_EXP_TYPE_ROOT_PORT;
else
type = PCI_EXP_TYPE_DOWNSTREAM;
ctx = (struct cxl_walk_context) {
.port = port,
.bus = bus,
.type = type,
};
pci_walk_bus(bus, match_add_dports, &ctx);
if (ctx.count == 0)
return -ENODEV;
if (ctx.error)
return ctx.error;
return ctx.count;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
return 0;
}
struct cxl_walk_context {
struct pci_bus *bus;
struct cxl_port *port;
int type;
int error;
int count;
};
static int count_dports(struct pci_dev *pdev, void *data)
{
struct cxl_walk_context *ctx = data;


@@ -245,6 +245,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
struct cxl_region_params *p = &cxlr->params;
int i;
if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
return;
/*
* Before region teardown attempt to flush, evict any data cached for
* this region, or scream loudly about missing arch / platform support
@@ -419,6 +422,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
return len;
}
if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
return -EPERM;
rc = queue_reset(cxlr);
if (rc)
return rc;
@@ -838,16 +844,16 @@ static int match_free_decoder(struct device *dev, const void *data)
return 1;
}
-static bool region_res_match_cxl_range(const struct cxl_region_params *p,
-					const struct range *range)
+static bool spa_maps_hpa(const struct cxl_region_params *p,
+			 const struct range *range)
{
if (!p->res)
return false;
/*
-	 * If an extended linear cache region then the CXL range is assumed
-	 * to be fronted by the DRAM range in current known implementation.
-	 * This assumption will be made until a variant implementation exists.
+	 * The extended linear cache region is constructed by a 1:1 ratio
+	 * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
+	 * CXL decoders at the high end of the SPA range.
*/
return p->res->start + p->cache_size == range->start &&
p->res->end == range->end;
@@ -865,7 +871,7 @@ static int match_auto_decoder(struct device *dev, const void *data)
cxld = to_cxl_decoder(dev);
r = &cxld->hpa_range;
-	if (region_res_match_cxl_range(p, r))
+	if (spa_maps_hpa(p, r))
return 1;
return 0;
@@ -1059,6 +1065,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
return 0;
}
static void cxl_region_set_lock(struct cxl_region *cxlr,
struct cxl_decoder *cxld)
{
if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
return;
set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
}
/**
* cxl_port_attach_region() - track a region's interest in a port by endpoint
* @port: port to add a new region reference 'struct cxl_region_ref'
@@ -1170,6 +1186,8 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
}
cxl_region_set_lock(cxlr, cxld);
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
dev_dbg(&cxlr->dev,
@@ -1465,7 +1483,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
(iw > 1 && cxld->interleave_granularity != ig) ||
-		    !region_res_match_cxl_range(p, &cxld->hpa_range) ||
+		    !spa_maps_hpa(p, &cxld->hpa_range) ||
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
dev_err(&cxlr->dev,
"%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -2439,6 +2457,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
dev->bus = &cxl_bus_type;
dev->type = &cxl_region_type;
cxlr->id = id;
cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
return cxlr;
}
@@ -3398,7 +3417,7 @@ static int match_region_by_range(struct device *dev, const void *data)
p = &cxlr->params;
guard(rwsem_read)(&cxl_rwsem.region);
-	return region_res_match_cxl_range(p, r);
+	return spa_maps_hpa(p, r);
}
static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
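
To make the renamed check concrete: spa_maps_hpa() matches only when the decoder's HPA range occupies the high end of the region's SPA window, offset by the extended-linear-cache size. A standalone sketch of that comparison with made-up names and addresses (none of these values come from the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the region params and the decoder HPA range. */
struct example_params {
	uint64_t res_start;	/* p->res->start: SPA window start */
	uint64_t res_end;	/* p->res->end:   SPA window end */
	uint64_t cache_size;	/* p->cache_size: DRAM fronting the cache */
};

struct example_range {
	uint64_t start;		/* decoder HPA range start */
	uint64_t end;		/* decoder HPA range end */
};

/* Mirrors the spa_maps_hpa() comparison shown above. */
static bool example_spa_maps_hpa(const struct example_params *p,
				 const struct example_range *r)
{
	return p->res_start + p->cache_size == r->start && p->res_end == r->end;
}

int main(void)
{
	/* 1:1 extended linear cache: 64 GiB of DRAM fronting 64 GiB of CXL HPA. */
	struct example_params p = {
		.res_start  = 0x1000000000ULL,	/*  64 GiB */
		.res_end    = 0x2fffffffffULL,	/* 192 GiB - 1 */
		.cache_size = 0x1000000000ULL,	/*  64 GiB */
	};
	/* CXL decoders sit in the high half of the SPA window. */
	struct example_range r = { .start = 0x2000000000ULL, .end = 0x2fffffffffULL };

	printf("spa maps hpa: %s\n", example_spa_maps_hpa(&p, &r) ? "yes" : "no");
	return 0;
}

With cache_size of zero (no extended linear cache) the check reduces to a plain equality of the two ranges, which is the non-cached case.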


@@ -517,6 +517,14 @@ enum cxl_partition_mode {
*/
#define CXL_REGION_F_NEEDS_RESET 1
/*
* Indicate whether this region is locked due to 1 or more decoders that have
* been locked. The approach of all or nothing is taken with regard to the
* locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is
* set.
*/
#define CXL_REGION_F_LOCK 2
/**
* struct cxl_region - CXL region
* @dev: This region's device
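
Read together with the region.c hunks above, the intent is that a region backed by any locked decoder refuses teardown and never carries a pending reset. A condensed, hypothetical sketch of that gating (the flag values mirror this header; the helper name and the bare flags word are illustrative):

#include <linux/bitops.h>
#include <linux/errno.h>

#define EXAMPLE_REGION_F_NEEDS_RESET	1	/* mirrors CXL_REGION_F_NEEDS_RESET */
#define EXAMPLE_REGION_F_LOCK		2	/* mirrors CXL_REGION_F_LOCK */

/* Illustrative: how a commit/reset path is expected to honor the lock flag. */
static int example_region_queue_reset(unsigned long *flags)
{
	if (test_bit(EXAMPLE_REGION_F_LOCK, flags))
		return -EPERM;	/* locked decoders back this region: refuse decommit */

	set_bit(EXAMPLE_REGION_F_NEEDS_RESET, flags);
	return 0;
}

This is the same all-or-nothing behavior that commit_store() and cxl_region_decode_reset() implement in the hunks earlier in this merge.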


@@ -127,7 +127,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
}
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
void read_cdat_data(struct cxl_port *port);
void cxl_cor_error_detected(struct pci_dev *pdev);


@@ -136,7 +136,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
if (opcode == CXL_MBOX_OP_SANITIZE) {
mutex_lock(&cxl_mbox->mbox_mutex);
if (mds->security.sanitize_node)
-			mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
+			mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
mutex_unlock(&cxl_mbox->mbox_mutex);
} else {
/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */


@@ -4,7 +4,6 @@ ldflags-y += --wrap=is_acpi_device_node
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys


@@ -995,37 +995,6 @@ static int get_port_array(struct cxl_port *port,
return 0;
}
static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
{
struct platform_device **array;
int i, array_size;
int rc;
rc = get_port_array(port, &array, &array_size);
if (rc)
return rc;
for (i = 0; i < array_size; i++) {
struct platform_device *pdev = array[i];
struct cxl_dport *dport;
if (pdev->dev.parent != port->uport_dev) {
dev_dbg(&port->dev, "%s: mismatch parent %s\n",
dev_name(port->uport_dev),
dev_name(pdev->dev.parent));
continue;
}
dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
CXL_RESOURCE_NONE);
if (IS_ERR(dport))
return PTR_ERR(dport);
}
return 0;
}
static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
struct device *dport_dev)
{
@@ -1114,7 +1083,6 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_pci_find_root = mock_acpi_pci_find_root,
.devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),


@@ -172,21 +172,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
rc = ops->devm_cxl_port_enumerate_dports(port);
else
rc = devm_cxl_port_enumerate_dports(port);
put_cxl_mock_ops(index);
return rc;
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
int rc, index;


@@ -19,7 +19,6 @@ struct cxl_mock_ops {
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);