linux/drivers/cxl/core/hdm.c
Dave Jiang 2230c4bdc4 cxl: Add handling of locked CXL decoder
When a decoder is locked, its configuration cannot be changed. CXL
spec r3.2 8.2.4.20.13 discusses the details of locked decoders.
Locking takes effect when bit 8 of the decoder control register is
set and the decoder is subsequently committed (CXL spec r3.2
8.2.4.20.7).

Given that the driver creates a virtual decoder for each CFMWS, the
driver treats the Fixed Device Configuration (bit 4) of the Window
Restriction field as locking for the virtual decoder.

The current driver code disregards the locked status: a region can be
destroyed regardless of its locking state.

Add a region flag to indicate that the region is in a locked
configuration. The driver considers a region locked if the CFMWS or any
participating decoder is configured as locked; the determination is all
or nothing with respect to the locked state. It is reasonable to
determine the region's "locked" status from its decoders while the
region is being assembled.

Add a check in region commit_store() to intercept a 0 being written to
the commit sysfs attribute, in order to prevent the destruction of a
region while it is in the locked state. This should be the only entry
point from user space to destroy a region.

Add a check to cxl_decoder_reset() to prevent the kernel driver from
resetting a locked decoder.
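
For illustration only, a minimal sketch of the commit_store() intercept
described above. The cxl_region_is_locked() helper and the exact flag
handling are placeholders for this write-up, not the code added by this
change:

/* Sketch: reject region teardown via sysfs while the region is locked */
static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
                            const char *buf, size_t len)
{
        struct cxl_region *cxlr = to_cxl_region(dev);
        bool commit;
        int rc;

        rc = kstrtobool(buf, &commit);
        if (rc)
                return rc;

        /* hypothetical helper standing in for the new region locked flag */
        if (!commit && cxl_region_is_locked(cxlr))
                return -EPERM;

        /* ... normal commit / teardown handling continues here ... */
        return len;
}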

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251105201826.2901915-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-11-12 13:17:19 -07:00


// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/delay.h>
#include "cxlmem.h"
#include "core.h"
/**
* DOC: cxl core hdm
*
* Compute Express Link Host Managed Device Memory, starting with the
* CXL 2.0 specification, is managed by an array of HDM Decoder register
* instances per CXL port and per CXL endpoint. Define common helpers
* for enumerating these registers and capabilities.
*/
struct cxl_rwsem cxl_rwsem = {
.region = __RWSEM_INITIALIZER(cxl_rwsem.region),
.dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa),
};
static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld)
{
int rc;
rc = cxl_decoder_add_locked(cxld);
if (rc) {
put_device(&cxld->dev);
dev_err(&port->dev, "Failed to add decoder\n");
return rc;
}
rc = cxl_decoder_autoremove(&port->dev, cxld);
if (rc)
return rc;
dev_dbg(port->uport_dev, "%s added to %s\n",
dev_name(&cxld->dev), dev_name(&port->dev));
return 0;
}
/*
* Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure)
* single ported host-bridges need not publish a decoder capability when a
* passthrough decode can be assumed, i.e. all transactions that the uport sees
* are claimed and passed to the single dport. Disable the range until the first
* CXL region is enumerated / activated.
*/
static int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
{
struct cxl_switch_decoder *cxlsd;
struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
/*
* Capability checks are moot for passthrough decoders; support
* any and all possibilities.
*/
cxlhdm->interleave_mask = ~0U;
cxlhdm->iw_cap_mask = ~0UL;
cxlsd = cxl_switch_decoder_alloc(port, 1);
if (IS_ERR(cxlsd))
return PTR_ERR(cxlsd);
device_lock_assert(&port->dev);
return add_hdm_decoder(port, &cxlsd->cxld);
}
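/*
* Parse the HDM Decoder Capability register: decoder and target-list
* counts, supported interleave address bits, and supported interleave
* ways.
*/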
static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
{
u32 hdm_cap;
hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap);
cxlhdm->target_count =
FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap))
cxlhdm->interleave_mask |= GENMASK(11, 8);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
cxlhdm->interleave_mask |= GENMASK(14, 12);
cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
cxlhdm->iw_cap_mask |= BIT(16);
}
static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm;
void __iomem *hdm;
u32 ctrl;
int i;
if (!info)
return false;
cxlhdm = dev_get_drvdata(&info->port->dev);
hdm = cxlhdm->regs.hdm_decoder;
if (!hdm)
return true;
/*
* If HDM decoders are present and the driver is in control of
* Mem_Enable, skip DVSEC-based emulation
*/
if (!info->mem_enabled)
return false;
/*
* If any decoders are committed already, there should not be any
* emulated DVSEC decoders.
*/
for (i = 0; i < cxlhdm->decoder_count; i++) {
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
dev_dbg(&info->port->dev,
"decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n",
info->port->id, i,
FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl),
readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)),
readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)),
readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)),
readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i)));
if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
return false;
}
return true;
}
/**
* devm_cxl_setup_hdm - map HDM decoder component registers
* @port: cxl_port to map
* @info: cached DVSEC range register info
*/
static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct cxl_register_map *reg_map = &port->reg_map;
struct device *dev = &port->dev;
struct cxl_hdm *cxlhdm;
int rc;
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
dev_set_drvdata(dev, cxlhdm);
/* Memory devices can configure device HDM using DVSEC range regs. */
if (reg_map->resource == CXL_RESOURCE_NONE) {
if (!info || !info->mem_enabled) {
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
cxlhdm->decoder_count = info->ranges;
return cxlhdm;
}
if (!reg_map->component_map.hdm_decoder.valid) {
dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
/* unique error code to indicate no HDM decoder capability */
return ERR_PTR(-ENODEV);
}
rc = cxl_map_component_regs(reg_map, &cxlhdm->regs,
BIT(CXL_CM_CAP_CAP_ID_HDM));
if (rc) {
dev_err(dev, "Failed to map HDM capability.\n");
return ERR_PTR(rc);
}
parse_hdm_decoder_caps(cxlhdm);
if (cxlhdm->decoder_count == 0) {
dev_err(dev, "Spec violation. Caps invalid\n");
return ERR_PTR(-ENXIO);
}
/*
* Now that the hdm capability is parsed, decide if range
* register emulation is needed and fixup cxlhdm accordingly.
*/
if (should_emulate_decoders(info)) {
dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
str_plural(info->ranges));
cxlhdm->decoder_count = info->ranges;
}
return cxlhdm;
}
static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth)
{
unsigned long long start = r->start, end = r->end;
seq_printf(file, "%*s%08llx-%08llx : %s\n", depth * 2, "", start, end,
r->name);
}
void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
{
struct resource *p1, *p2;
guard(rwsem_read)(&cxl_rwsem.dpa);
for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
__cxl_dpa_debug(file, p1, 0);
for (p2 = p1->child; p2; p2 = p2->sibling)
__cxl_dpa_debug(file, p2, 1);
}
}
EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL");
/* See request_skip() kernel-doc */
static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds,
const resource_size_t skip_base,
const resource_size_t skip_len,
const char *requester)
{
const resource_size_t skip_end = skip_base + skip_len - 1;
for (int i = 0; i < cxlds->nr_partitions; i++) {
const struct resource *part_res = &cxlds->part[i].res;
resource_size_t adjust_start, adjust_end, size;
adjust_start = max(skip_base, part_res->start);
adjust_end = min(skip_end, part_res->end);
if (adjust_end < adjust_start)
continue;
size = adjust_end - adjust_start + 1;
if (!requester)
__release_region(&cxlds->dpa_res, adjust_start, size);
else if (!__request_region(&cxlds->dpa_res, adjust_start, size,
requester, 0))
return adjust_start - skip_base;
}
return skip_len;
}
#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL)
/*
* Must be called in a context that synchronizes against this decoder's
* port ->remove() callback (like an endpoint decoder sysfs attribute)
*/
static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct resource *res = cxled->dpa_res;
resource_size_t skip_start;
lockdep_assert_held_write(&cxl_rwsem.dpa);
/* save @skip_start, before @res is released */
skip_start = res->start - cxled->skip;
__release_region(&cxlds->dpa_res, res->start, resource_size(res));
if (cxled->skip)
release_skip(cxlds, skip_start, cxled->skip);
cxled->skip = 0;
cxled->dpa_res = NULL;
put_device(&cxled->cxld.dev);
port->hdm_end--;
}
static void cxl_dpa_release(void *cxled)
{
guard(rwsem_write)(&cxl_rwsem.dpa);
__cxl_dpa_release(cxled);
}
/*
* Must be called from context that will not race port device
* unregistration, like decoder sysfs attribute methods
*/
static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
lockdep_assert_held_write(&cxl_rwsem.dpa);
devm_remove_action(&port->dev, cxl_dpa_release, cxled);
__cxl_dpa_release(cxled);
}
/**
* request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
* @cxlds: CXL.mem device context that parents @cxled
* @cxled: Endpoint decoder establishing new allocation that skips lower DPA
* @skip_base: DPA < start of new DPA allocation (DPAnew)
* @skip_len: @skip_base + @skip_len == DPAnew
*
* DPA 'skip' arises from out-of-sequence DPA allocation events relative
* to free capacity across multiple partitions. It is a wasteful event
* as usable DPA gets thrown away, but if a deployment has, for example,
* a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
* DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
* See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
* Protection" for more details.
*
* A 'skip' always covers the last allocated DPA in a previous partition
* to the start of the current partition to allocate. Allocations never
* start in the middle of a partition, and allocations are always
* de-allocated in reverse order (see cxl_dpa_free(), or natural devm
* unwind order from forced in-order allocation).
*
* If @cxlds->nr_partitions were guaranteed to be <= 2 then the 'skip'
* would always be contained within a single partition. Given
* @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
* might span "tail capacity of partition[0], all of partition[1], ...,
* all of partition[N-1]" to support allocating from partition[N]. That
* in turn interacts with the partition 'struct resource' boundaries
* within @cxlds->dpa_res whereby 'skip' requests need to be divided by
* partition. I.e. this is a quirk of using a 'struct resource' tree to
* detect range conflicts while also tracking partition boundaries in
* @cxlds->dpa_res.
*/
static int request_skip(struct cxl_dev_state *cxlds,
struct cxl_endpoint_decoder *cxled,
const resource_size_t skip_base,
const resource_size_t skip_len)
{
resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len,
dev_name(&cxled->cxld.dev));
if (skipped == skip_len)
return 0;
dev_dbg(cxlds->dev,
"%s: failed to reserve skipped space (%pa %pa %pa)\n",
dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped);
release_skip(cxlds, skip_base, skipped);
return -EBUSY;
}
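/*
* Reserve @len bytes of DPA at @base for @cxled, accounting for any
* skipped capacity below @base, and enforce in-order decoder allocation.
*/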
static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &port->dev;
struct resource *res;
int rc;
lockdep_assert_held_write(&cxl_rwsem.dpa);
if (!len) {
dev_warn(dev, "decoder%d.%d: empty reservation attempted\n",
port->id, cxled->cxld.id);
return -EINVAL;
}
if (cxled->dpa_res) {
dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
port->id, cxled->cxld.id, cxled->dpa_res);
return -EBUSY;
}
if (port->hdm_end + 1 != cxled->cxld.id) {
/*
* Assumes alloc and commit order is always in hardware instance
* order per expectations from 8.2.5.12.20 Committing Decoder
* Programming that enforce decoder[m] committed before
* decoder[m+1] commit start.
*/
dev_dbg(dev, "decoder%d.%d: expected decoder%d.%d\n", port->id,
cxled->cxld.id, port->id, port->hdm_end + 1);
return -EBUSY;
}
if (skipped) {
rc = request_skip(cxlds, cxled, base - skipped, skipped);
if (rc)
return rc;
}
res = __request_region(&cxlds->dpa_res, base, len,
dev_name(&cxled->cxld.dev), 0);
if (!res) {
dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
port->id, cxled->cxld.id);
if (skipped)
release_skip(cxlds, base - skipped, skipped);
return -EBUSY;
}
cxled->dpa_res = res;
cxled->skip = skipped;
/*
* When allocating new capacity, ->part is already set, when
* discovering decoder settings at initial enumeration, ->part
* is not set.
*/
if (cxled->part < 0)
for (int i = 0; i < cxlds->nr_partitions; i++)
if (resource_contains(&cxlds->part[i].res, res)) {
cxled->part = i;
break;
}
if (cxled->part < 0)
dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n",
port->id, cxled->cxld.id, res);
port->hdm_end++;
get_device(&cxled->cxld.dev);
return 0;
}
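/*
* Track a partition as a child of @parent in the device DPA resource
* tree; zero-sized partitions are initialized but not inserted.
*/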
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
{
int rc;
*res = (struct resource) {
.name = type,
.start = start,
.end = start + size - 1,
.flags = IORESOURCE_MEM,
};
if (resource_size(res) == 0) {
dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0;
}
rc = request_resource(parent, res);
if (rc) {
dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
res, rc);
return rc;
}
dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
return 0;
}
static const char *cxl_mode_name(enum cxl_partition_mode mode)
{
switch (mode) {
case CXL_PARTMODE_RAM:
return "ram";
case CXL_PARTMODE_PMEM:
return "pmem";
default:
return "";
};
}
/* if this fails the caller must destroy @cxlds, there is no recovery */
int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
{
struct device *dev = cxlds->dev;
guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxlds->nr_partitions)
return -EBUSY;
if (!info->size || !info->nr_partitions) {
cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
cxlds->nr_partitions = 0;
return 0;
}
cxlds->dpa_res = DEFINE_RES_MEM(0, info->size);
for (int i = 0; i < info->nr_partitions; i++) {
const struct cxl_dpa_part_info *part = &info->part[i];
int rc;
cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID;
cxlds->part[i].mode = part->mode;
/* Require ordered + contiguous partitions */
if (i) {
const struct cxl_dpa_part_info *prev = &info->part[i - 1];
if (prev->range.end + 1 != part->range.start)
return -EINVAL;
}
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res,
part->range.start, range_len(&part->range),
cxl_mode_name(part->mode));
if (rc)
return rc;
cxlds->nr_partitions++;
}
return 0;
}
EXPORT_SYMBOL_GPL(cxl_dpa_setup);
int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
{
struct cxl_port *port = cxled_to_port(cxled);
int rc;
scoped_guard(rwsem_write, &cxl_rwsem.dpa)
rc = __cxl_dpa_reserve(cxled, base, len, skipped);
if (rc)
return rc;
return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL");
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
guard(rwsem_read)(&cxl_rwsem.dpa);
if (cxled->dpa_res)
return resource_size(cxled->dpa_res);
return 0;
}
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
{
resource_size_t base = -1;
lockdep_assert_held(&cxl_rwsem.dpa);
if (cxled->dpa_res)
base = cxled->dpa_res->start;
return base;
}
bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr)
{
struct resource _addr = DEFINE_RES_MEM(addr, 1);
return resource_contains(res, &_addr);
}
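/*
* Free a decoder's DPA allocation. Only permitted when the decoder is
* not attached to a region, not enabled, and is the most recently
* allocated decoder (allocations are released in reverse order).
*/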
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
struct device *dev = &cxled->cxld.dev;
guard(rwsem_write)(&cxl_rwsem.dpa);
if (!cxled->dpa_res)
return 0;
if (cxled->cxld.region) {
dev_dbg(dev, "decoder assigned to: %s\n",
dev_name(&cxled->cxld.region->dev));
return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
return -EBUSY;
}
if (cxled->cxld.id != port->hdm_end) {
dev_dbg(dev, "expected decoder%d.%d\n", port->id,
port->hdm_end);
return -EBUSY;
}
devm_cxl_dpa_release(cxled);
return 0;
}
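/*
* Select the partition (ram or pmem) that backs future allocations for
* this decoder; fails if the decoder is enabled or the device has no
* capacity of the requested mode.
*/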
int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
enum cxl_partition_mode mode)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
int part;
guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
return -EBUSY;
for (part = 0; part < cxlds->nr_partitions; part++)
if (cxlds->part[part].mode == mode)
break;
if (part >= cxlds->nr_partitions) {
dev_dbg(dev, "unsupported mode: %d\n", mode);
return -EINVAL;
}
if (!resource_size(&cxlds->part[part].res)) {
dev_dbg(dev, "no available capacity for mode: %d\n", mode);
return -ENXIO;
}
cxled->part = part;
return 0;
}
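/*
* Allocate @size bytes from the decoder's selected partition, appending
* to any existing allocations and accounting for skipped capacity in
* lower partitions.
*/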
static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
struct resource *res, *prev = NULL;
resource_size_t start, avail, skip, skip_start;
struct resource *p, *last;
int part;
guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxled->cxld.region) {
dev_dbg(dev, "decoder attached to %s\n",
dev_name(&cxled->cxld.region->dev));
return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
return -EBUSY;
}
part = cxled->part;
if (part < 0) {
dev_dbg(dev, "partition not set\n");
return -EBUSY;
}
res = &cxlds->part[part].res;
for (p = res->child, last = NULL; p; p = p->sibling)
last = p;
if (last)
start = last->end + 1;
else
start = res->start;
/*
* To allocate at partition N, a skip needs to be calculated for all
* unallocated space at lower partition indices.
*
* If a partition has any allocations, the search can end because a
* previous cxl_dpa_alloc() invocation is assumed to have accounted for
* all previous partitions.
*/
skip_start = CXL_RESOURCE_NONE;
for (int i = part; i; i--) {
prev = &cxlds->part[i - 1].res;
for (p = prev->child, last = NULL; p; p = p->sibling)
last = p;
if (last) {
skip_start = last->end + 1;
break;
}
skip_start = prev->start;
}
avail = res->end - start + 1;
if (skip_start == CXL_RESOURCE_NONE)
skip = 0;
else
skip = res->start - skip_start;
if (size > avail) {
dev_dbg(dev, "%llu exceeds available %s capacity: %llu\n", size,
res->name, (u64)avail);
return -ENOSPC;
}
return __cxl_dpa_reserve(cxled, start, size, skip);
}
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
struct cxl_port *port = cxled_to_port(cxled);
int rc;
rc = __cxl_dpa_alloc(cxled, size);
if (rc)
return rc;
return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
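/*
* Encode interleave ways and granularity into the decoder control
* register value and set the commit bit.
*/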
static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
{
u16 eig;
u8 eiw;
/*
* Input validation ensures these warns never fire, but otherwise
* suppress uninitialized variable usage warnings.
*/
if (WARN_ONCE(ways_to_eiw(cxld->interleave_ways, &eiw),
"invalid interleave_ways: %d\n", cxld->interleave_ways))
return;
if (WARN_ONCE(granularity_to_eig(cxld->interleave_granularity, &eig),
"invalid interleave_granularity: %d\n",
cxld->interleave_granularity))
return;
u32p_replace_bits(ctrl, eig, CXL_HDM_DECODER0_CTRL_IG_MASK);
u32p_replace_bits(ctrl, eiw, CXL_HDM_DECODER0_CTRL_IW_MASK);
*ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT;
}
static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl)
{
u32p_replace_bits(ctrl,
!!(cxld->target_type == CXL_DECODER_HOSTONLYMEM),
CXL_HDM_DECODER0_CTRL_HOSTONLY);
}
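/*
* Pack up to 8 downstream port ids, 8 bits each in interleave order,
* into the 64-bit target list register image.
*/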
static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
{
struct cxl_dport **t = &cxlsd->target[0];
int ways = cxlsd->cxld.interleave_ways;
*tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id);
if (ways > 1)
*tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id);
if (ways > 2)
*tgt |= FIELD_PREP(GENMASK(23, 16), t[2]->port_id);
if (ways > 3)
*tgt |= FIELD_PREP(GENMASK(31, 24), t[3]->port_id);
if (ways > 4)
*tgt |= FIELD_PREP(GENMASK_ULL(39, 32), t[4]->port_id);
if (ways > 5)
*tgt |= FIELD_PREP(GENMASK_ULL(47, 40), t[5]->port_id);
if (ways > 6)
*tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id);
if (ways > 7)
*tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id);
}
/*
* Per CXL 2.0 8.2.5.12.20 Committing Decoder Programming, hardware must set
* committed or error within 10ms, but just be generous with 20ms to account for
* clock skew and other marginal behavior
*/
#define COMMIT_TIMEOUT_MS 20
static int cxld_await_commit(void __iomem *hdm, int id)
{
u32 ctrl;
int i;
for (i = 0; i < COMMIT_TIMEOUT_MS; i++) {
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMIT_ERROR, ctrl)) {
ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
return -EIO;
}
if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
return 0;
fsleep(1000);
}
return -ETIMEDOUT;
}
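/*
* Program a hardware decoder: interleave and type in the control value,
* HPA base and size, the target list (switch) or DPA skip (endpoint),
* then write control to start the commit.
*/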
static void setup_hw_decoder(struct cxl_decoder *cxld, void __iomem *hdm)
{
int id = cxld->id;
u64 base, size;
u32 ctrl;
/* common decoder settings */
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
cxld_set_interleave(cxld, &ctrl);
cxld_set_type(cxld, &ctrl);
base = cxld->hpa_range.start;
size = range_len(&cxld->hpa_range);
writel(upper_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
writel(lower_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
writel(upper_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
writel(lower_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
if (is_switch_decoder(&cxld->dev)) {
struct cxl_switch_decoder *cxlsd =
to_cxl_switch_decoder(&cxld->dev);
void __iomem *tl_hi = hdm + CXL_HDM_DECODER0_TL_HIGH(id);
void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id);
u64 targets;
cxlsd_set_targets(cxlsd, &targets);
writel(upper_32_bits(targets), tl_hi);
writel(lower_32_bits(targets), tl_lo);
} else {
struct cxl_endpoint_decoder *cxled =
to_cxl_endpoint_decoder(&cxld->dev);
void __iomem *sk_hi = hdm + CXL_HDM_DECODER0_SKIP_HIGH(id);
void __iomem *sk_lo = hdm + CXL_HDM_DECODER0_SKIP_LOW(id);
writel(upper_32_bits(cxled->skip), sk_hi);
writel(lower_32_bits(cxled->skip), sk_lo);
}
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
}
static int cxl_decoder_commit(struct cxl_decoder *cxld)
{
struct cxl_port *port = to_cxl_port(cxld->dev.parent);
struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
int id = cxld->id, rc;
if (cxld->flags & CXL_DECODER_F_ENABLE)
return 0;
if (cxl_num_decoders_committed(port) != id) {
dev_dbg(&port->dev,
"%s: out of order commit, expected decoder%d.%d\n",
dev_name(&cxld->dev), port->id,
cxl_num_decoders_committed(port));
return -EBUSY;
}
/*
* For endpoint decoders hosted on CXL memory devices that
* support the sanitize operation, make sure sanitize is not in-flight.
*/
if (is_endpoint_decoder(&cxld->dev)) {
struct cxl_endpoint_decoder *cxled =
to_cxl_endpoint_decoder(&cxld->dev);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_memdev_state *mds =
to_cxl_memdev_state(cxlmd->cxlds);
if (mds && mds->security.sanitize_active) {
dev_dbg(&cxlmd->dev,
"attempted to commit %s during sanitize\n",
dev_name(&cxld->dev));
return -EBUSY;
}
}
scoped_guard(rwsem_read, &cxl_rwsem.dpa)
setup_hw_decoder(cxld, hdm);
port->commit_end++;
rc = cxld_await_commit(hdm, cxld->id);
if (rc) {
dev_dbg(&port->dev, "%s: error %d committing decoder\n",
dev_name(&cxld->dev), rc);
cxld->reset(cxld);
return rc;
}
cxld->flags |= CXL_DECODER_F_ENABLE;
return 0;
}
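/*
* Callback for cxl_port_commit_reap(): if the decoder at the current
* commit_end is no longer enabled, retire it by decrementing commit_end.
*/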
static int commit_reap(struct device *dev, void *data)
{
struct cxl_port *port = to_cxl_port(dev->parent);
struct cxl_decoder *cxld;
if (!is_switch_decoder(dev) && !is_endpoint_decoder(dev))
return 0;
cxld = to_cxl_decoder(dev);
if (port->commit_end == cxld->id &&
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
port->commit_end--;
dev_dbg(&port->dev, "reap: %s commit_end: %d\n",
dev_name(&cxld->dev), port->commit_end);
}
return 0;
}
void cxl_port_commit_reap(struct cxl_decoder *cxld)
{
struct cxl_port *port = to_cxl_port(cxld->dev.parent);
lockdep_assert_held_write(&cxl_rwsem.region);
/*
* Once the highest committed decoder is disabled, free any other
* decoders that were left pinned by out-of-order release.
*/
port->commit_end--;
dev_dbg(&port->dev, "reap: %s commit_end: %d\n", dev_name(&cxld->dev),
port->commit_end);
device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL,
commit_reap);
}
EXPORT_SYMBOL_NS_GPL(cxl_port_commit_reap, "CXL");
static void cxl_decoder_reset(struct cxl_decoder *cxld)
{
struct cxl_port *port = to_cxl_port(cxld->dev.parent);
struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
int id = cxld->id;
u32 ctrl;
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
return;
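/* A locked, committed decoder must not be reconfigured (CXL r3.2 8.2.4.20.13) */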
if (cxld->flags & CXL_DECODER_F_LOCK)
return;
if (port->commit_end == id)
cxl_port_commit_reap(cxld);
else
dev_dbg(&port->dev,
"%s: out of order reset, expected decoder%d.%d\n",
dev_name(&cxld->dev), port->id, port->commit_end);
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
cxld->flags &= ~CXL_DECODER_F_ENABLE;
/* Userspace is now responsible for reconfiguring this decoder */
if (is_endpoint_decoder(&cxld->dev)) {
struct cxl_endpoint_decoder *cxled;
cxled = to_cxl_endpoint_decoder(&cxld->dev);
cxled->state = CXL_DECODER_STATE_MANUAL;
}
}
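/*
* Emulate an endpoint decoder from a cached DVSEC range register: mark
* it enabled and locked, and reserve the corresponding DPA span.
*/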
static int cxl_setup_hdm_decoder_from_dvsec(
struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base,
int which, struct cxl_endpoint_dvsec_info *info)
{
struct cxl_endpoint_decoder *cxled;
u64 len;
int rc;
if (!is_cxl_endpoint(port))
return -EOPNOTSUPP;
cxled = to_cxl_endpoint_decoder(&cxld->dev);
len = range_len(&info->dvsec_range[which]);
if (!len)
return -ENOENT;
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->commit = NULL;
cxld->reset = NULL;
cxld->hpa_range = info->dvsec_range[which];
/*
* Set the emulated decoder as locked pending additional support to
* change the range registers at run time.
*/
cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;
port->commit_end = cxld->id;
rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0);
if (rc) {
dev_err(&port->dev,
"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc);
return rc;
}
*dpa_base += len;
cxled->state = CXL_DECODER_STATE_AUTO;
return 0;
}
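/*
* Initialize a decoder object from its hardware register state: read
* back control, base, and size, validate committed decoders, and for
* endpoint decoders reserve the matching DPA.
*/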
static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
void __iomem *hdm, int which,
u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
{
struct cxl_endpoint_decoder *cxled = NULL;
u64 size, base, skip, dpa_size, lo, hi;
bool committed;
u32 remainder;
int i, rc;
u32 ctrl;
union {
u64 value;
unsigned char target_id[8];
} target_list;
if (should_emulate_decoders(info))
return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base,
which, info);
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
base = (hi << 32) + lo;
lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
size = (hi << 32) + lo;
committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
cxld->commit = cxl_decoder_commit;
cxld->reset = cxl_decoder_reset;
if (!committed)
size = 0;
if (base == U64_MAX || size == U64_MAX) {
dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n",
port->id, cxld->id);
return -ENXIO;
}
if (info)
cxled = to_cxl_endpoint_decoder(&cxld->dev);
cxld->hpa_range = (struct range) {
.start = base,
.end = base + size - 1,
};
/* decoders are enabled if committed */
if (committed) {
cxld->flags |= CXL_DECODER_F_ENABLE;
if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
cxld->flags |= CXL_DECODER_F_LOCK;
if (FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl))
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
else
cxld->target_type = CXL_DECODER_DEVMEM;
guard(rwsem_write)(&cxl_rwsem.region);
if (cxld->id != cxl_num_decoders_committed(port)) {
dev_warn(&port->dev,
"decoder%d.%d: Committed out of order\n",
port->id, cxld->id);
return -ENXIO;
}
if (size == 0) {
dev_warn(&port->dev,
"decoder%d.%d: Committed with zero size\n",
port->id, cxld->id);
return -ENXIO;
}
port->commit_end = cxld->id;
} else {
if (cxled) {
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
/*
* Default by devtype until a device arrives that needs
* more precision.
*/
if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
else
cxld->target_type = CXL_DECODER_DEVMEM;
} else {
/* To be overridden by region type at commit time */
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
}
if (!FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl) &&
cxld->target_type == CXL_DECODER_HOSTONLYMEM) {
ctrl |= CXL_HDM_DECODER0_CTRL_HOSTONLY;
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
}
}
rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
&cxld->interleave_ways);
if (rc) {
dev_warn(&port->dev,
"decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
port->id, cxld->id, ctrl);
return rc;
}
rc = eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
&cxld->interleave_granularity);
if (rc) {
dev_warn(&port->dev,
"decoder%d.%d: Invalid interleave granularity (ctrl: %#x)\n",
port->id, cxld->id, ctrl);
return rc;
}
dev_dbg(&port->dev, "decoder%d.%d: range: %#llx-%#llx iw: %d ig: %d\n",
port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end,
cxld->interleave_ways, cxld->interleave_granularity);
if (!cxled) {
lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
target_list.value = (hi << 32) + lo;
for (i = 0; i < cxld->interleave_ways; i++)
cxld->target_map[i] = target_list.target_id[i];
return 0;
}
if (!committed)
return 0;
dpa_size = div_u64_rem(size, cxld->interleave_ways, &remainder);
if (remainder) {
dev_err(&port->dev,
"decoder%d.%d: invalid committed configuration size: %#llx ways: %d\n",
port->id, cxld->id, size, cxld->interleave_ways);
return -ENXIO;
}
lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
skip = (hi << 32) + lo;
rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
if (rc) {
dev_err(&port->dev,
"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
port->id, cxld->id, *dpa_base,
*dpa_base + dpa_size + skip - 1, rc);
return rc;
}
*dpa_base += dpa_size + skip;
cxled->state = CXL_DECODER_STATE_AUTO;
return 0;
}
static void cxl_settle_decoders(struct cxl_hdm *cxlhdm)
{
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
int committed, i;
u32 ctrl;
if (!hdm)
return;
/*
* Since the register resource was recently claimed via request_region()
* be careful about trusting the "not-committed" status until the commit
* timeout has elapsed. The commit timeout is 10ms (CXL 2.0
* 8.2.5.12.20), but double it to be tolerant of any clock skew between
* host and target.
*/
for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) {
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
committed++;
}
/* ensure that future checks of committed can be trusted */
if (committed != cxlhdm->decoder_count)
msleep(20);
}
/**
* devm_cxl_enumerate_decoders - add decoder objects per HDM register set
* @cxlhdm: Structure to populate with HDM capabilities
* @info: cached DVSEC range register info
*/
static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
struct cxl_endpoint_dvsec_info *info)
{
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
struct cxl_port *port = cxlhdm->port;
int i;
u64 dpa_base = 0;
cxl_settle_decoders(cxlhdm);
for (i = 0; i < cxlhdm->decoder_count; i++) {
int rc, target_count = cxlhdm->target_count;
struct cxl_decoder *cxld;
if (is_cxl_endpoint(port)) {
struct cxl_endpoint_decoder *cxled;
cxled = cxl_endpoint_decoder_alloc(port);
if (IS_ERR(cxled)) {
dev_warn(&port->dev,
"Failed to allocate decoder%d.%d\n",
port->id, i);
return PTR_ERR(cxled);
}
cxld = &cxled->cxld;
} else {
struct cxl_switch_decoder *cxlsd;
cxlsd = cxl_switch_decoder_alloc(port, target_count);
if (IS_ERR(cxlsd)) {
dev_warn(&port->dev,
"Failed to allocate decoder%d.%d\n",
port->id, i);
return PTR_ERR(cxlsd);
}
cxld = &cxlsd->cxld;
}
rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info);
if (rc) {
dev_warn(&port->dev,
"Failed to initialize decoder%d.%d\n",
port->id, i);
put_device(&cxld->dev);
return rc;
}
rc = add_hdm_decoder(port, cxld);
if (rc) {
dev_warn(&port->dev,
"Failed to add decoder%d.%d\n", port->id, i);
return rc;
}
}
return 0;
}
/**
* __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders
* @port: CXL port context
*
* Return 0 or -errno on error
*/
int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
struct cxl_hdm *cxlhdm;
if (is_cxl_root(port) || is_cxl_endpoint(port))
return -EOPNOTSUPP;
cxlhdm = devm_cxl_setup_hdm(port, NULL);
if (!IS_ERR(cxlhdm))
return devm_cxl_enumerate_decoders(cxlhdm, NULL);
if (PTR_ERR(cxlhdm) != -ENODEV) {
dev_err(&port->dev, "Failed to map HDM decoder capability\n");
return PTR_ERR(cxlhdm);
}
if (cxl_port_get_possible_dports(port) == 1) {
dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
return devm_cxl_add_passthrough_decoder(port);
}
dev_err(&port->dev, "HDM decoder capability not found\n");
return -ENXIO;
}
EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL");
/**
* devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders
* @port: CXL port context
*
* Return 0 or -errno on error
*/
int devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_endpoint_dvsec_info info = { .port = port };
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_hdm *cxlhdm;
int rc;
if (!is_cxl_endpoint(port))
return -EOPNOTSUPP;
rc = cxl_dvsec_rr_decode(cxlds, &info);
if (rc < 0)
return rc;
cxlhdm = devm_cxl_setup_hdm(port, &info);
if (IS_ERR(cxlhdm)) {
if (PTR_ERR(cxlhdm) == -ENODEV)
dev_err(&port->dev, "HDM decoder registers not found\n");
return PTR_ERR(cxlhdm);
}
rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info);
if (rc)
return rc;
return devm_cxl_enumerate_decoders(cxlhdm, &info);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL");