From d3911f1639e67fc7b12aae0efa5a540976d7443b Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Wed, 5 Jun 2024 18:53:27 +0500 Subject: [PATCH 0001/1523] power: supply: rt5033: Bring back i2c_set_clientdata Commit 3a93da231c12 ("power: supply: rt5033: Use devm_power_supply_register() helper") reworked the driver to use devm. While at it, the i2c_set_clientdata was dropped along with the remove callback. Unfortunately other parts of the driver also rely on i2c clientdata so this causes kernel oops. Bring the call back to fix the driver. Fixes: 3a93da231c12 ("power: supply: rt5033: Use devm_power_supply_register() helper") Tested-by: Raymond Hackley Signed-off-by: Nikita Travkin Link: https://lore.kernel.org/r/20240605-rt5033-null-clientdata-v1-1-558d710eeb4d@trvn.ru Signed-off-by: Sebastian Reichel --- drivers/power/supply/rt5033_battery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 32eafe2c00af..7a27b262fb84 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -159,6 +159,7 @@ static int rt5033_battery_probe(struct i2c_client *client) return -EINVAL; } + i2c_set_clientdata(client, battery); psy_cfg.of_node = client->dev.of_node; psy_cfg.drv_data = battery; From 9fc97277eb2d17492de636b68cf7d2f5c4f15c1b Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Tue, 25 Jun 2024 17:26:52 -0300 Subject: [PATCH 0002/1523] drm/i915: Skip programming FIA link enable bits for MTL+ Starting with Xe_LPD+, although FIA is still used to readout Type-C pin assignment, part of Type-C support is moved to PICA and programming PORT_TX_DFLEXDPMLE1(*) registers is not applicable anymore like it was for previous display IPs (e.g. see BSpec 49190). v2: - Mention Bspec 49190 as a reference of instructions for previous IPs. (Shekhar Chauhan) - s/Xe_LPDP/Xe_LPD+/ in the commit message. (Matt Roper) - Update commit message to be more accurate to the changes in the IP. (Imre Deak) Bspec: 65750, 65448 Reviewed-by: Shekhar Chauhan Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240625202652.315936-1-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 9887967b2ca5..6f2ee7dbc43b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -393,6 +393,9 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; u32 val; + if (DISPLAY_VER(i915) >= 14) + return; + drm_WARN_ON(&i915->drm, lane_reversal && tc->mode != TC_PORT_LEGACY); From f666604321f1da2b9bd237ef8a1afdd47460e74b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 30 May 2024 00:47:22 +0100 Subject: [PATCH 0003/1523] USB: serial: spcp8x5: remove unused struct 'spcp8x5_usb_ctrl_arg' 'spcp8x5_usb_ctrl_arg' has been unused since the original commit 619a6f1d1423 ("USB: add usb-serial spcp8x5 driver"). Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Johan Hovold --- drivers/usb/serial/spcp8x5.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 09a972a838ee..6b294bf8bc43 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -49,16 +49,6 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); -struct spcp8x5_usb_ctrl_arg { - u8 type; - u8 cmd; - u8 cmd_type; - u16 value; - u16 index; - u16 length; -}; - - /* spcp8x5 spec register define */ #define MCR_CONTROL_LINE_RTS 0x02 #define MCR_CONTROL_LINE_DTR 0x01 From 9f4dc05107a6db3743e6b9ea4014cbdc3795682d Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 11 Jun 2024 10:52:54 -0700 Subject: [PATCH 0004/1523] USB: serial: add missing MODULE_DESCRIPTION() macros Since commit 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing"), ARCH=x86 make allmodconfig && make W=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/ch341.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/usb_debug.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/mxuport.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/navman.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/qcaux.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/usb-serial-simple.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/usb/serial/symbolserial.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson [ johan: amend commit message with commit introducing W=1 warning; tweak some descriptions ] Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 + drivers/usb/serial/mxuport.c | 1 + drivers/usb/serial/navman.c | 1 + drivers/usb/serial/qcaux.c | 1 + drivers/usb/serial/symbolserial.c | 1 + drivers/usb/serial/usb-serial-simple.c | 1 + drivers/usb/serial/usb_debug.c | 1 + 7 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 612bea504d7a..0870c6533f80 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -863,4 +863,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Winchiphead CH341 USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index 1f7bb3e4fcf2..942cb0153423 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -1315,4 +1315,5 @@ module_usb_serial_driver(serial_drivers, mxuport_idtable); MODULE_AUTHOR("Andrew Lunn "); MODULE_AUTHOR(""); +MODULE_DESCRIPTION("Moxa UPORT USB Serial driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index 20277c52dded..82791fd67c46 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -112,4 +112,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Navman USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c index 929ffba663f2..015bb7c5d19d 100644 --- a/drivers/usb/serial/qcaux.c +++ b/drivers/usb/serial/qcaux.c @@ -84,4 +84,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Qualcomm USB Auxiliary Serial Port driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index d7f73ad6e778..9aabb087f733 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -190,4 +190,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Symbol USB barcode to serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 24b8772a345e..82f4f0b992aa 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -163,4 +163,5 @@ static const struct usb_device_id id_table[] = { MODULE_DEVICE_TABLE(usb, id_table); module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("USB Serial 'Simple' driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index 6934970f180d..8188776b57d1 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -104,4 +104,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table_combined); +MODULE_DESCRIPTION("USB Debug cable driver"); MODULE_LICENSE("GPL v2"); From 55a15b3a713a3f24360cf9d8dcfd2a3e337321d6 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 19 Jun 2024 21:42:44 +0200 Subject: [PATCH 0005/1523] USB: serial: garmin_gps: annotate struct garmin_packet with __counted_by Use the __counted_by compiler attribute for the data[] flexible array member to improve the results of array bound sanitizers. Reviewed-by: Nathan Chancellor Signed-off-by: Javier Carrasco Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Johan Hovold --- drivers/usb/serial/garmin_gps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 670e942fdaaa..57df6ad183ff 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -104,7 +104,7 @@ struct garmin_packet { int seq; /* the real size of the data array, always > 0 */ int size; - __u8 data[]; + __u8 data[] __counted_by(size); }; /* structure used to keep the current state of the driver */ From df8c0b8a03e871431587a13a6765cb4c601e1573 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 19 Jun 2024 21:42:45 +0200 Subject: [PATCH 0006/1523] USB: serial: garmin_gps: use struct_size() to allocate pkt Use the struct_size macro to calculate the size of the pkt, which includes a trailing flexible array. Suggested-by: Nathan Chancellor Signed-off-by: Javier Carrasco Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Johan Hovold --- drivers/usb/serial/garmin_gps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 57df6ad183ff..6d6ec7eed87c 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -267,8 +267,7 @@ static int pkt_add(struct garmin_data *garmin_data_p, /* process only packets containing data ... */ if (data_length) { - pkt = kmalloc(sizeof(struct garmin_packet)+data_length, - GFP_ATOMIC); + pkt = kmalloc(struct_size(pkt, data, data_length), GFP_ATOMIC); if (!pkt) return 0; From 7b5bdae7740eb6a3d09f9cd4e4b07362a15b86b3 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 28 Jun 2024 17:56:43 -0700 Subject: [PATCH 0007/1523] i915/perf: Remove code to update PWR_CLK_STATE for gen12 PWR_CLK_STATE only needs to be modified up until gen11. For gen12 this code is not applicable. Remove code to update context image with PWR_CLK_STATE for gen12. Fixes: 00a7f0d7155c ("drm/i915/tgl: Add perf support on TGL") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240629005643.3050678-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 33 -------------------------------- 1 file changed, 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b1cd4c7a525..025a79fe5920 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2748,26 +2748,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, return 0; } -static int -gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config, - struct i915_active *active) -{ - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE), - CTX_R_PWR_CLK_STATE, - }, - }; - - if (stream->engine->class != RENDER_CLASS) - return 0; - - return oa_configure_all_contexts(stream, - regs, ARRAY_SIZE(regs), - active); -} - static int lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, @@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; u32 sqcnt1; @@ -2918,15 +2897,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); - /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = gen12_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; - /* * For Gen12, performance counters are context * saved/restored. Only enable it for the context that @@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); } - /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL, NULL); - /* disable the context save/restore or OAR counters */ if (stream->ctx) gen12_configure_oar_context(stream, NULL); From b2013783c4458a1fe8b25c0b249d2e878bcf6999 Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Thu, 4 Jul 2024 13:56:38 +0530 Subject: [PATCH 0008/1523] drm/i915/display: Cache adpative sync caps to use it later Add new member to struct intel_dp to cache support of Adaptive Sync SDP capabilities and use it whenever required to avoid HW access to read capability during each atomic commit. -v2: - Squash both the patches Signed-off-by: Mitul Golani Reviewed-by: Arun R Murthy Link: https://patchwork.freedesktop.org/patch/msgid/20240704082638.2302092-2-mitulkumar.ajitkumar.golani@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_alpm.c | 2 +- .../drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 22 ++++++++++--------- drivers/gpu/drm/i915/display/intel_dp.h | 1 - drivers/gpu/drm/i915/display/intel_vrr.c | 3 +-- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 866b3b409c4d..f4f05a859379 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -280,7 +280,7 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, if (DISPLAY_VER(i915) < 20) return; - if (!intel_dp_as_sdp_supported(intel_dp)) + if (!intel_dp->as_sdp_supported) return; if (crtc_state->has_psr) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 8713835e2307..a9d2acdc51a4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1806,6 +1806,7 @@ struct intel_dp { /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; + bool as_sdp_supported; struct drm_dp_tunnel *tunnel; bool tunnel_suspended:1; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3903f6ead6e6..edfb30857479 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -129,14 +129,6 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp) return dig_port->base.type == INTEL_OUTPUT_EDP; } -bool intel_dp_as_sdp_supported(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - - return HAS_AS_SDP(i915) && - drm_dp_as_sdp_supported(&intel_dp->aux, intel_dp->dpcd); -} - static void intel_dp_unset_edid(struct intel_dp *intel_dp); /* Is link rate UHBR and thus 128b/132b? */ @@ -2625,8 +2617,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!crtc_state->vrr.enable || - !intel_dp_as_sdp_supported(intel_dp)) + if (!crtc_state->vrr.enable || intel_dp->as_sdp_supported) return; crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); @@ -5900,6 +5891,15 @@ intel_dp_detect_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *conn connector); } +static void +intel_dp_detect_sdp_caps(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + intel_dp->as_sdp_supported = HAS_AS_SDP(i915) && + drm_dp_as_sdp_supported(&intel_dp->aux, intel_dp->dpcd); +} + static int intel_dp_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, @@ -5958,6 +5958,8 @@ intel_dp_detect(struct drm_connector *connector, intel_dp_detect_dsc_caps(intel_dp, intel_connector); + intel_dp_detect_sdp_caps(intel_dp); + intel_dp_mst_configure(intel_dp); if (intel_dp->reset_link_params) { diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index a0f990a95ecc..9be539edf817 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -85,7 +85,6 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state); bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp); bool intel_dp_is_edp(struct intel_dp *intel_dp); -bool intel_dp_as_sdp_supported(struct intel_dp *intel_dp); bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state); bool intel_dp_has_dsc(const struct intel_connector *connector); int intel_dp_link_symbol_size(int rate); diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5a0da64c7db3..7e1d9c718214 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -233,8 +233,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } - if (intel_dp_as_sdp_supported(intel_dp) && - crtc_state->vrr.enable) { + if (intel_dp->as_sdp_supported && crtc_state->vrr.enable) { crtc_state->vrr.vsync_start = (crtc_state->hw.adjusted_mode.crtc_vtotal - crtc_state->hw.adjusted_mode.vsync_start); From 7fbad577c82c5dd6db7217855c26f51554e53d85 Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Mon, 8 Jul 2024 14:02:46 +0530 Subject: [PATCH 0009/1523] drm/i915/display: WA for Re-initialize dispcnlunitt1 xosc clock The dispcnlunit1_cp_xosc_clk should be de-asserted in display off and only asserted in display on. As part of this workaround, Display driver shall execute set-reset sequence at the end of the initialize sequence to ensure clk does not remain active in display OFF. --v2: - Rebase. --v3: - Correct HSD number in commit message. --v4: - Reformat commit message. - Use intel_de_rmw instead of intel_de_write --v5: - Build Fixes. WA: 15013987218 Signed-off-by: Mitul Golani Reviewed-by: Nemesa Garg Reviewed-by: Suraj Kandpal Signed-off-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20240708083247.2611258-1-mitulkumar.ajitkumar.golani@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e288a1b21d7e..0af1e34ef2a7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1704,6 +1704,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, /* Wa_14011503030:xelpd */ if (DISPLAY_VER(dev_priv) == 13) intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); + + /* Wa_15013987218 */ + if (DISPLAY_VER(dev_priv) == 20) { + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + 0, PCH_GMBUSUNIT_CLOCK_GATE_DISABLE); + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + PCH_GMBUSUNIT_CLOCK_GATE_DISABLE, 0); + } } static void icl_display_core_uninit(struct drm_i915_private *dev_priv) From a4d5ce61765c08ab364aa4b327f6739b646e6cfa Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:24 +0300 Subject: [PATCH 0010/1523] drm/i915/dp: Reset intel_dp->link_trained before retraining the link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regularly retraining a link during an atomic commit happens with the given pipe/link already disabled and hence intel_dp->link_trained being false. Ensure this also for retraining a DP SST link via direct calls to the link training functions (vs. an actual commit as for DP MST). So far nothing depended on this, however the next patch will depend on link_trained==false for changing the LTTPR mode to non-transparent. Cc: # v5.15+ Cc: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index edfb30857479..d4b1b18453dc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5305,6 +5305,8 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + intel_dp->link_trained = false; + intel_dp_check_frl_training(intel_dp); intel_dp_pcon_dsc_configure(intel_dp, crtc_state); intel_dp_start_link_train(NULL, intel_dp, crtc_state); From 211ad49cf8ccfdc798a719b4d1e000d0a8a9e588 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:25 +0300 Subject: [PATCH 0011/1523] drm/i915/dp: Don't switch the LTTPR mode on an active link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switching to transparent mode leads to a loss of link synchronization, so prevent doing this on an active link. This happened at least on an Intel N100 system / DELL UD22 dock, the LTTPR residing either on the host or the dock. To fix the issue, keep the current mode on an active link, adjusting the LTTPR count accordingly (resetting it to 0 in transparent mode). v2: Adjust code comment during link training about reiniting the LTTPRs. (Ville) Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training") Reported-and-tested-by: Gareth Yu Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902 Cc: # v5.15+ Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-3-imre.deak@intel.com --- .../drm/i915/display/intel_dp_link_training.c | 55 ++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 1bc4ef84ff3b..d044c8e36bb3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; } -static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) +{ + return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == + DP_PHY_REPEATER_MODE_TRANSPARENT; +} + +/* + * Read the LTTPR common capabilities and switch the LTTPR PHYs to + * non-transparent mode if this is supported. Preserve the + * transparent/non-transparent mode on an active link. + * + * Return the number of detected LTTPRs in non-transparent mode or 0 if the + * LTTPRs are in transparent mode or the detection failed. + */ +static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { int lttpr_count; - int i; if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) return 0; @@ -134,6 +148,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI if (lttpr_count == 0) return 0; + /* + * Don't change the mode on an active link, to prevent a loss of link + * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR + * resetting its internal state when the mode is changed from + * non-transparent to transparent. + */ + if (intel_dp->link_trained) { + if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) + goto out_reset_lttpr_count; + + return lttpr_count; + } + /* * See DP Standard v2.0 3.6.6.1. about the explicit disabling of * non-transparent mode and the disable->enable non-transparent mode @@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI "Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n"); intel_dp_set_lttpr_transparent_mode(intel_dp, true); - intel_dp_reset_lttpr_count(intel_dp); - return 0; + goto out_reset_lttpr_count; } + return lttpr_count; + +out_reset_lttpr_count: + intel_dp_reset_lttpr_count(intel_dp); + + return 0; +} + +static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + int lttpr_count; + int i; + + lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); + for (i = 0; i < lttpr_count; i++) intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); @@ -1482,10 +1523,10 @@ void intel_dp_start_link_train(struct intel_atomic_state *state, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; bool passed; - /* - * TODO: Reiniting LTTPRs here won't be needed once proper connector - * HW state readout is added. + * Reinit the LTTPRs here to ensure that they are switched to + * non-transparent mode. During an earlier LTTPR detection this + * could've been prevented by an active link. */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); From 4613aa66e42ba1b0c896495a207b3b26e94e44d5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:26 +0300 Subject: [PATCH 0012/1523] drm/i915/dp: Reset cached LTTPR count if number of LTTPRs is unsupported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After detection the cached LTTPR count can be checked to determine if LTTPRs in non-transparent mode were detected. Reset the cached LTTPR count if the reported number of LTTPRs is invalid to ensure the above checks work as expected. Reviewed-by: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-4-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index d044c8e36bb3..56b9c5cb1254 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -174,7 +174,7 @@ static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_ * still taking into account any LTTPR common lane- rate/count limits. */ if (lttpr_count < 0) - return 0; + goto out_reset_lttpr_count; if (!intel_dp_set_lttpr_transparent_mode(intel_dp, false)) { lt_dbg(intel_dp, DP_PHY_DPRX, From 61fe488fc78a3024b8d4cafc90a34158f4562dae Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:27 +0300 Subject: [PATCH 0013/1523] drm/i915/dp: Keep cached LTTPR mode up-to-date MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing depends on the cached LTTPR mode, however for consistency keep it up-to-date with the value programmed to the DPCD register. Reviewed-by: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 56b9c5cb1254..af0b71bdf1fc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -114,7 +114,13 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) u8 val = enable ? DP_PHY_REPEATER_MODE_TRANSPARENT : DP_PHY_REPEATER_MODE_NON_TRANSPARENT; - return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; + if (drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) != 1) + return false; + + intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] = val; + + return true; } static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) From 3b0974d7b74a908d1caa27f05a97551943e1bffe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:19 +0300 Subject: [PATCH 0014/1523] drm/i915: Calculate vblank delay more accurately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calculate the vblank delay in the vblank evasion code correctly for interlaced modes. The current code assumes that we won't be using an interlaced mode. That assumption is actually valid since we've defeatured interlaced scanout in commit f71c9b7bc35f ("drm/i915/display: Prune Interlace modes for Display >=12") for DSB capable platforms. However the feature is still present in the hardware, and if we ever find the need to re-enable it seems better to calculate the vblank delay correctly. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-2-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_vblank.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 5b065e1cd4e4..f183e0d4b2ba 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -652,7 +652,8 @@ void intel_vblank_evade_init(const struct intel_crtc_state *old_crtc_state, */ if (intel_color_uses_dsb(new_crtc_state) || new_crtc_state->update_m_n || new_crtc_state->update_lrr) - evade->min -= adjusted_mode->crtc_vblank_start - adjusted_mode->crtc_vdisplay; + evade->min -= intel_mode_vblank_start(adjusted_mode) - + intel_mode_vdisplay(adjusted_mode); } /* must be called with vblank interrupt already enabled! */ From 09af6037128c101c280c95c56d970ce8475506c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:20 +0300 Subject: [PATCH 0015/1523] drm/i915: Make vrr_{enabling,disabling}() usable outside intel_display.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give vrr_enabling() and vrr_disabling() slightly fancier names, and pass in the whole atomic state so that they'll be easier to use. We'll need to call at least the disabling part from the DSB code soon enough (so that we can do vblank evasions/etc. correctly on the DSB). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-3-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_display.c | 26 +++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c2c388212e2e..01a5faa3fea5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1014,9 +1014,14 @@ static bool cmrr_params_changed(const struct intel_crtc_state *old_crtc_state, old_crtc_state->cmrr.cmrr_n != new_crtc_state->cmrr.cmrr_n; } -static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +static bool intel_crtc_vrr_enabling(struct intel_atomic_state *state, + struct intel_crtc *crtc) { + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + if (!new_crtc_state->hw.active) return false; @@ -1026,9 +1031,14 @@ static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, vrr_params_changed(old_crtc_state, new_crtc_state))); } -static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +static bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, + struct intel_crtc *crtc) { + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + if (!old_crtc_state->hw.active) return false; @@ -1181,7 +1191,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; - if (vrr_disabling(old_crtc_state, new_crtc_state)) { + if (intel_crtc_vrr_disabling(state, crtc)) { intel_vrr_disable(old_crtc_state); intel_crtc_update_active_timings(old_crtc_state, false); } @@ -6830,8 +6840,6 @@ static void commit_pipe_post_planes(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -6844,7 +6852,7 @@ static void commit_pipe_post_planes(struct intel_atomic_state *state, !intel_crtc_needs_modeset(new_crtc_state)) skl_detach_scalers(new_crtc_state); - if (vrr_enabling(old_crtc_state, new_crtc_state)) + if (intel_crtc_vrr_enabling(state, crtc)) intel_vrr_enable(new_crtc_state); } @@ -6944,7 +6952,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, * * FIXME Should be synchronized with the start of vblank somehow... */ - if (vrr_enabling(old_crtc_state, new_crtc_state) || + if (intel_crtc_vrr_enabling(state, crtc) || new_crtc_state->update_m_n || new_crtc_state->update_lrr) intel_crtc_update_active_timings(new_crtc_state, new_crtc_state->vrr.enable); From bc34d310b578952d37b5200a7fa7475ab2a2bd5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:35 +0300 Subject: [PATCH 0016/1523] drm/i915/fbc: Extract intel_fbc_has_fences() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the "do we have fences?" check into a single helper in the FBC code. Avoids having to call to outside the display code in multiple places for this. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-2-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 67116c9f1464..360b3e6df8fa 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -228,6 +228,11 @@ static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_s return 0; } +static bool intel_fbc_has_fences(struct drm_i915_private *i915) +{ + return intel_gt_support_legacy_fencing(to_gt(i915)); +} + static u32 i8xx_fbc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; @@ -619,7 +624,7 @@ static void ivb_fbc_activate(struct intel_fbc *fbc) else if (DISPLAY_VER(i915) == 9) skl_fbc_program_cfb_stride(fbc); - if (intel_gt_support_legacy_fencing(to_gt(i915))) + if (intel_fbc_has_fences(i915)) snb_fbc_program_fence(fbc); /* wa_14019417088 Alternative WA*/ @@ -1153,7 +1158,7 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, fbc_state->fence_y_offset = intel_plane_fence_y_offset(plane_state); drm_WARN_ON(&i915->drm, plane_state->flags & PLANE_HAS_FENCE && - !intel_gt_support_legacy_fencing(to_gt(i915))); + !intel_fbc_has_fences(i915)); if (plane_state->flags & PLANE_HAS_FENCE) fbc_state->fence_id = i915_vma_fence_id(plane_state->ggtt_vma); From 84f78178b6fe37b5eb8b90b5bb1239abce0b64d8 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Wed, 10 Jul 2024 10:36:13 -0500 Subject: [PATCH 0017/1523] arm64: dts: ti: k3-j784s4-evm: Assign only lanes 0 and 1 to PCIe1 Currently PCIe1 is setup to use SERDES0 lanes 0 thru 3, and USB0 is setup to use SERDES0 lane 3 as well. This overlap in lanes causes the following reset related lane splat: [ 4.846266] WARNING: CPU: 4 PID: 308 at drivers/reset/core.c:792 __reset_control_get_internal+0x128/0x160 ... [ 4.846405] Call trace: [ 4.846407] __reset_control_get_internal+0x128/0x160 [ 4.846413] __of_reset_control_get+0x4e0/0x528 [ 4.846418] of_reset_control_array_get+0xa4/0x1f8 [ 4.846423] cdns_torrent_phy_probe+0xbc8/0x1068 [phy_cadence_torrent] [ 4.846445] platform_probe+0xb4/0xe8 ... [ 4.846577] cdns-torrent-phy 5060000.serdes: phy@0: failed to get reset Let's limit the PCIe1 SERDES0 lanes to 0 and 1 to avoid overlap here. This works since PCIe1 operates in x2 mode and doesn't need 4 SERDES0 lanes. Fixes: 27ce26fe52d4 ("arm64: dts: ti: k3-j784s4-evm: Enable PCIe0 and PCIe1 in RC Mode") Suggested-by: Siddharth Vadapalli Signed-off-by: Andrew Halaney Reviewed-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20240710-k3-j784s4-evm-serdes0-cleanup-v1-1-03850fe33922@redhat.com Signed-off-by: Vignesh Raghavendra --- arch/arm64/boot/dts/ti/k3-j784s4-evm.dts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts index 9338d987180d..e54ccf4f3795 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts @@ -1391,11 +1391,10 @@ serdes0_pcie1_link: phy@0 { reg = <0>; - cdns,num-lanes = <4>; + cdns,num-lanes = <2>; #phy-cells = <0>; cdns,phy-type = ; - resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>, - <&serdes_wiz0 3>, <&serdes_wiz0 4>; + resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; }; }; From cc5049007d722364bca4a4eeb619d5629733a004 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Wed, 10 Jul 2024 10:36:14 -0500 Subject: [PATCH 0018/1523] arm64: dts: ti: k3-j784s4-evm: Consolidate serdes0 references Subnodes were added to serdes0 in two different spots (due to independent development of their consumer usage). Let's go ahead and combine those into one reference for readability's sake. Signed-off-by: Andrew Halaney Reviewed-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20240710-k3-j784s4-evm-serdes0-cleanup-v1-2-03850fe33922@redhat.com Signed-off-by: Vignesh Raghavendra --- arch/arm64/boot/dts/ti/k3-j784s4-evm.dts | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts index e54ccf4f3795..ffa38f41679d 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts @@ -1262,6 +1262,14 @@ &serdes0 { status = "okay"; + serdes0_pcie1_link: phy@0 { + reg = <0>; + cdns,num-lanes = <2>; + #phy-cells = <0>; + cdns,phy-type = ; + resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; + }; + serdes0_usb_link: phy@3 { reg = <3>; cdns,num-lanes = <1>; @@ -1386,22 +1394,6 @@ phys = <&transceiver3>; }; -&serdes0 { - status = "okay"; - - serdes0_pcie1_link: phy@0 { - reg = <0>; - cdns,num-lanes = <2>; - #phy-cells = <0>; - cdns,phy-type = ; - resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; - }; -}; - -&serdes_wiz0 { - status = "okay"; -}; - &pcie1_rc { status = "okay"; num-lanes = <2>; From fd5a9b950ea8ba2c08ce07418b36b296a04d712b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:36 +0300 Subject: [PATCH 0019/1523] drm/i915/fbc: Convert to intel_display, mostly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch the FBC code over to intel_display from i915, as much as possible. This is the future direction so that the display code can be shared between i915 and xe more cleanly. Some of the platform checks and the stolen mem facing stiff still need i915 around though. v2: Drop some redundant to_i915() casts Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-3-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- .../drm/i915/display/intel_display_debugfs.c | 4 +- .../drm/i915/display/intel_display_driver.c | 4 +- drivers/gpu/drm/i915/display/intel_fbc.c | 420 ++++++++++-------- drivers/gpu/drm/i915/display/intel_fbc.h | 13 +- .../drm/i915/display/intel_fifo_underrun.c | 2 +- .../drm/i915/display/intel_modeset_setup.c | 2 +- 6 files changed, 238 insertions(+), 207 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 91757fed9c6d..5cf9b4af9adf 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1008,7 +1008,7 @@ i915_fifo_underrun_reset_write(struct file *filp, return ret; } - intel_fbc_reset_underrun(dev_priv); + intel_fbc_reset_underrun(&dev_priv->display); return cnt; } @@ -1063,7 +1063,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) intel_bios_debugfs_register(i915); intel_cdclk_debugfs_register(i915); intel_dmc_debugfs_register(i915); - intel_fbc_debugfs_register(i915); + intel_fbc_debugfs_register(&i915->display); intel_hpd_debugfs_register(i915); intel_opregion_debugfs_register(i915); intel_psr_debugfs_register(i915); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 794b4af38055..13e206ec450f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -265,7 +265,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) intel_init_quirks(display); - intel_fbc_init(i915); + intel_fbc_init(display); return 0; @@ -607,7 +607,7 @@ void intel_display_driver_remove_noirq(struct drm_i915_private *i915) destroy_workqueue(i915->display.wq.flip); destroy_workqueue(i915->display.wq.modeset); - intel_fbc_cleanup(i915); + intel_fbc_cleanup(&i915->display); } /* part #3: call after gem init */ diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 360b3e6df8fa..9c51f48b54f6 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -60,13 +60,13 @@ #include "intel_fbc_regs.h" #include "intel_frontbuffer.h" -#define for_each_fbc_id(__dev_priv, __fbc_id) \ +#define for_each_fbc_id(__display, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ - for_each_if(DISPLAY_RUNTIME_INFO(__dev_priv)->fbc_mask & BIT(__fbc_id)) + for_each_if(DISPLAY_RUNTIME_INFO(__display)->fbc_mask & BIT(__fbc_id)) -#define for_each_intel_fbc(__dev_priv, __fbc, __fbc_id) \ - for_each_fbc_id((__dev_priv), (__fbc_id)) \ - for_each_if((__fbc) = (__dev_priv)->display.fbc[(__fbc_id)]) +#define for_each_intel_fbc(__display, __fbc, __fbc_id) \ + for_each_fbc_id((__display), (__fbc_id)) \ + for_each_if((__fbc) = (__display)->fbc[(__fbc_id)]) struct intel_fbc_funcs { void (*activate)(struct intel_fbc *fbc); @@ -89,7 +89,7 @@ struct intel_fbc_state { }; struct intel_fbc { - struct drm_i915_private *i915; + struct intel_display *display; const struct intel_fbc_funcs *funcs; /* @@ -150,7 +150,7 @@ static unsigned int _intel_fbc_cfb_stride(const struct intel_plane_state *plane_ /* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int limit = 4; /* 1:4 compression limit is the worst case */ unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; @@ -164,7 +164,7 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane * Wa_16011863758: icl+ * Avoid some hardware segment address miscalculation. */ - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) stride += 64; /* @@ -180,7 +180,7 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane /* properly aligned cfb stride in bytes, assuming 1:1 compression limit */ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int stride = _intel_fbc_cfb_stride(plane_state); /* @@ -188,7 +188,7 @@ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_s * be 512 byte aligned. Aligning each line to 512 bytes guarantees * that regardless of the compression limit we choose later. */ - if (DISPLAY_VER(i915) >= 9) + if (DISPLAY_VER(display) >= 9) return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(plane_state)); else return stride; @@ -196,12 +196,12 @@ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_s static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); int lines = drm_rect_height(&plane_state->uapi.src) >> 16; - if (DISPLAY_VER(i915) == 7) + if (DISPLAY_VER(display) == 7) lines = min(lines, 2048); - else if (DISPLAY_VER(i915) >= 8) + else if (DISPLAY_VER(display) >= 8) lines = min(lines, 2560); return lines * intel_fbc_cfb_stride(plane_state); @@ -209,7 +209,7 @@ static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_sta static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int stride_aligned = intel_fbc_cfb_stride(plane_state); unsigned int stride = _intel_fbc_cfb_stride(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -222,28 +222,31 @@ static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_s * we always need to use the override there. */ if (stride != stride_aligned || - (DISPLAY_VER(i915) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) + (DISPLAY_VER(display) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) return stride_aligned * 4 / 64; return 0; } -static bool intel_fbc_has_fences(struct drm_i915_private *i915) +static bool intel_fbc_has_fences(struct intel_display *display) { + struct drm_i915_private __maybe_unused *i915 = to_i915(display->drm); + return intel_gt_support_legacy_fencing(to_gt(i915)); } static u32 i8xx_fbc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); unsigned int cfb_stride; u32 fbc_ctl; cfb_stride = fbc_state->cfb_stride / fbc->limit; /* FBC_CTL wants 32B or 64B units */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) cfb_stride = (cfb_stride / 32) - 1; else cfb_stride = (cfb_stride / 64) - 1; @@ -277,21 +280,21 @@ static u32 i965_fbc_ctl2(struct intel_fbc *fbc) static void i8xx_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 fbc_ctl; /* Disable compression */ - fbc_ctl = intel_de_read(i915, FBC_CONTROL); + fbc_ctl = intel_de_read(display, FBC_CONTROL); if ((fbc_ctl & FBC_CTL_EN) == 0) return; fbc_ctl &= ~FBC_CTL_EN; - intel_de_write(i915, FBC_CONTROL, fbc_ctl); + intel_de_write(display, FBC_CONTROL, fbc_ctl); /* Wait for compressing bit to clear */ - if (intel_de_wait_for_clear(i915, FBC_STATUS, + if (intel_de_wait_for_clear(display, FBC_STATUS, FBC_STAT_COMPRESSING, 10)) { - drm_dbg_kms(&i915->drm, "FBC idle timed out\n"); + drm_dbg_kms(display->drm, "FBC idle timed out\n"); return; } } @@ -299,32 +302,32 @@ static void i8xx_fbc_deactivate(struct intel_fbc *fbc) static void i8xx_fbc_activate(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; int i; /* Clear old tags */ for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) - intel_de_write(i915, FBC_TAG(i), 0); + intel_de_write(display, FBC_TAG(i), 0); - if (DISPLAY_VER(i915) == 4) { - intel_de_write(i915, FBC_CONTROL2, + if (DISPLAY_VER(display) == 4) { + intel_de_write(display, FBC_CONTROL2, i965_fbc_ctl2(fbc)); - intel_de_write(i915, FBC_FENCE_OFF, + intel_de_write(display, FBC_FENCE_OFF, fbc_state->fence_y_offset); } - intel_de_write(i915, FBC_CONTROL, + intel_de_write(display, FBC_CONTROL, FBC_CTL_EN | i8xx_fbc_ctl(fbc)); } static bool i8xx_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, FBC_CONTROL) & FBC_CTL_EN; + return intel_de_read(fbc->display, FBC_CONTROL) & FBC_CTL_EN; } static bool i8xx_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, FBC_STATUS) & + return intel_de_read(fbc->display, FBC_STATUS) & (FBC_STAT_COMPRESSING | FBC_STAT_COMPRESSED); } @@ -332,7 +335,7 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; - struct drm_i915_private *dev_priv = fbc->i915; + struct drm_i915_private *dev_priv = to_i915(fbc->display->drm); intel_de_write_fw(dev_priv, DSPADDR(dev_priv, i9xx_plane), intel_de_read_fw(dev_priv, DSPADDR(dev_priv, i9xx_plane))); @@ -340,13 +343,14 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_fb), U32_MAX)); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_llb), U32_MAX)); @@ -369,7 +373,7 @@ static void i965_fbc_nuke(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; - struct drm_i915_private *dev_priv = fbc->i915; + struct drm_i915_private *dev_priv = to_i915(fbc->display->drm); intel_de_write_fw(dev_priv, DSPSURF(dev_priv, i9xx_plane), intel_de_read_fw(dev_priv, DSPSURF(dev_priv, i9xx_plane))); @@ -402,7 +406,8 @@ static u32 g4x_dpfc_ctl_limit(struct intel_fbc *fbc) static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc) | @@ -414,7 +419,7 @@ static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) if (fbc_state->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN_G4X; - if (DISPLAY_VER(i915) < 6) + if (DISPLAY_VER(display) < 6) dpfc_ctl |= DPFC_CTL_FENCENO(fbc_state->fence_id); } @@ -424,43 +429,43 @@ static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) static void g4x_fbc_activate(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, DPFC_FENCE_YOFF, + intel_de_write(display, DPFC_FENCE_YOFF, fbc_state->fence_y_offset); - intel_de_write(i915, DPFC_CONTROL, + intel_de_write(display, DPFC_CONTROL, DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); } static void g4x_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; /* Disable compression */ - dpfc_ctl = intel_de_read(i915, DPFC_CONTROL); + dpfc_ctl = intel_de_read(display, DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; - intel_de_write(i915, DPFC_CONTROL, dpfc_ctl); + intel_de_write(display, DPFC_CONTROL, dpfc_ctl); } } static bool g4x_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, DPFC_CONTROL) & DPFC_CTL_EN; + return intel_de_read(fbc->display, DPFC_CONTROL) & DPFC_CTL_EN; } static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, DPFC_STATUS) & DPFC_COMP_SEG_MASK; + return intel_de_read(fbc->display, DPFC_STATUS) & DPFC_COMP_SEG_MASK; } static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, DPFC_CB_BASE, + intel_de_write(display, DPFC_CB_BASE, i915_gem_stolen_node_offset(&fbc->compressed_fb)); } @@ -476,43 +481,43 @@ static const struct intel_fbc_funcs g4x_fbc_funcs = { static void ilk_fbc_activate(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, ILK_DPFC_FENCE_YOFF(fbc->id), + intel_de_write(display, ILK_DPFC_FENCE_YOFF(fbc->id), fbc_state->fence_y_offset); - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); } static void ilk_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; /* Disable compression */ - dpfc_ctl = intel_de_read(i915, ILK_DPFC_CONTROL(fbc->id)); + dpfc_ctl = intel_de_read(display, ILK_DPFC_CONTROL(fbc->id)); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); } } static bool ilk_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_CONTROL(fbc->id)) & DPFC_CTL_EN; + return intel_de_read(fbc->display, ILK_DPFC_CONTROL(fbc->id)) & DPFC_CTL_EN; } static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_STATUS(fbc->id)) & DPFC_COMP_SEG_MASK; + return intel_de_read(fbc->display, ILK_DPFC_STATUS(fbc->id)) & DPFC_COMP_SEG_MASK; } static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), + intel_de_write(display, ILK_DPFC_CB_BASE(fbc->id), i915_gem_stolen_node_offset(&fbc->compressed_fb)); } @@ -528,14 +533,14 @@ static const struct intel_fbc_funcs ilk_fbc_funcs = { static void snb_fbc_program_fence(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 ctl = 0; if (fbc_state->fence_id >= 0) ctl = SNB_DPFC_FENCE_EN | SNB_DPFC_FENCENO(fbc_state->fence_id); - intel_de_write(i915, SNB_DPFC_CTL_SA, ctl); - intel_de_write(i915, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset); + intel_de_write(display, SNB_DPFC_CTL_SA, ctl); + intel_de_write(display, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset); } static void snb_fbc_activate(struct intel_fbc *fbc) @@ -547,10 +552,10 @@ static void snb_fbc_activate(struct intel_fbc *fbc) static void snb_fbc_nuke(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, MSG_FBC_REND_STATE(fbc->id), FBC_REND_NUKE); - intel_de_posting_read(i915, MSG_FBC_REND_STATE(fbc->id)); + intel_de_write(display, MSG_FBC_REND_STATE(fbc->id), FBC_REND_NUKE); + intel_de_posting_read(display, MSG_FBC_REND_STATE(fbc->id)); } static const struct intel_fbc_funcs snb_fbc_funcs = { @@ -565,20 +570,20 @@ static const struct intel_fbc_funcs snb_fbc_funcs = { static void glk_fbc_program_cfb_stride(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 val = 0; if (fbc_state->override_cfb_stride) val |= FBC_STRIDE_OVERRIDE | FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); - intel_de_write(i915, GLK_FBC_STRIDE(fbc->id), val); + intel_de_write(display, GLK_FBC_STRIDE(fbc->id), val); } static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 val = 0; /* Display WA #0529: skl, kbl, bxt. */ @@ -586,7 +591,7 @@ static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) val |= CHICKEN_FBC_STRIDE_OVERRIDE | CHICKEN_FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); - intel_de_rmw(i915, CHICKEN_MISC_4, + intel_de_rmw(display, CHICKEN_MISC_4, CHICKEN_FBC_STRIDE_OVERRIDE | CHICKEN_FBC_STRIDE_MASK, val); } @@ -594,7 +599,8 @@ static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc); @@ -602,7 +608,7 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) if (IS_IVYBRIDGE(i915)) dpfc_ctl |= DPFC_CTL_PLANE_IVB(fbc_state->plane->i9xx_plane); - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) dpfc_ctl |= DPFC_CTL_PLANE_BINDING(fbc_state->plane->id); if (fbc_state->fence_id >= 0) @@ -616,35 +622,35 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) static void ivb_fbc_activate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; - if (DISPLAY_VER(i915) >= 10) + if (DISPLAY_VER(display) >= 10) glk_fbc_program_cfb_stride(fbc); - else if (DISPLAY_VER(i915) == 9) + else if (DISPLAY_VER(display) == 9) skl_fbc_program_cfb_stride(fbc); - if (intel_fbc_has_fences(i915)) + if (intel_fbc_has_fences(display)) snb_fbc_program_fence(fbc); /* wa_14019417088 Alternative WA*/ dpfc_ctl = ivb_dpfc_ctl(fbc); - if (DISPLAY_VER(i915) >= 20) - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); + if (DISPLAY_VER(display) >= 20) + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_EN | dpfc_ctl); } static bool ivb_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_STATUS2(fbc->id)) & DPFC_COMP_SEG_MASK_IVB; + return intel_de_read(fbc->display, ILK_DPFC_STATUS2(fbc->id)) & DPFC_COMP_SEG_MASK_IVB; } static void ivb_fbc_set_false_color(struct intel_fbc *fbc, bool enable) { - intel_de_rmw(fbc->i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_rmw(fbc->display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_FALSE_COLOR, enable ? DPFC_CTL_FALSE_COLOR : 0); } @@ -689,10 +695,10 @@ static bool intel_fbc_is_compressing(struct intel_fbc *fbc) static void intel_fbc_nuke(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; lockdep_assert_held(&fbc->lock); - drm_WARN_ON(&i915->drm, fbc->flip_pending); + drm_WARN_ON(display->drm, fbc->flip_pending); trace_intel_fbc_nuke(fbc->state.plane); @@ -719,16 +725,19 @@ static void intel_fbc_deactivate(struct intel_fbc *fbc, const char *reason) fbc->no_fbc_reason = reason; } -static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) +static u64 intel_fbc_cfb_base_max(struct intel_display *display) { - if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return BIT_ULL(28); else return BIT_ULL(32); } -static u64 intel_fbc_stolen_end(struct drm_i915_private *i915) +static u64 intel_fbc_stolen_end(struct intel_display *display) { + struct drm_i915_private __maybe_unused *i915 = to_i915(display->drm); u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen @@ -736,12 +745,12 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private *i915) * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ if (IS_BROADWELL(i915) || - (DISPLAY_VER(i915) == 9 && !IS_BROXTON(i915))) + (DISPLAY_VER(display) == 9 && !IS_BROXTON(i915))) end = i915_gem_stolen_area_size(i915) - 8 * 1024 * 1024; else end = U64_MAX; - return min(end, intel_fbc_cfb_base_max(i915)); + return min(end, intel_fbc_cfb_base_max(display)); } static int intel_fbc_min_limit(const struct intel_plane_state *plane_state) @@ -749,8 +758,10 @@ static int intel_fbc_min_limit(const struct intel_plane_state *plane_state) return plane_state->hw.fb->format->cpp[0] == 2 ? 2 : 1; } -static int intel_fbc_max_limit(struct drm_i915_private *i915) +static int intel_fbc_max_limit(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); + /* WaFbcOnly1to1Ratio:ctg */ if (IS_G4X(i915)) return 1; @@ -765,8 +776,9 @@ static int intel_fbc_max_limit(struct drm_i915_private *i915) static int find_compression_limit(struct intel_fbc *fbc, unsigned int size, int min_limit) { - struct drm_i915_private *i915 = fbc->i915; - u64 end = intel_fbc_stolen_end(i915); + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); + u64 end = intel_fbc_stolen_end(display); int ret, limit = min_limit; size /= limit; @@ -777,7 +789,7 @@ static int find_compression_limit(struct intel_fbc *fbc, if (ret == 0) return limit; - for (; limit <= intel_fbc_max_limit(i915); limit <<= 1) { + for (; limit <= intel_fbc_max_limit(display); limit <<= 1) { ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb, size >>= 1, 4096, 0, end); if (ret == 0) @@ -790,15 +802,16 @@ static int find_compression_limit(struct intel_fbc *fbc, static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, unsigned int size, int min_limit) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); int ret; - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, i915_gem_stolen_node_allocated(&fbc->compressed_fb)); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, i915_gem_stolen_node_allocated(&fbc->compressed_llb)); - if (DISPLAY_VER(i915) < 5 && !IS_G4X(i915)) { + if (DISPLAY_VER(display) < 5 && !IS_G4X(i915)) { ret = i915_gem_stolen_insert_node(i915, &fbc->compressed_llb, 4096, 4096); if (ret) @@ -809,12 +822,12 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, if (!ret) goto err_llb; else if (ret > min_limit) - drm_info_once(&i915->drm, + drm_info_once(display->drm, "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); fbc->limit = ret; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n", i915_gem_stolen_node_size(&fbc->compressed_fb), fbc->limit); return 0; @@ -824,7 +837,8 @@ err_llb: i915_gem_stolen_remove_node(i915, &fbc->compressed_llb); err: if (i915_gem_stolen_initialized(i915)) - drm_info_once(&i915->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + drm_info_once(display->drm, + "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -835,14 +849,15 @@ static void intel_fbc_program_cfb(struct intel_fbc *fbc) static void intel_fbc_program_workarounds(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (IS_SKYLAKE(i915) || IS_BROXTON(i915)) { /* * WaFbcHighMemBwCorruptionAvoidance:skl,bxt * Display WA #0883: skl,bxt */ - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_DISABLE_DUMMY0); } @@ -852,24 +867,25 @@ static void intel_fbc_program_workarounds(struct intel_fbc *fbc) * WaFbcNukeOnHostModify:skl,kbl,cfl * Display WA #0873: skl,kbl,cfl */ - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_NUKE_ON_ANY_MODIFICATION); } /* Wa_1409120013:icl,jsl,tgl,dg1 */ - if (IS_DISPLAY_VER(i915, 11, 12)) - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + if (IS_DISPLAY_VER(display, 11, 12)) + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_COMP_DUMMY_PIXEL); /* Wa_22014263786:icl,jsl,tgl,dg1,rkl,adls,adlp,mtl */ - if (DISPLAY_VER(i915) >= 11 && !IS_DG2(i915)) - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + if (DISPLAY_VER(display) >= 11 && !IS_DG2(i915)) + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_FORCE_SLB_INVALIDATION); } static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (WARN_ON(intel_fbc_hw_is_active(fbc))) return; @@ -880,12 +896,12 @@ static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) i915_gem_stolen_remove_node(i915, &fbc->compressed_fb); } -void intel_fbc_cleanup(struct drm_i915_private *i915) +void intel_fbc_cleanup(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) { + for_each_intel_fbc(display, fbc, fbc_id) { mutex_lock(&fbc->lock); __intel_fbc_cleanup_cfb(fbc); mutex_unlock(&fbc->lock); @@ -937,15 +953,16 @@ static bool icl_fbc_stride_is_valid(const struct intel_plane_state *plane_state) static bool stride_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) return icl_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 9) + else if (DISPLAY_VER(display) >= 9) return skl_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) == 4) + else if (DISPLAY_VER(display) == 4) return i965_fbc_stride_is_valid(plane_state); else return i8xx_fbc_stride_is_valid(plane_state); @@ -953,7 +970,7 @@ static bool stride_is_valid(const struct intel_plane_state *plane_state) static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; switch (fb->format->format) { @@ -963,7 +980,7 @@ static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane case DRM_FORMAT_XRGB1555: case DRM_FORMAT_RGB565: /* 16bpp not supported on gen2 */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) return false; return true; default: @@ -973,7 +990,8 @@ static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane static bool g4x_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); const struct drm_framebuffer *fb = plane_state->hw.fb; switch (fb->format->format) { @@ -1008,11 +1026,12 @@ static bool lnl_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_ static bool pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) return lnl_fbc_pixel_format_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_pixel_format_is_valid(plane_state); else return i8xx_fbc_pixel_format_is_valid(plane_state); @@ -1042,11 +1061,12 @@ static bool skl_fbc_rotation_is_valid(const struct intel_plane_state *plane_stat static bool rotation_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 9) + if (DISPLAY_VER(display) >= 9) return skl_fbc_rotation_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_rotation_is_valid(plane_state); else return i8xx_fbc_rotation_is_valid(plane_state); @@ -1060,19 +1080,20 @@ static bool rotation_is_valid(const struct intel_plane_state *plane_state) */ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); unsigned int effective_w, effective_h, max_w, max_h; - if (DISPLAY_VER(i915) >= 11) { + if (DISPLAY_VER(display) >= 11) { max_w = 8192; max_h = 4096; - } else if (DISPLAY_VER(i915) >= 10) { + } else if (DISPLAY_VER(display) >= 10) { max_w = 5120; max_h = 4096; - } else if (DISPLAY_VER(i915) >= 7) { + } else if (DISPLAY_VER(display) >= 7) { max_w = 4096; max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { + } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { max_w = 4096; max_h = 2048; } else { @@ -1090,16 +1111,17 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state * static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); unsigned int w, h, max_w, max_h; - if (DISPLAY_VER(i915) >= 10) { + if (DISPLAY_VER(display) >= 10) { max_w = 5120; max_h = 4096; - } else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) { + } else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(i915)) { max_w = 4096; max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { + } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { max_w = 4096; max_h = 2048; } else { @@ -1127,9 +1149,9 @@ static bool skl_fbc_tiling_valid(const struct intel_plane_state *plane_state) static bool tiling_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - if (DISPLAY_VER(i915) >= 9) + if (DISPLAY_VER(display) >= 9) return skl_fbc_tiling_valid(plane_state); else return i8xx_fbc_tiling_valid(plane_state); @@ -1139,7 +1161,7 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); const struct intel_plane_state *plane_state = @@ -1157,8 +1179,8 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, fbc_state->fence_y_offset = intel_plane_fence_y_offset(plane_state); - drm_WARN_ON(&i915->drm, plane_state->flags & PLANE_HAS_FENCE && - !intel_fbc_has_fences(i915)); + drm_WARN_ON(display->drm, plane_state->flags & PLANE_HAS_FENCE && + !intel_fbc_has_fences(display)); if (plane_state->flags & PLANE_HAS_FENCE) fbc_state->fence_id = i915_vma_fence_id(plane_state->ggtt_vma); @@ -1172,7 +1194,7 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); /* * The use of a CPU fence is one of two ways to detect writes by the @@ -1186,7 +1208,7 @@ static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state) * so have no fence associated with it) due to aperture constraints * at the time of pinning. */ - return DISPLAY_VER(i915) >= 9 || + return DISPLAY_VER(display) >= 9 || (plane_state->flags & PLANE_HAS_FENCE && i915_vma_fence_id(plane_state->ggtt_vma) != -1); } @@ -1211,7 +1233,8 @@ static bool intel_fbc_is_ok(const struct intel_plane_state *plane_state) static int intel_fbc_check_plane(struct intel_atomic_state *state, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1232,7 +1255,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!i915->display.params.enable_fbc) { + if (!display->params.enable_fbc) { plane_state->no_fbc_reason = "disabled per module param or by default"; return 0; } @@ -1265,14 +1288,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_sel_update && + if (IS_DISPLAY_VER(display, 12, 14) && crtc_state->has_sel_update && !crtc_state->has_panel_replay) { plane_state->no_fbc_reason = "PSR2 enabled"; return 0; } /* Wa_14016291713 */ - if ((IS_DISPLAY_VER(i915, 12, 13) || + if ((IS_DISPLAY_VER(display, 12, 13) || IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) && crtc_state->has_psr && !crtc_state->has_panel_replay) { plane_state->no_fbc_reason = "PSR1 enabled (Wa_14016291713)"; @@ -1299,7 +1322,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (DISPLAY_VER(i915) < 20 && + if (DISPLAY_VER(display) < 20 && plane_state->hw.pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE && fb->format->has_alpha) { plane_state->no_fbc_reason = "per-pixel alpha not supported"; @@ -1321,14 +1344,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. */ - if (DISPLAY_VER(i915) >= 9 && + if (DISPLAY_VER(display) >= 9 && plane_state->view.color_plane[0].y & 3) { plane_state->no_fbc_reason = "plane start Y offset misaligned"; return 0; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ - if (DISPLAY_VER(i915) >= 11 && + if (DISPLAY_VER(display) >= 11 && (plane_state->view.color_plane[0].y + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; @@ -1404,7 +1427,7 @@ static bool __intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); struct intel_fbc *fbc = plane->fbc; bool need_vblank_wait = false; @@ -1430,7 +1453,7 @@ static bool __intel_fbc_pre_update(struct intel_atomic_state *state, * and skipping the extra vblank wait before the plane update * if at least one frame has already passed. */ - if (fbc->activated && DISPLAY_VER(i915) >= 10) + if (fbc->activated && DISPLAY_VER(display) >= 10) need_vblank_wait = true; fbc->activated = false; @@ -1464,13 +1487,13 @@ bool intel_fbc_pre_update(struct intel_atomic_state *state, static void __intel_fbc_disable(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; struct intel_plane *plane = fbc->state.plane; lockdep_assert_held(&fbc->lock); - drm_WARN_ON(&i915->drm, fbc->active); + drm_WARN_ON(display->drm, fbc->active); - drm_dbg_kms(&i915->drm, "Disabling FBC on [PLANE:%d:%s]\n", + drm_dbg_kms(display->drm, "Disabling FBC on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); __intel_fbc_cleanup_cfb(fbc); @@ -1547,7 +1570,7 @@ void intel_fbc_invalidate(struct drm_i915_private *i915, struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(&i915->display, fbc, fbc_id) __intel_fbc_invalidate(fbc, frontbuffer_bits, origin); } @@ -1586,7 +1609,7 @@ void intel_fbc_flush(struct drm_i915_private *i915, struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(&i915->display, fbc, fbc_id) __intel_fbc_flush(fbc, frontbuffer_bits, origin); } @@ -1611,7 +1634,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); struct intel_fbc *fbc = plane->fbc; @@ -1630,7 +1653,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, __intel_fbc_disable(fbc); } - drm_WARN_ON(&i915->drm, fbc->active); + drm_WARN_ON(display->drm, fbc->active); fbc->no_fbc_reason = plane_state->no_fbc_reason; if (fbc->no_fbc_reason) @@ -1652,7 +1675,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, return; } - drm_dbg_kms(&i915->drm, "Enabling FBC on [PLANE:%d:%s]\n", + drm_dbg_kms(display->drm, "Enabling FBC on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fbc->no_fbc_reason = "FBC enabled but not active yet\n"; @@ -1670,10 +1693,10 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, */ void intel_fbc_disable(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc->base.dev); struct intel_plane *plane; - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { struct intel_fbc *fbc = plane->fbc; if (!fbc || plane->pipe != crtc->pipe) @@ -1718,7 +1741,8 @@ void intel_fbc_update(struct intel_atomic_state *state, static void intel_fbc_underrun_work_fn(struct work_struct *work) { struct intel_fbc *fbc = container_of(work, typeof(*fbc), underrun_work); - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); mutex_lock(&fbc->lock); @@ -1726,7 +1750,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work) if (fbc->underrun_detected || !fbc->state.plane) goto out; - drm_dbg_kms(&i915->drm, "Disabling FBC due to FIFO underrun.\n"); + drm_dbg_kms(display->drm, "Disabling FBC due to FIFO underrun.\n"); fbc->underrun_detected = true; intel_fbc_deactivate(fbc, "FIFO underrun"); @@ -1739,14 +1763,14 @@ out: static void __intel_fbc_reset_underrun(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; cancel_work_sync(&fbc->underrun_work); mutex_lock(&fbc->lock); if (fbc->underrun_detected) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Re-allowing FBC after fifo underrun\n"); fbc->no_fbc_reason = "FIFO underrun cleared"; } @@ -1757,22 +1781,24 @@ static void __intel_fbc_reset_underrun(struct intel_fbc *fbc) /* * intel_fbc_reset_underrun - reset FBC fifo underrun status. - * @i915: the i915 device + * @display: display * * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we * want to re-enable FBC after an underrun to increase test coverage. */ -void intel_fbc_reset_underrun(struct drm_i915_private *i915) +void intel_fbc_reset_underrun(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(display, fbc, fbc_id) __intel_fbc_reset_underrun(fbc); } static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) { + struct drm_i915_private *i915 = to_i915(fbc->display->drm); + /* * There's no guarantee that underrun_detected won't be set to true * right after this check and before the work is scheduled, but that's @@ -1784,12 +1810,12 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) if (READ_ONCE(fbc->underrun_detected)) return; - queue_work(fbc->i915->unordered_wq, &fbc->underrun_work); + queue_work(i915->unordered_wq, &fbc->underrun_work); } /** * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun - * @i915: i915 device + * @display: display * * Without FBC, most underruns are harmless and don't really cause too many * problems, except for an annoying message on dmesg. With FBC, underruns can @@ -1801,12 +1827,12 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) * * This function is called from the IRQ handler. */ -void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) +void intel_fbc_handle_fifo_underrun_irq(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(display, fbc, fbc_id) __intel_fbc_handle_fifo_underrun_irq(fbc); } @@ -1819,15 +1845,17 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) * space to change the value during runtime without sanitizing it again. IGT * relies on being able to change i915.enable_fbc at runtime. */ -static int intel_sanitize_fbc_option(struct drm_i915_private *i915) +static int intel_sanitize_fbc_option(struct intel_display *display) { - if (i915->display.params.enable_fbc >= 0) - return !!i915->display.params.enable_fbc; + struct drm_i915_private *i915 = to_i915(display->drm); - if (!HAS_FBC(i915)) + if (display->params.enable_fbc >= 0) + return !!display->params.enable_fbc; + + if (!HAS_FBC(display)) return 0; - if (IS_BROADWELL(i915) || DISPLAY_VER(i915) >= 9) + if (IS_BROADWELL(i915) || DISPLAY_VER(display) >= 9) return 1; return 0; @@ -1838,9 +1866,10 @@ void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane) plane->fbc = fbc; } -static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, +static struct intel_fbc *intel_fbc_create(struct intel_display *display, enum intel_fbc_id fbc_id) { + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_fbc *fbc; fbc = kzalloc(sizeof(*fbc), GFP_KERNEL); @@ -1848,19 +1877,19 @@ static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, return NULL; fbc->id = fbc_id; - fbc->i915 = i915; + fbc->display = display; INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); mutex_init(&fbc->lock); - if (DISPLAY_VER(i915) >= 7) + if (DISPLAY_VER(display) >= 7) fbc->funcs = &ivb_fbc_funcs; - else if (DISPLAY_VER(i915) == 6) + else if (DISPLAY_VER(display) == 6) fbc->funcs = &snb_fbc_funcs; - else if (DISPLAY_VER(i915) == 5) + else if (DISPLAY_VER(display) == 5) fbc->funcs = &ilk_fbc_funcs; else if (IS_G4X(i915)) fbc->funcs = &g4x_fbc_funcs; - else if (DISPLAY_VER(i915) == 4) + else if (DISPLAY_VER(display) == 4) fbc->funcs = &i965_fbc_funcs; else fbc->funcs = &i8xx_fbc_funcs; @@ -1870,36 +1899,36 @@ static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, /** * intel_fbc_init - Initialize FBC - * @i915: the i915 device + * @display: display * * This function might be called during PM init process. */ -void intel_fbc_init(struct drm_i915_private *i915) +void intel_fbc_init(struct intel_display *display) { enum intel_fbc_id fbc_id; - i915->display.params.enable_fbc = intel_sanitize_fbc_option(i915); - drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n", - i915->display.params.enable_fbc); + display->params.enable_fbc = intel_sanitize_fbc_option(display); + drm_dbg_kms(display->drm, "Sanitized enable_fbc value: %d\n", + display->params.enable_fbc); - for_each_fbc_id(i915, fbc_id) - i915->display.fbc[fbc_id] = intel_fbc_create(i915, fbc_id); + for_each_fbc_id(display, fbc_id) + display->fbc[fbc_id] = intel_fbc_create(display, fbc_id); } /** * intel_fbc_sanitize - Sanitize FBC - * @i915: the i915 device + * @display: display * * Make sure FBC is initially disabled since we have no * idea eg. into which parts of stolen it might be scribbling * into. */ -void intel_fbc_sanitize(struct drm_i915_private *i915) +void intel_fbc_sanitize(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) { + for_each_intel_fbc(display, fbc, fbc_id) { if (intel_fbc_hw_is_active(fbc)) intel_fbc_hw_deactivate(fbc); } @@ -1908,11 +1937,12 @@ void intel_fbc_sanitize(struct drm_i915_private *i915) static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) { struct intel_fbc *fbc = m->private; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_plane *plane; intel_wakeref_t wakeref; - drm_modeset_lock_all(&i915->drm); + drm_modeset_lock_all(display->drm); wakeref = intel_runtime_pm_get(&i915->runtime_pm); mutex_lock(&fbc->lock); @@ -1925,7 +1955,7 @@ static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); } - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -1941,7 +1971,7 @@ static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) mutex_unlock(&fbc->lock); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - drm_modeset_unlock_all(&i915->drm); + drm_modeset_unlock_all(display->drm); return 0; } @@ -1998,12 +2028,12 @@ void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc) } /* FIXME: remove this once igt is on board with per-crtc stuff */ -void intel_fbc_debugfs_register(struct drm_i915_private *i915) +void intel_fbc_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; struct intel_fbc *fbc; - fbc = i915->display.fbc[INTEL_FBC_A]; + fbc = display->fbc[INTEL_FBC_A]; if (fbc) intel_fbc_debugfs_add(fbc, minor->debugfs_root); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h index 6720ec8ee8a2..ceae55458e14 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.h +++ b/drivers/gpu/drm/i915/display/intel_fbc.h @@ -13,6 +13,7 @@ struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_fbc; struct intel_plane; struct intel_plane_state; @@ -31,9 +32,9 @@ bool intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_post_update(struct intel_atomic_state *state, struct intel_crtc *crtc); -void intel_fbc_init(struct drm_i915_private *dev_priv); -void intel_fbc_cleanup(struct drm_i915_private *dev_priv); -void intel_fbc_sanitize(struct drm_i915_private *dev_priv); +void intel_fbc_init(struct intel_display *display); +void intel_fbc_cleanup(struct intel_display *display); +void intel_fbc_sanitize(struct intel_display *display); void intel_fbc_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_disable(struct intel_crtc *crtc); @@ -43,9 +44,9 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, void intel_fbc_flush(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits, enum fb_op_origin origin); void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane); -void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915); -void intel_fbc_reset_underrun(struct drm_i915_private *i915); +void intel_fbc_handle_fifo_underrun_irq(struct intel_display *display); +void intel_fbc_reset_underrun(struct intel_display *display); void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc); -void intel_fbc_debugfs_register(struct drm_i915_private *i915); +void intel_fbc_debugfs_register(struct intel_display *display); #endif /* __INTEL_FBC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index e5e4ca7cc499..8949fbb1cc60 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -440,7 +440,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, drm_err(&dev_priv->drm, "CPU pipe %c FIFO underrun\n", pipe_name(pipe)); } - intel_fbc_handle_fifo_underrun_irq(dev_priv); + intel_fbc_handle_fifo_underrun_irq(&dev_priv->display); } /** diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 7602cb30ebf1..6f85f5352455 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -966,7 +966,7 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, } } - intel_fbc_sanitize(i915); + intel_fbc_sanitize(&i915->display); intel_sanitize_plane_mapping(i915); From 6c9ee258b1f4e7021224c9bf541fe6b17a03ee40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:37 +0300 Subject: [PATCH 0020/1523] drm/i915/fbc: s/_intel_fbc_cfb_stride()/intel_fbc_plane_cfb_stride()/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _intel_fbc_cfb_stride() calculates the CFB stride the hardware would automagically generate from the plane's stride. Rename the function to intel_fbc_plane_cfb_stride() to better reflect its purpose. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-4-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 9c51f48b54f6..8b851996cf29 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -140,7 +140,7 @@ static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane } /* plane stride based cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int _intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int intel_fbc_plane_cfb_stride(const struct intel_plane_state *plane_state) { unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ @@ -181,7 +181,7 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - unsigned int stride = _intel_fbc_cfb_stride(plane_state); + unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); /* * At least some of the platforms require each 4 line segment to @@ -211,7 +211,7 @@ static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_s { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int stride_aligned = intel_fbc_cfb_stride(plane_state); - unsigned int stride = _intel_fbc_cfb_stride(plane_state); + unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; /* From a5beee65241d25ad20dd814668c975ba13a42615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:38 +0300 Subject: [PATCH 0021/1523] drm/i915/fbc: Extract intel_fbc_max_plane_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract intel_fbc_max_plane_size() from intel_fbc_plane_size_valid(). We'll have another use for this soon in determining how much stolen memory we'd like to keep reserved for FBC. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-5-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 35 ++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b851996cf29..05297c5bff09 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1109,25 +1109,32 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state * return effective_w <= max_w && effective_h <= max_h; } +static void intel_fbc_max_plane_size(struct intel_display *display, + unsigned int *w, unsigned int *h) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 10) { + *w = 5120; + *h = 4096; + } else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(i915)) { + *w = 4096; + *h = 4096; + } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { + *w = 4096; + *h = 2048; + } else { + *w = 2048; + *h = 1536; + } +} + static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - struct drm_i915_private *i915 = to_i915(display->drm); unsigned int w, h, max_w, max_h; - if (DISPLAY_VER(display) >= 10) { - max_w = 5120; - max_h = 4096; - } else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(i915)) { - max_w = 4096; - max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { - max_w = 4096; - max_h = 2048; - } else { - max_w = 2048; - max_h = 1536; - } + intel_fbc_max_plane_size(display, &max_w, &max_h); w = drm_rect_width(&plane_state->uapi.src) >> 16; h = drm_rect_height(&plane_state->uapi.src) >> 16; From da9bbdb97469383a2ac97435a3b09543d14139fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:39 +0300 Subject: [PATCH 0022/1523] drm/i915/fbc: Extract intel_fbc_max_surface_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract intel_fbc_max_surface_size() from intel_fbc_hw_tracking_covers_screen(), mainly to mirror the "max plane size" counterparts. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-6-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 41 ++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 05297c5bff09..bf655d6dc8ff 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1072,6 +1072,29 @@ static bool rotation_is_valid(const struct intel_plane_state *plane_state) return i8xx_fbc_rotation_is_valid(plane_state); } +static void intel_fbc_max_surface_size(struct intel_display *display, + unsigned int *w, unsigned int *h) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 11) { + *w = 8192; + *h = 4096; + } else if (DISPLAY_VER(display) >= 10) { + *w = 5120; + *h = 4096; + } else if (DISPLAY_VER(display) >= 7) { + *w = 4096; + *h = 4096; + } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { + *w = 4096; + *h = 2048; + } else { + *w = 2048; + *h = 1536; + } +} + /* * For some reason, the hardware tracking starts looking at whatever we * programmed as the display plane base address register. It does not look at @@ -1081,25 +1104,9 @@ static bool rotation_is_valid(const struct intel_plane_state *plane_state) static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - struct drm_i915_private *i915 = to_i915(display->drm); unsigned int effective_w, effective_h, max_w, max_h; - if (DISPLAY_VER(display) >= 11) { - max_w = 8192; - max_h = 4096; - } else if (DISPLAY_VER(display) >= 10) { - max_w = 5120; - max_h = 4096; - } else if (DISPLAY_VER(display) >= 7) { - max_w = 4096; - max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { - max_w = 4096; - max_h = 2048; - } else { - max_w = 2048; - max_h = 1536; - } + intel_fbc_max_surface_size(display, &max_w, &max_h); effective_w = plane_state->view.color_plane[0].x + (drm_rect_width(&plane_state->uapi.src) >> 16); From 4c46e34e67587b268abc3916c9613426018561e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:40 +0300 Subject: [PATCH 0023/1523] drm/i915/fbc: s/intel_fbc_hw_tracking_covers_screen()/intel_fbc_surface_size_ok()/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_fbc_hw_tracking_covers_screen() to intel_fbc_surface_size_ok() so that the naming scheme is the same for the surface size vs. plane size checks. "surface size" is what bspec talks about. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-7-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index bf655d6dc8ff..779e96dd3f6f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1101,7 +1101,7 @@ static void intel_fbc_max_surface_size(struct intel_display *display, * the X and Y offset registers. That's why we include the src x/y offsets * instead of just looking at the plane size. */ -static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state) +static bool intel_fbc_surface_size_ok(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int effective_w, effective_h, max_w, max_h; @@ -1348,7 +1348,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!intel_fbc_hw_tracking_covers_screen(plane_state)) { + if (!intel_fbc_surface_size_ok(plane_state)) { plane_state->no_fbc_reason = "surface size too big"; return 0; } From 94900000380300b230dc1bed489b598a2a2c55a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:41 +0300 Subject: [PATCH 0024/1523] drm/i915/fbc: Adjust g4x+ platform checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do the "is this ilk+ or g4x" checks in the customary order instead of the reverse order. Easier for the poor brain to parse this when it's always done the same way. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-8-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 779e96dd3f6f..6853573f8486 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1086,7 +1086,7 @@ static void intel_fbc_max_surface_size(struct intel_display *display, } else if (DISPLAY_VER(display) >= 7) { *w = 4096; *h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { + } else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) { *w = 4096; *h = 2048; } else { @@ -1127,7 +1127,7 @@ static void intel_fbc_max_plane_size(struct intel_display *display, } else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(i915)) { *w = 4096; *h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(display) >= 5) { + } else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) { *w = 4096; *h = 2048; } else { From 6a1738b3fdd92d48e61de438d05dedbc00fc598f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:42 +0300 Subject: [PATCH 0025/1523] drm/i915/fbc: Extract _intel_fbc_cfb_stride() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the lower level stuff out from intel_fbc_cfb_stride() into a separate function that doesn't depend on the plane_state. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-9-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_fbc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 6853573f8486..e9adb1b8b9a7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -148,12 +148,11 @@ static unsigned int intel_fbc_plane_cfb_stride(const struct intel_plane_state *p } /* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int skl_fbc_min_cfb_stride(struct intel_display *display, + unsigned int width) { - struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int limit = 4; /* 1:4 compression limit is the worst case */ unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ - unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; unsigned int height = 4; /* FBC segment is 4 lines */ unsigned int stride; @@ -178,22 +177,29 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane } /* properly aligned cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int _intel_fbc_cfb_stride(struct intel_display *display, + unsigned int width, unsigned int stride) { - struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); - /* * At least some of the platforms require each 4 line segment to * be 512 byte aligned. Aligning each line to 512 bytes guarantees * that regardless of the compression limit we choose later. */ if (DISPLAY_VER(display) >= 9) - return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(plane_state)); + return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(display, width)); else return stride; } +static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +{ + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); + unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; + + return _intel_fbc_cfb_stride(display, width, stride); +} + static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); From 566ad72ba636b745beb8a440bd4c05e779b95728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:43 +0300 Subject: [PATCH 0026/1523] drm/i915/fbc: s/lines/height/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the more customary name 'height' instead of 'lines'. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-10-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index e9adb1b8b9a7..da54cb55906c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -203,14 +203,14 @@ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_s static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - int lines = drm_rect_height(&plane_state->uapi.src) >> 16; + int height = drm_rect_height(&plane_state->uapi.src) >> 16; if (DISPLAY_VER(display) == 7) - lines = min(lines, 2048); + height = min(height, 2048); else if (DISPLAY_VER(display) >= 8) - lines = min(lines, 2560); + height = min(height, 2560); - return lines * intel_fbc_cfb_stride(plane_state); + return height * intel_fbc_cfb_stride(plane_state); } static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) From 4290eaa8424905dcee4daa0666ec841dc3a0bfb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:44 +0300 Subject: [PATCH 0027/1523] drm/i915/fbc: Reoder CFB max height platform checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rearrange the max CFB max height platform into the more common "new first, old last" order. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-11-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index da54cb55906c..737e3c1e2304 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -205,10 +205,10 @@ static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_sta struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); int height = drm_rect_height(&plane_state->uapi.src) >> 16; - if (DISPLAY_VER(display) == 7) - height = min(height, 2048); - else if (DISPLAY_VER(display) >= 8) + if (DISPLAY_VER(display) >= 8) height = min(height, 2560); + else if (DISPLAY_VER(display) == 7) + height = min(height, 2048); return height * intel_fbc_cfb_stride(plane_state); } From f89d7664c3617836aecd291c56c8ab63fe228fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:45 +0300 Subject: [PATCH 0028/1523] drm/i915/fbc: Extract intel_fbc_max_cfb_height() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to determine the maximum CFB height into a separate function. To make this work we need to declare an explicit max height for all older platforms as well. But that is actually just the max plane height as pre-HSW hardware supposedly doesn't have the trick of leaving the extra lines uncompressed. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-12-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_fbc.c | 27 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 737e3c1e2304..4d83a7e92144 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -200,17 +200,30 @@ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_s return _intel_fbc_cfb_stride(display, width, stride); } +/* + * Maximum height the hardware will compress, on HSW+ + * additional lines (up to the actual plane height) will + * remain uncompressed. + */ +static unsigned int intel_fbc_max_cfb_height(struct intel_display *display) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 8) + return 2560; + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) + return 2048; + else + return 1536; +} + static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - int height = drm_rect_height(&plane_state->uapi.src) >> 16; + unsigned int height = drm_rect_height(&plane_state->uapi.src) >> 16; - if (DISPLAY_VER(display) >= 8) - height = min(height, 2560); - else if (DISPLAY_VER(display) == 7) - height = min(height, 2048); - - return height * intel_fbc_cfb_stride(plane_state); + return min(height, intel_fbc_max_cfb_height(display)) * + intel_fbc_cfb_stride(plane_state); } static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) From 4d722029930bca7f53d99dbfc300a40dc297dee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:46 +0300 Subject: [PATCH 0029/1523] drm/i915/fbc: Extract _intel_fbc_cfb_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the lower level stuff out from intel_fbc_cfb_size() into a separate function that doesn't depend on the plane_state. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-13-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_fbc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 4d83a7e92144..bf4240cefe58 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -217,13 +217,18 @@ static unsigned int intel_fbc_max_cfb_height(struct intel_display *display) return 1536; } +static unsigned int _intel_fbc_cfb_size(struct intel_display *display, + unsigned int height, unsigned int stride) +{ + return min(height, intel_fbc_max_cfb_height(display)) * stride; +} + static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int height = drm_rect_height(&plane_state->uapi.src) >> 16; - return min(height, intel_fbc_max_cfb_height(display)) * - intel_fbc_cfb_stride(plane_state); + return _intel_fbc_cfb_size(display, height, intel_fbc_cfb_stride(plane_state)); } static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) From d43caea1faf5c31dc02aa1adb8bb1aa1c33bf86f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Jul 2024 17:52:47 +0300 Subject: [PATCH 0030/1523] drm/i915/fbc: Extract intel_fbc_cfb_cpp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract a helper to determine the CFB bytes per pixel value. Currently this is always 4, but that could change in the future. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240705145254.3355-14-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_fbc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index bf4240cefe58..4c91a2b69a09 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -139,20 +139,24 @@ static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane return stride; } +static unsigned int intel_fbc_cfb_cpp(void) +{ + return 4; /* FBC always 4 bytes per pixel */ +} + /* plane stride based cfb stride in bytes, assuming 1:1 compression limit */ static unsigned int intel_fbc_plane_cfb_stride(const struct intel_plane_state *plane_state) { - unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ + unsigned int cpp = intel_fbc_cfb_cpp(); return intel_fbc_plane_stride(plane_state) * cpp; } /* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */ static unsigned int skl_fbc_min_cfb_stride(struct intel_display *display, - unsigned int width) + unsigned int cpp, unsigned int width) { unsigned int limit = 4; /* 1:4 compression limit is the worst case */ - unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ unsigned int height = 4; /* FBC segment is 4 lines */ unsigned int stride; @@ -178,7 +182,8 @@ static unsigned int skl_fbc_min_cfb_stride(struct intel_display *display, /* properly aligned cfb stride in bytes, assuming 1:1 compression limit */ static unsigned int _intel_fbc_cfb_stride(struct intel_display *display, - unsigned int width, unsigned int stride) + unsigned int cpp, unsigned int width, + unsigned int stride) { /* * At least some of the platforms require each 4 line segment to @@ -186,7 +191,7 @@ static unsigned int _intel_fbc_cfb_stride(struct intel_display *display, * that regardless of the compression limit we choose later. */ if (DISPLAY_VER(display) >= 9) - return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(display, width)); + return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(display, cpp, width)); else return stride; } @@ -196,8 +201,9 @@ static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_s struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; + unsigned int cpp = intel_fbc_cfb_cpp(); - return _intel_fbc_cfb_stride(display, width, stride); + return _intel_fbc_cfb_stride(display, cpp, width, stride); } /* From 7df0be6e6280c6fca01d039864bb123e5e36604b Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Thu, 11 Jul 2024 22:02:08 +0530 Subject: [PATCH 0031/1523] drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8 We're seeing a GPU hang issue on a CHV platform, which was caused by commit bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for Gen8"). The Gen8 platform only supports timeslicing and doesn't have a preemption mechanism, as its engines do not have a preemption timer. Commit 751f82b353a6 ("drm/i915/gt: Only disable preemption on Gen8 render engines") addressed this issue only for render engines. This patch extends that fix by ensuring that preemption is not considered for all engines on Gen8 platforms. v4: - Use the correct Fixes tag (Rodrigo Vivi) - Reworded commit log (Andi Shyti) v3: - Inside need_preempt(), condition of can_preempt() is not required as simplified can_preempt() is enough. (Chris Wilson) v2: Simplify can_preempt() function (Tvrtko Ursulin) Fixes: 751f82b353a6 ("drm/i915/gt: Only disable preemption on gen8 render engines") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396 Suggested-by: Andi Shyti Signed-off-by: Nitin Gote Cc: Chris Wilson CC: # v5.12+ Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240711163208.1355736-1-nitin.r.gote@intel.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 21829439e686..72090f52fb85 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3315,11 +3315,7 @@ static void remove_from_engine(struct i915_request *rq) static bool can_preempt(struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) > 8) - return true; - - /* GPGPU on bdw requires extra w/a; not implemented */ - return engine->class != RENDER_CLASS; + return GRAPHICS_VER(engine->i915) > 8; } static void kick_execlists(const struct i915_request *rq, int prio) From d75dec1fcbcb05b021c08b62551649567ab8955c Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 12 Jul 2024 14:41:56 -0700 Subject: [PATCH 0032/1523] drm/i915: Allow NULL memory region Prevent a NULL pointer access in intel_memory_regions_hw_probe. Fixes: 05da7d9f717b ("drm/i915/gem: Downgrade stolen lmem setup warning") Reported-by: Dan Carpenter Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11704 Signed-off-by: Jonathan Cavitt Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240712214156.3969584-1-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/intel_memory_region.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 172dfa7c3588..d40ee1b42110 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -368,8 +368,10 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) goto out_cleanup; } - mem->id = i; - i915->mm.regions[i] = mem; + if (mem) { /* Skip on non-fatal errors */ + mem->id = i; + i915->mm.regions[i] = mem; + } } for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { From 104bcfae57d80c484185c012cc0ee8d8d5889a7e Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 21 Jun 2024 17:46:35 -0700 Subject: [PATCH 0033/1523] drm/i915/arl: Enable Wa_14019159160 for ARL The context switch out workaround also applies to ARL. Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240622004636.662081-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5e60a34692af..097fc6bd1285 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -296,7 +296,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) /* Wa_16019325821 */ /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) flags |= GUC_WA_RCS_CCS_SWITCHOUT; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7995f059f30d..f1fe5f905453 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -850,7 +850,7 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) guc_waklv_enable_simple(guc, GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, &offset, &remain); From e4a0251d36fc6c190c545aa95702eec0efe05a4f Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 21 Jun 2024 17:46:36 -0700 Subject: [PATCH 0034/1523] drm/i915/guc: Extend w/a 14019159160 There is a new part to an existing workaround, so enable that piece as well. v2: Extend even further. v3: Drop DG2 as there are CI failures still to resolve. Also re-order the parameters to a function to reduce excessive line wrapping. Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240622004636.662081-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 37ff539a6963..0c709e6c15be 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -107,6 +107,7 @@ enum { enum { GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006, }; #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index f1fe5f905453..46fabbfc775e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -815,8 +815,7 @@ engine_instance_list: return PAGE_ALIGN(total_size); } -static void guc_waklv_enable_simple(struct intel_guc *guc, - u32 klv_id, u32 *offset, u32 *remain) +static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id) { u32 size; u32 klv_entry[] = { @@ -850,19 +849,20 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, - &offset, &remain); + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE); + } /* Wa_16021333562 */ if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || IS_DG2(gt->i915))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); size = guc_ads_waklv_size(guc) - remain; if (!size) From 785280973472dbdee2c31cb740633c4b6460a8ee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:54:22 +0200 Subject: [PATCH 0035/1523] sunrpc: avoid -Wformat-security warning Using a non-constant string as an sprintf-style is potentially dangerous: net/sunrpc/svc.c: In function 'param_get_pool_mode': net/sunrpc/svc.c:164:32: error: format not a string literal and no format arguments [-Werror=format-security] Use a literal "%s" format instead. Fixes: 5f71f3c32553 ("sunrpc: refactor pool_mode setting code") Signed-off-by: Arnd Bergmann Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e03f14024e47..88a59cfa5583 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -161,7 +161,7 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) str[len] = '\n'; str[len + 1] = '\0'; - return sysfs_emit(buf, str); + return sysfs_emit(buf, "%s", str); } module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, From 2885d283cce544cf3619f35f0acb3d073de036e1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jul 2024 16:57:22 +0300 Subject: [PATCH 0036/1523] drm/i915/dp: Retrain SST links via a modeset commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of direct calls of the link training functions, use a modeset commit to retrain a DP link in SST mode, similarly to how this is done in DP-MST mode. Originally the current way was chosen presumedly, because there wasn't a well-established way in place for the driver to do an internal (vs. userspace/kernel client) commit. Since then such internal commits became a common place (initial-, HDMI/TC link reset commit), so there is no reason to handle the DP-SST link-retraining case differently. At the end of the current sequence the HW reported a FIFO underrun - without other issues visible to users - because during retraining the link's encoder/port was disabled/re-enabled without also disabling/re-enabling the corresponding pipe/transcoder (as required by the spec); the corresponding underrun error message was suppressed as a known issue. Based on Ankit's test on DG2 the underrun error was still reported as it got detected with some (vblank) delay wrt. other platforms. Switching to a modeset commit resolves these underrun related issues. Cc: Ankit Nautiyal Cc: Ville Syrjälä Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240712135724.660399-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 67 ++++--------------------- 1 file changed, 9 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d4b1b18453dc..f83128ac6075 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5242,8 +5242,6 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct intel_crtc *crtc; - bool mst_output = false; u8 pipe_mask; int ret; @@ -5272,64 +5270,17 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, encoder->base.base.id, encoder->base.name, str_yes_no(intel_dp->link.force_retrain)); - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); + ret = intel_modeset_commit_pipes(dev_priv, pipe_mask, ctx); + if (ret == -EDEADLK) + return ret; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { - mst_output = true; - break; - } + intel_dp->link.force_retrain = false; - /* Suppress underruns caused by re-training */ - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), false); - } - - /* TODO: use a modeset for SST as well. */ - if (mst_output) { - ret = intel_modeset_commit_pipes(dev_priv, pipe_mask, ctx); - - if (ret && ret != -EDEADLK) - drm_dbg_kms(&dev_priv->drm, - "[ENCODER:%d:%s] link retraining failed: %pe\n", - encoder->base.base.id, encoder->base.name, - ERR_PTR(ret)); - - goto out; - } - - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - intel_dp->link_trained = false; - - intel_dp_check_frl_training(intel_dp); - intel_dp_pcon_dsc_configure(intel_dp, crtc_state); - intel_dp_start_link_train(NULL, intel_dp, crtc_state); - intel_dp_stop_link_train(intel_dp, crtc_state); - break; - } - - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - /* Keep underrun reporting disabled until things are stable */ - intel_crtc_wait_for_next_vblank(crtc); - - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), true); - } - -out: - if (ret != -EDEADLK) - intel_dp->link.force_retrain = false; + if (ret) + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] link retraining failed: %pe\n", + encoder->base.base.id, encoder->base.name, + ERR_PTR(ret)); return ret; } From ec92c47d7d098e85eae7fb25c6bc07158686e675 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jul 2024 16:57:23 +0300 Subject: [PATCH 0037/1523] drm/i915/dp: Require a valid atomic state for SST link training After the previous patch link training happens always with a valid atomic state, so remove the NOTE comments and asserts which required a valid state only for DP-MST and allowed for a NULL state for DP-SST. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240712135724.660399-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 ----- .../gpu/drm/i915/display/intel_dp_link_training.c | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f83128ac6075..0365fb1e77ea 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2866,7 +2866,6 @@ static void intel_dp_queue_modeset_retry_work(struct intel_connector *connector) drm_connector_put(&connector->base); } -/* NOTE: @state is only valid for MST links and can be %NULL for SST. */ void intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, struct intel_encoder *encoder, @@ -2875,7 +2874,6 @@ intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, struct intel_connector *connector; struct intel_digital_connector_state *conn_state; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); int i; if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { @@ -2884,9 +2882,6 @@ intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, return; } - if (drm_WARN_ON(&i915->drm, !state)) - return; - for_each_new_intel_connector_in_state(state, connector, conn_state, i) { if (!conn_state->base.crtc) continue; diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index af0b71bdf1fc..58dea87a9fa2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1234,12 +1234,10 @@ static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } -/* NOTE: @state is only valid for MST links and can be %NULL for SST. */ static bool intel_dp_schedule_fallback_link_training(struct intel_atomic_state *state, struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; if (!intel_digital_port_connected(&dp_to_dig_port(intel_dp)->base)) { @@ -1255,11 +1253,6 @@ static bool intel_dp_schedule_fallback_link_training(struct intel_atomic_state * return false; } - if (drm_WARN_ON(&i915->drm, - intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) && - !state)) - return false; - /* Schedule a Hotplug Uevent to userspace to start modeset */ intel_dp_queue_modeset_retry_for_link(state, encoder, crtc_state); @@ -1518,8 +1511,6 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, * retraining with reduced link rate/lane parameters if the link training * fails. * After calling this function intel_dp_stop_link_train() must be called. - * - * NOTE: @state is only valid for MST links and can be %NULL for SST. */ void intel_dp_start_link_train(struct intel_atomic_state *state, struct intel_dp *intel_dp, @@ -1536,11 +1527,6 @@ void intel_dp_start_link_train(struct intel_atomic_state *state, */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); - if (drm_WARN_ON(&i915->drm, - intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) && - !state)) - return; - if (lttpr_count < 0) /* Still continue with enabling the port and link training. */ lttpr_count = 0; From f6f22012e6c73867b959524604984ee5ebfd105f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jul 2024 16:57:24 +0300 Subject: [PATCH 0038/1523] drm/i915/dp: Don't WARN on failed link-retrain modeset After a bad link state is detected, the sink capabilities with which the link was originally trained could have changed: for instance another sink got connected or the retraining was forced after the rate/lane count got decreased (as a fallback). In these cases the retraining modeset fails as expected also printing a debug message, so don't WARN on it. Reviewed-by: Rodrigo Vivi Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240712135724.660399-3-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0365fb1e77ea..1e43e32e0519 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5282,14 +5282,11 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, void intel_dp_link_check(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct drm_modeset_acquire_ctx ctx; int ret; intel_modeset_lock_ctx_retry(&ctx, NULL, 0, ret) ret = intel_dp_retrain_link(encoder, &ctx); - - drm_WARN_ON(&i915->drm, ret); } void intel_dp_check_link_state(struct intel_dp *intel_dp) From bc3ca4d94369838c3b6668a183467d4a69a5a482 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 11 Jul 2024 05:27:31 +0000 Subject: [PATCH 0039/1523] drm/i915: Make I2C terminology more inclusive I2C v7, SMBus 3.2, and I3C 1.1.1 specifications have replaced "master/slave" with more appropriate terms. Inspired by Wolfram's series to fix drivers/i2c/, fix the terminology for users of I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists in the specification. Reviewed-by: Rodrigo Vivi Acked-by: Rodrigo Vivi Acked-by: Zhi Wang Signed-off-by: Easwar Hariharan Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240711052734.1273652-4-eahariha@linux.microsoft.com --- drivers/gpu/drm/i915/display/dvo_ch7017.c | 14 ++++----- drivers/gpu/drm/i915/display/dvo_ch7xxx.c | 18 +++++------ drivers/gpu/drm/i915/display/dvo_ivch.c | 16 +++++----- drivers/gpu/drm/i915/display/dvo_ns2501.c | 18 +++++------ drivers/gpu/drm/i915/display/dvo_sil164.c | 18 +++++------ drivers/gpu/drm/i915/display/dvo_tfp410.c | 18 +++++------ drivers/gpu/drm/i915/display/intel_bios.c | 22 +++++++------- .../gpu/drm/i915/display/intel_display_core.h | 2 +- drivers/gpu/drm/i915/display/intel_dsi.h | 2 +- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 20 ++++++------- drivers/gpu/drm/i915/display/intel_dvo.c | 14 ++++----- drivers/gpu/drm/i915/display/intel_dvo_dev.h | 2 +- drivers/gpu/drm/i915/display/intel_gmbus.c | 4 +-- drivers/gpu/drm/i915/display/intel_sdvo.c | 30 +++++++++---------- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 4 +-- drivers/gpu/drm/i915/gvt/edid.c | 28 ++++++++--------- drivers/gpu/drm/i915/gvt/edid.h | 4 +-- drivers/gpu/drm/i915/gvt/opregion.c | 2 +- 18 files changed, 118 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/i915/display/dvo_ch7017.c b/drivers/gpu/drm/i915/display/dvo_ch7017.c index d0c3880d7f80..493e730c685b 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7017.c @@ -170,13 +170,13 @@ static bool ch7017_read(struct intel_dvo_device *dvo, u8 addr, u8 *val) { struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = &addr, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = val, @@ -189,7 +189,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 addr, u8 val) { u8 buf[2] = { addr, val }; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = buf, @@ -197,7 +197,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 addr, u8 val) return i2c_transfer(dvo->i2c_bus, &msg, 1) == 1; } -/** Probes for a CH7017 on the given bus and slave address. */ +/** Probes for a CH7017 on the given bus and target address. */ static bool ch7017_init(struct intel_dvo_device *dvo, struct i2c_adapter *adapter) { @@ -227,13 +227,13 @@ static bool ch7017_init(struct intel_dvo_device *dvo, break; default: DRM_DEBUG_KMS("ch701x not detected, got %d: from %s " - "slave %d.\n", - val, adapter->name, dvo->slave_addr); + "target %d.\n", + val, adapter->name, dvo->target_addr); goto fail; } DRM_DEBUG_KMS("%s detected on %s, addr %d\n", - str, adapter->name, dvo->slave_addr); + str, adapter->name, dvo->target_addr); return true; fail: diff --git a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c index 2e8e85da5a40..534b8544e0a4 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c @@ -153,13 +153,13 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -176,7 +176,7 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!ch7xxx->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -188,7 +188,7 @@ static bool ch7xxx_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -202,7 +202,7 @@ static bool ch7xxx_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!ch7xxx->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -229,8 +229,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, name = ch7xxx_get_id(vendor); if (!name) { - DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s slave %d.\n", - vendor, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s target %d.\n", + vendor, adapter->name, dvo->target_addr); goto out; } @@ -240,8 +240,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, devid = ch7xxx_get_did(device); if (!devid) { - DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s slave %d.\n", - device, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s target %d.\n", + device, adapter->name, dvo->target_addr); goto out; } diff --git a/drivers/gpu/drm/i915/display/dvo_ivch.c b/drivers/gpu/drm/i915/display/dvo_ivch.c index eef72bb3b767..0d5cce6051b1 100644 --- a/drivers/gpu/drm/i915/display/dvo_ivch.c +++ b/drivers/gpu/drm/i915/display/dvo_ivch.c @@ -198,7 +198,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 0, }, @@ -209,7 +209,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD | I2C_M_NOSTART, .len = 2, .buf = in_buf, @@ -226,7 +226,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) if (!priv->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from " "%s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -238,7 +238,7 @@ static bool ivch_write(struct intel_dvo_device *dvo, int addr, u16 data) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[3]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 3, .buf = out_buf, @@ -253,13 +253,13 @@ static bool ivch_write(struct intel_dvo_device *dvo, int addr, u16 data) if (!priv->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } -/* Probes the given bus and slave address for an ivch */ +/* Probes the given bus and target address for an ivch */ static bool ivch_init(struct intel_dvo_device *dvo, struct i2c_adapter *adapter) { @@ -283,10 +283,10 @@ static bool ivch_init(struct intel_dvo_device *dvo, * very unique, check that the value in the base address field matches * the address it's responding on. */ - if ((temp & VR00_BASE_ADDRESS_MASK) != dvo->slave_addr) { + if ((temp & VR00_BASE_ADDRESS_MASK) != dvo->target_addr) { DRM_DEBUG_KMS("ivch detect failed due to address mismatch " "(%d vs %d)\n", - (temp & VR00_BASE_ADDRESS_MASK), dvo->slave_addr); + (temp & VR00_BASE_ADDRESS_MASK), dvo->target_addr); goto out; } diff --git a/drivers/gpu/drm/i915/display/dvo_ns2501.c b/drivers/gpu/drm/i915/display/dvo_ns2501.c index 21486008dae9..9d47f8a93e94 100644 --- a/drivers/gpu/drm/i915/display/dvo_ns2501.c +++ b/drivers/gpu/drm/i915/display/dvo_ns2501.c @@ -398,13 +398,13 @@ static bool ns2501_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -422,7 +422,7 @@ static bool ns2501_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!ns->quiet) { DRM_DEBUG_KMS ("Unable to read register 0x%02x from %s:0x%02x.\n", addr, - adapter->name, dvo->slave_addr); + adapter->name, dvo->target_addr); } return false; @@ -441,7 +441,7 @@ static bool ns2501_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -456,7 +456,7 @@ static bool ns2501_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!ns->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -487,8 +487,8 @@ static bool ns2501_init(struct intel_dvo_device *dvo, goto out; if (ch != (NS2501_VID & 0xff)) { - DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } @@ -496,8 +496,8 @@ static bool ns2501_init(struct intel_dvo_device *dvo, goto out; if (ch != (NS2501_DID & 0xff)) { - DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } ns->quiet = false; diff --git a/drivers/gpu/drm/i915/display/dvo_sil164.c b/drivers/gpu/drm/i915/display/dvo_sil164.c index 6c461024c8e3..a8dd40c00997 100644 --- a/drivers/gpu/drm/i915/display/dvo_sil164.c +++ b/drivers/gpu/drm/i915/display/dvo_sil164.c @@ -79,13 +79,13 @@ static bool sil164_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -102,7 +102,7 @@ static bool sil164_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!sil->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -113,7 +113,7 @@ static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -127,7 +127,7 @@ static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!sil->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -153,8 +153,8 @@ static bool sil164_init(struct intel_dvo_device *dvo, goto out; if (ch != (SIL164_VID & 0xff)) { - DRM_DEBUG_KMS("sil164 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("sil164 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } @@ -162,8 +162,8 @@ static bool sil164_init(struct intel_dvo_device *dvo, goto out; if (ch != (SIL164_DID & 0xff)) { - DRM_DEBUG_KMS("sil164 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("sil164 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } sil->quiet = false; diff --git a/drivers/gpu/drm/i915/display/dvo_tfp410.c b/drivers/gpu/drm/i915/display/dvo_tfp410.c index 0939e097f4f9..d9a0cd753a87 100644 --- a/drivers/gpu/drm/i915/display/dvo_tfp410.c +++ b/drivers/gpu/drm/i915/display/dvo_tfp410.c @@ -100,13 +100,13 @@ static bool tfp410_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -123,7 +123,7 @@ static bool tfp410_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!tfp->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -134,7 +134,7 @@ static bool tfp410_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -148,7 +148,7 @@ static bool tfp410_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!tfp->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -183,15 +183,15 @@ static bool tfp410_init(struct intel_dvo_device *dvo, if ((id = tfp410_getid(dvo, TFP410_VID_LO)) != TFP410_VID) { DRM_DEBUG_KMS("tfp410 not detected got VID %X: from %s " - "Slave %d.\n", - id, adapter->name, dvo->slave_addr); + "Target %d.\n", + id, adapter->name, dvo->target_addr); goto out; } if ((id = tfp410_getid(dvo, TFP410_DID_LO)) != TFP410_DID) { DRM_DEBUG_KMS("tfp410 not detected got DID %X: from %s " - "Slave %d.\n", - id, adapter->name, dvo->slave_addr); + "Target %d.\n", + id, adapter->name, dvo->target_addr); goto out; } tfp->quiet = false; diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ec1e3a380360..02443462bec3 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -72,8 +72,8 @@ struct intel_bios_encoder_data { struct list_head node; }; -#define SLAVE_ADDR1 0x70 -#define SLAVE_ADDR2 0x72 +#define TARGET_ADDR1 0x70 +#define TARGET_ADDR2 0x72 /* Get BDB block size given a pointer to Block ID. */ static u32 _get_blocksize(const u8 *block_base) @@ -1227,10 +1227,10 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) const struct child_device_config *child = &devdata->child; struct sdvo_device_mapping *mapping; - if (child->slave_addr != SLAVE_ADDR1 && - child->slave_addr != SLAVE_ADDR2) { + if (child->target_addr != TARGET_ADDR1 && + child->target_addr != TARGET_ADDR2) { /* - * If the slave address is neither 0x70 nor 0x72, + * If the target address is neither 0x70 nor 0x72, * it is not a SDVO device. Skip it. */ continue; @@ -1243,22 +1243,22 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) continue; } drm_dbg_kms(&i915->drm, - "the SDVO device with slave addr %2x is found on" + "the SDVO device with target addr %2x is found on" " %s port\n", - child->slave_addr, + child->target_addr, (child->dvo_port == DEVICE_PORT_DVOB) ? "SDVOB" : "SDVOC"); mapping = &i915->display.vbt.sdvo_mappings[child->dvo_port - 1]; if (!mapping->initialized) { mapping->dvo_port = child->dvo_port; - mapping->slave_addr = child->slave_addr; + mapping->target_addr = child->target_addr; mapping->dvo_wiring = child->dvo_wiring; mapping->ddc_pin = child->ddc_pin; mapping->i2c_pin = child->i2c_pin; mapping->initialized = 1; drm_dbg_kms(&i915->drm, "SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d\n", - mapping->dvo_port, mapping->slave_addr, + mapping->dvo_port, mapping->target_addr, mapping->dvo_wiring, mapping->ddc_pin, mapping->i2c_pin); } else { @@ -1266,11 +1266,11 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) "Maybe one SDVO port is shared by " "two SDVO device.\n"); } - if (child->slave2_addr) { + if (child->target2_addr) { /* Maybe this is a SDVO device with multiple inputs */ /* And the mapping info is not added */ drm_dbg_kms(&i915->drm, - "there exists the slave2_addr. Maybe this" + "there exists the target2_addr. Maybe this" " is a SDVO device with multiple inputs.\n"); } count++; diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 7715fc329057..0a711114ff2b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -237,7 +237,7 @@ struct intel_vbt_data { struct sdvo_device_mapping { u8 initialized; u8 dvo_port; - u8 slave_addr; + u8 target_addr; u8 dvo_wiring; u8 i2c_pin; u8 ddc_pin; diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index e99c94edfaae..e8ba4ccd99d3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -66,7 +66,7 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; - /* i2c bus associated with the slave device */ + /* i2c bus associated with the target device */ int i2c_bus_num; /* diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 072ef1d62bda..d8951464bd2b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -56,7 +56,7 @@ #define MIPI_PORT_SHIFT 3 struct i2c_adapter_lookup { - u16 slave_addr; + u16 target_addr; struct intel_dsi *intel_dsi; acpi_handle dev_handle; }; @@ -443,7 +443,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) if (!i2c_acpi_get_i2c_resource(ares, &sb)) return 1; - if (lookup->slave_addr != sb->slave_address) + if (lookup->target_addr != sb->slave_address) return 1; status = acpi_get_handle(lookup->dev_handle, @@ -460,12 +460,12 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) } static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, - const u16 slave_addr) + const u16 target_addr) { struct drm_device *drm_dev = intel_dsi->base.base.dev; struct acpi_device *adev = ACPI_COMPANION(drm_dev->dev); struct i2c_adapter_lookup lookup = { - .slave_addr = slave_addr, + .target_addr = target_addr, .intel_dsi = intel_dsi, .dev_handle = acpi_device_handle(adev), }; @@ -476,7 +476,7 @@ static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, } #else static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, - const u16 slave_addr) + const u16 target_addr) { } #endif @@ -488,17 +488,17 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); - u16 slave_addr = *(u16 *)(data + 3); + u16 target_addr = *(u16 *)(data + 3); u8 reg_offset = *(data + 5); u8 payload_size = *(data + 6); u8 *payload_data; - drm_dbg_kms(&i915->drm, "bus %d client-addr 0x%02x reg 0x%02x data %*ph\n", - vbt_i2c_bus_num, slave_addr, reg_offset, payload_size, data + 7); + drm_dbg_kms(&i915->drm, "bus %d target-addr 0x%02x reg 0x%02x data %*ph\n", + vbt_i2c_bus_num, target_addr, reg_offset, payload_size, data + 7); if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - i2c_acpi_find_adapter(intel_dsi, slave_addr); + i2c_acpi_find_adapter(intel_dsi, target_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); @@ -514,7 +514,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) payload_data[0] = reg_offset; memcpy(&payload_data[1], (data + 7), payload_size); - msg.addr = slave_addr; + msg.addr = target_addr; msg.flags = 0; msg.len = payload_size + 1; msg.buf = payload_data; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 091824334f26..12e7628cbecf 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -60,42 +60,42 @@ static const struct intel_dvo_device intel_dvo_devices[] = { .type = INTEL_DVO_CHIP_TMDS, .name = "sil164", .port = PORT_C, - .slave_addr = SIL164_ADDR, + .target_addr = SIL164_ADDR, .dev_ops = &sil164_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "ch7xxx", .port = PORT_C, - .slave_addr = CH7xxx_ADDR, + .target_addr = CH7xxx_ADDR, .dev_ops = &ch7xxx_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "ch7xxx", .port = PORT_C, - .slave_addr = 0x75, /* For some ch7010 */ + .target_addr = 0x75, /* For some ch7010 */ .dev_ops = &ch7xxx_ops, }, { .type = INTEL_DVO_CHIP_LVDS, .name = "ivch", .port = PORT_A, - .slave_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */ + .target_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */ .dev_ops = &ivch_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "tfp410", .port = PORT_C, - .slave_addr = TFP410_ADDR, + .target_addr = TFP410_ADDR, .dev_ops = &tfp410_ops, }, { .type = INTEL_DVO_CHIP_LVDS, .name = "ch7017", .port = PORT_C, - .slave_addr = 0x75, + .target_addr = 0x75, .gpio = GMBUS_PIN_DPB, .dev_ops = &ch7017_ops, }, @@ -103,7 +103,7 @@ static const struct intel_dvo_device intel_dvo_devices[] = { .type = INTEL_DVO_CHIP_LVDS_NO_FIXED, .name = "ns2501", .port = PORT_B, - .slave_addr = NS2501_ADDR, + .target_addr = NS2501_ADDR, .dev_ops = &ns2501_ops, }, }; diff --git a/drivers/gpu/drm/i915/display/intel_dvo_dev.h b/drivers/gpu/drm/i915/display/intel_dvo_dev.h index af7b04539b93..4bf476656b8c 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo_dev.h +++ b/drivers/gpu/drm/i915/display/intel_dvo_dev.h @@ -38,7 +38,7 @@ struct intel_dvo_device { enum port port; /* GPIO register used for i2c bus to control this device */ u32 gpio; - int slave_addr; + int target_addr; const struct intel_dvo_dev_ops *dev_ops; void *dev_priv; diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 9c8e1e91ff1c..6470f75106bd 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -478,7 +478,7 @@ gmbus_xfer_read_chunk(struct drm_i915_private *i915, /* * HW spec says that 512Bytes in Burst read need special treatment. * But it doesn't talk about other multiple of 256Bytes. And couldn't locate - * an I2C slave, which supports such a lengthy burst read too for experiments. + * an I2C target, which supports such a lengthy burst read too for experiments. * * So until things get clarified on HW support, to avoid the burst read length * in fold of 256Bytes except 512, max burst read length is fixed at 767Bytes. @@ -701,7 +701,7 @@ clear_err: /* Toggle the Software Clear Interrupt bit. This has the effect * of resetting the GMBUS controller and so clearing the - * BUS_ERROR raised by the slave's NAK. + * BUS_ERROR raised by the target's NAK. */ intel_de_write_fw(i915, GMBUS1(i915), GMBUS_SW_CLR_INT); intel_de_write_fw(i915, GMBUS1(i915), 0); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index d0d712405129..4cfa27ca8c22 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -95,7 +95,7 @@ struct intel_sdvo { struct intel_encoder base; struct i2c_adapter *i2c; - u8 slave_addr; + u8 target_addr; struct intel_sdvo_ddc ddc[3]; @@ -255,13 +255,13 @@ static bool intel_sdvo_read_byte(struct intel_sdvo *intel_sdvo, u8 addr, u8 *ch) struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); struct i2c_msg msgs[] = { { - .addr = intel_sdvo->slave_addr, + .addr = intel_sdvo->target_addr, .flags = 0, .len = 1, .buf = &addr, }, { - .addr = intel_sdvo->slave_addr, + .addr = intel_sdvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = ch, @@ -483,14 +483,14 @@ static bool __intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, intel_sdvo_debug_write(intel_sdvo, cmd, args, args_len); for (i = 0; i < args_len; i++) { - msgs[i].addr = intel_sdvo->slave_addr; + msgs[i].addr = intel_sdvo->target_addr; msgs[i].flags = 0; msgs[i].len = 2; msgs[i].buf = buf + 2 *i; buf[2*i + 0] = SDVO_I2C_ARG_0 - i; buf[2*i + 1] = ((u8*)args)[i]; } - msgs[i].addr = intel_sdvo->slave_addr; + msgs[i].addr = intel_sdvo->target_addr; msgs[i].flags = 0; msgs[i].len = 2; msgs[i].buf = buf + 2*i; @@ -499,12 +499,12 @@ static bool __intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, /* the following two are to read the response */ status = SDVO_I2C_CMD_STATUS; - msgs[i+1].addr = intel_sdvo->slave_addr; + msgs[i+1].addr = intel_sdvo->target_addr; msgs[i+1].flags = 0; msgs[i+1].len = 1; msgs[i+1].buf = &status; - msgs[i+2].addr = intel_sdvo->slave_addr; + msgs[i+2].addr = intel_sdvo->target_addr; msgs[i+2].flags = I2C_M_RD; msgs[i+2].len = 1; msgs[i+2].buf = &status; @@ -2652,9 +2652,9 @@ intel_sdvo_select_i2c_bus(struct intel_sdvo *sdvo) else pin = GMBUS_PIN_DPB; - drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] I2C pin %d, slave addr 0x%x\n", + drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] I2C pin %d, target addr 0x%x\n", sdvo->base.base.base.id, sdvo->base.base.name, - pin, sdvo->slave_addr); + pin, sdvo->target_addr); sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin); @@ -2680,7 +2680,7 @@ intel_sdvo_is_hdmi_connector(struct intel_sdvo *intel_sdvo) } static u8 -intel_sdvo_get_slave_addr(struct intel_sdvo *sdvo) +intel_sdvo_get_target_addr(struct intel_sdvo *sdvo) { struct drm_i915_private *dev_priv = to_i915(sdvo->base.base.dev); const struct sdvo_device_mapping *my_mapping, *other_mapping; @@ -2694,15 +2694,15 @@ intel_sdvo_get_slave_addr(struct intel_sdvo *sdvo) } /* If the BIOS described our SDVO device, take advantage of it. */ - if (my_mapping->slave_addr) - return my_mapping->slave_addr; + if (my_mapping->target_addr) + return my_mapping->target_addr; /* * If the BIOS only described a different SDVO device, use the * address that it isn't using. */ - if (other_mapping->slave_addr) { - if (other_mapping->slave_addr == 0x70) + if (other_mapping->target_addr) { + if (other_mapping->target_addr == 0x70) return 0x72; else return 0x70; @@ -3405,7 +3405,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, "SDVO %c", port_name(port)); intel_sdvo->sdvo_reg = sdvo_reg; - intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(intel_sdvo) >> 1; + intel_sdvo->target_addr = intel_sdvo_get_target_addr(intel_sdvo) >> 1; intel_sdvo_select_i2c_bus(intel_sdvo); diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 1af8407e2081..e613288937e4 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -493,7 +493,7 @@ struct child_device_config { u16 addin_offset; u8 dvo_port; /* See DEVICE_PORT_* and DVO_PORT_* above */ u8 i2c_pin; - u8 slave_addr; + u8 target_addr; u8 ddc_pin; u16 edid_ptr; u8 dvo_cfg; /* See DEVICE_CFG_* above */ @@ -502,7 +502,7 @@ struct child_device_config { struct { u8 dvo2_port; u8 i2c2_pin; - u8 slave2_addr; + u8 target2_addr; u8 ddc2_pin; } __packed; struct { diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index af9afdb53c7f..c022dc736045 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -42,8 +42,8 @@ #define GMBUS1_TOTAL_BYTES_MASK 0x1ff #define gmbus1_total_byte_count(v) (((v) >> \ GMBUS1_TOTAL_BYTES_SHIFT) & GMBUS1_TOTAL_BYTES_MASK) -#define gmbus1_slave_addr(v) (((v) & 0xff) >> 1) -#define gmbus1_slave_index(v) (((v) >> 8) & 0xff) +#define gmbus1_target_addr(v) (((v) & 0xff) >> 1) +#define gmbus1_target_index(v) (((v) >> 8) & 0xff) #define gmbus1_bus_cycle(v) (((v) >> 25) & 0x7) /* GMBUS0 bits definitions */ @@ -54,7 +54,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) struct intel_vgpu_i2c_edid *edid = &vgpu->display.i2c_edid; unsigned char chr = 0; - if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) { + if (edid->state == I2C_NOT_SPECIFIED || !edid->target_selected) { gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n"); return 0; } @@ -179,7 +179,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct intel_vgpu_i2c_edid *i2c_edid = &vgpu->display.i2c_edid; - u32 slave_addr; + u32 target_addr; u32 wvalue = *(u32 *)p_data; if (vgpu_vreg(vgpu, offset) & GMBUS_SW_CLR_INT) { @@ -210,21 +210,21 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, i2c_edid->gmbus.total_byte_count = gmbus1_total_byte_count(wvalue); - slave_addr = gmbus1_slave_addr(wvalue); + target_addr = gmbus1_target_addr(wvalue); /* vgpu gmbus only support EDID */ - if (slave_addr == EDID_ADDR) { - i2c_edid->slave_selected = true; - } else if (slave_addr != 0) { + if (target_addr == EDID_ADDR) { + i2c_edid->target_selected = true; + } else if (target_addr != 0) { gvt_dbg_dpy( - "vgpu%d: unsupported gmbus slave addr(0x%x)\n" + "vgpu%d: unsupported gmbus target addr(0x%x)\n" " gmbus operations will be ignored.\n", - vgpu->id, slave_addr); + vgpu->id, target_addr); } if (wvalue & GMBUS_CYCLE_INDEX) i2c_edid->current_edid_read = - gmbus1_slave_index(wvalue); + gmbus1_target_index(wvalue); i2c_edid->gmbus.cycle_type = gmbus1_bus_cycle(wvalue); switch (gmbus1_bus_cycle(wvalue)) { @@ -523,7 +523,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, } else if (addr == EDID_ADDR) { i2c_edid->state = I2C_AUX_CH; i2c_edid->port = port_idx; - i2c_edid->slave_selected = true; + i2c_edid->target_selected = true; if (intel_vgpu_has_monitor_on_port(vgpu, port_idx) && intel_vgpu_port_is_dp(vgpu, port_idx)) @@ -542,7 +542,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, return; if (drm_WARN_ON(&i915->drm, msg_length != 4)) return; - if (i2c_edid->edid_available && i2c_edid->slave_selected) { + if (i2c_edid->edid_available && i2c_edid->target_selected) { unsigned char val = edid_get_byte(vgpu); aux_data_for_write = (val << 16); @@ -571,7 +571,7 @@ void intel_vgpu_init_i2c_edid(struct intel_vgpu *vgpu) edid->state = I2C_NOT_SPECIFIED; edid->port = -1; - edid->slave_selected = false; + edid->target_selected = false; edid->edid_available = false; edid->current_edid_read = 0; diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h index dfe0cbc6aad8..c3b5a55aecb3 100644 --- a/drivers/gpu/drm/i915/gvt/edid.h +++ b/drivers/gpu/drm/i915/gvt/edid.h @@ -80,7 +80,7 @@ enum gmbus_cycle_type { * R/W Protect * Command and Status. * bit0 is the direction bit: 1 is read; 0 is write. - * bit1 - bit7 is slave 7-bit address. + * bit1 - bit7 is target 7-bit address. * bit16 - bit24 total byte count (ignore?) * * GMBUS2: @@ -130,7 +130,7 @@ struct intel_vgpu_i2c_edid { enum i2c_state state; unsigned int port; - bool slave_selected; + bool target_selected; bool edid_available; unsigned int current_edid_read; diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d2bed466540a..908f910420c2 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -86,7 +86,7 @@ struct efp_child_device_config { u8 skip2; u8 dvo_port; u8 i2c_pin; /* for add-in card */ - u8 slave_addr; /* for add-in card */ + u8 target_addr; /* for add-in card */ u8 ddc_pin; u16 edid_ptr; u8 dvo_config; From 91da337e5d506f2c065d20529d105ca40090e320 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 19 Jul 2024 14:55:53 -0400 Subject: [PATCH 0040/1523] nfsd: don't set SVC_SOCK_ANONYMOUS when creating nfsd sockets When creating nfsd sockets via the netlink interface, we do want to register with the portmapper. Don't set SVC_SOCK_ANONYMOUS. Reported-by: Steve Dickson Fixes: 16a471177496 ("NFSD: add listener-{set,get} netlink command") Cc: Lorenzo Bianconi Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e0ea6fc2aa3..34eb2c2cbcde 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) continue; } - ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, - SVC_SOCK_ANONYMOUS, + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) From f763c3b543d80ebcb94dd19a69324bf2b72b23ab Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Jul 2024 16:17:46 +0530 Subject: [PATCH 0041/1523] drm/amdgpu: Add sdma_v5_2 ip dump for devcoredump Add ip dump for sdma_v5_2 for devcoredump for all instances of sdma. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 82 ++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index d3706a484870..087ce0f6fa07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -115,6 +115,7 @@ struct amdgpu_sdma { bool has_page_queue; struct ras_common_if *ras_if; struct amdgpu_sdma_ras *ras; + uint32_t *ip_dump; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index cc9e961f0078..0cc969689946 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -60,6 +60,55 @@ MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1214,6 +1263,8 @@ static int sdma_v5_2_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1245,6 +1296,13 @@ static int sdma_v5_2_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1258,6 +1316,8 @@ static int sdma_v5_2_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1662,6 +1722,27 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_off_ctrl(adev, true); } +static void sdma_v5_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_2_get_reg_offset(adev, i, + sdma_reg_list_5_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1678,6 +1759,7 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .set_clockgating_state = sdma_v5_2_set_clockgating_state, .set_powergating_state = sdma_v5_2_set_powergating_state, .get_clockgating_state = sdma_v5_2_get_clockgating_state, + .dump_ip_state = sdma_v5_2_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { From 08bed7e4ff24f90e200defcd2c23e70b0a3cd710 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Jul 2024 16:44:36 +0530 Subject: [PATCH 0042/1523] drm/amdgpu: add print support for sdma_v_5_2 ip_dump Add support for ip dump for sdma_v_5_2 in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 0cc969689946..630b03f2ce3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1722,6 +1722,26 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_off_ctrl(adev, true); } +static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} static void sdma_v5_2_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1760,6 +1780,7 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .set_powergating_state = sdma_v5_2_set_powergating_state, .get_clockgating_state = sdma_v5_2_get_clockgating_state, .dump_ip_state = sdma_v5_2_dump_ip_state, + .print_ip_state = sdma_v5_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { From 43796955a89572c63f7f96e271a2849c27d18f2d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 15:50:49 +0530 Subject: [PATCH 0043/1523] drm/amdgpu: fix the extra space between two functions fix extra line space between two functions. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 630b03f2ce3d..66bb85955fa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1742,6 +1742,7 @@ static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) adev->sdma.ip_dump[instance_offset + j]); } } + static void sdma_v5_2_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; From 00bb3223bf7cfed3c9c714e994cbd454cc3e6b73 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Jul 2024 17:55:33 +0530 Subject: [PATCH 0044/1523] drm/amdgpu: fix the print message in devcoredump Fix the memory type logged for gtt memory size which is wrongly logged as visible vram size. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f0a44d0dec27..f6806ae1c061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -236,7 +236,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "\nSOC Memory Information\n"); drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size); drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size); - drm_printf(&p, "visible vram size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); + drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); /* GDS Config */ drm_printf(&p, "\nGDS Config\n"); From 1eba165aa40c79f65f487678c8ea8e77b1c6a5a4 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 15:22:56 +0530 Subject: [PATCH 0045/1523] drm/amdgpu: Add sdma_v6_0 ip dump for devcoredump Add ip dump for sdma_v6_0 for devcoredump for all instances of sdma. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index dab4c2db8c9d..102de209f120 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,63 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1239,6 +1296,8 @@ static int sdma_v6_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1274,6 +1333,13 @@ static int sdma_v6_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1287,6 +1353,8 @@ static int sdma_v6_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1488,6 +1556,27 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v6_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v6_0_get_reg_offset(adev, i, + sdma_reg_list_6_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .name = "sdma_v6_0", .early_init = sdma_v6_0_early_init, @@ -1505,6 +1594,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .set_clockgating_state = sdma_v6_0_set_clockgating_state, .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, + .dump_ip_state = sdma_v6_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { From ccb54d7d91d256485cfe5403a12abb0175ce4539 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 15:43:44 +0530 Subject: [PATCH 0046/1523] drm/amdgpu: add print support for sdma_v_6_0 ip_dump Add print support for ip dump for sdma_v_6_0 in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 102de209f120..208a1fa9d4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1556,6 +1556,27 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v6_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1595,6 +1616,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, .dump_ip_state = sdma_v6_0_dump_ip_state, + .print_ip_state = sdma_v6_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { From 0f1a93704a5cf53ce819a7c544125442666d61ce Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 16:10:26 +0530 Subject: [PATCH 0047/1523] drm/amdgpu: Add sdma_v5_0 ip dump for devcoredump Add ip dump for sdma_v5_0 for devcoredump for all instances of sdma. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 82 ++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b7d33d78bce0..cb324a90b310 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -59,6 +59,55 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1341,6 +1390,8 @@ static int sdma_v5_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, @@ -1378,6 +1429,13 @@ static int sdma_v5_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1391,6 +1449,8 @@ static int sdma_v5_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1718,6 +1778,27 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_0_get_reg_offset(adev, i, + sdma_reg_list_5_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .name = "sdma_v5_0", .early_init = sdma_v5_0_early_init, @@ -1734,6 +1815,7 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .set_clockgating_state = sdma_v5_0_set_clockgating_state, .set_powergating_state = sdma_v5_0_set_powergating_state, .get_clockgating_state = sdma_v5_0_get_clockgating_state, + .dump_ip_state = sdma_v5_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { From e84f798a93881062cd14ce316a68068edd50bfb4 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Jul 2024 16:15:57 +0530 Subject: [PATCH 0048/1523] drm/amdgpu: add print support for sdma_v_5_0 ip_dump Add support for ip dump for sdma_v_5_0 in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index cb324a90b310..d5f0dc132a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1778,6 +1778,27 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v5_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1816,6 +1837,7 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .set_powergating_state = sdma_v5_0_set_powergating_state, .get_clockgating_state = sdma_v5_0_get_clockgating_state, .dump_ip_state = sdma_v5_0_dump_ip_state, + .print_ip_state = sdma_v5_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { From b68417613d4134b9e39fff95e72ca726268b47db Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 26 Jun 2024 16:14:24 +0800 Subject: [PATCH 0049/1523] drm/amd/display: Disable replay if VRR capability is false [Why] The VRR need to be supported for panel replay feature. If VRR capability is false, panel replay capability also need to be disabled. [How] After update the vrr capability, the panel replay capability also need to be check if need. Reviewed-by: Wayne Lin Signed-off-by: Jerry Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae4..ea1e2d8dcd8c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12145,6 +12145,12 @@ update: if (dm_con_state) dm_con_state->freesync_capable = freesync_capable; + if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable && + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) { + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false; + amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false; + } + if (connector->vrr_capable_property) drm_connector_set_vrr_capable_property(connector, freesync_capable); From 4ccc8fdcca670edd76d8bfd6389f04c448cff6f6 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 12 Sep 2023 14:51:47 -0400 Subject: [PATCH 0050/1523] drm/amd/display: Disable HBR audio for DP2 for certain ASICs [Description] Due to a HW bug, HBR audio is not supported for DP2 encoders for certain ASICs. Reviewed-by: Alvin Lee Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dce/dce_audio.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 +++++ drivers/gpu/drm/amd/display/dc/inc/hw/audio.h | 2 ++ .../gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c | 1 + .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 1 + .../drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 1 + .../gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 9 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 73cdebcd9f37..4c9bb913125d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -466,6 +466,7 @@ struct dc_config { bool use_assr_psp_message; bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; + bool disable_hbr_audio_dp2; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index cf5f84fb9c69..eeed840073fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -630,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio) audio->inst, value); } +void dce_aud_az_disable_hbr_audio(struct audio *audio) +{ + set_high_bit_rate_capable(audio, false); +} + void dce_aud_az_disable(struct audio *audio) { uint32_t value; @@ -1293,6 +1298,7 @@ static const struct audio_funcs funcs = { .az_enable = dce_aud_az_enable, .az_disable = dce_aud_az_disable, .az_configure = dce_aud_az_configure, + .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio, .destroy = dce_aud_destroy, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 539f881928d1..1b7b8b079af4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -166,6 +166,7 @@ void dce_aud_hw_init(struct audio *audio); void dce_aud_az_enable(struct audio *audio); void dce_aud_az_disable(struct audio *audio); +void dce_aud_az_disable_hbr_audio(struct audio *audio); void dce_aud_az_configure(struct audio *audio, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 1f2eb2f727dc..51c5195f8325 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1597,6 +1597,11 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( &audio_output.crtc_info, &pipe_ctx->stream->audio_info, &audio_output.dp_link_info); + + if (dc->config.disable_hbr_audio_dp2) + if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && + dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) + pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(pipe_ctx->stream_res.audio); } /* make sure no pipes syncd to the pipe being enabled */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h index b6203253111c..8c18efc2aa70 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h @@ -46,6 +46,8 @@ struct audio_funcs { const struct audio_info *audio_info, const struct audio_dp_link_info *dp_link_info); + void (*az_disable_hbr_audio)(struct audio *audio); + void (*wall_dto_setup)(struct audio *audio, enum signal_type signal, const struct audio_crtc_info *crtc_info, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 5d1801dce273..ac8cb20e2e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1948,6 +1948,7 @@ static bool dcn31_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 969658313fd6..3ed6d1fa0c44 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2220,6 +2220,7 @@ static bool dcn32_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..a414ed60a724 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1780,6 +1780,7 @@ static bool dcn321_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ddf251901fb3..5f3705f97bd7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1899,6 +1899,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { From 5f30ee493044e9ea3a46167e5597a96f5c302adb Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Thu, 20 Jun 2024 15:42:45 -0400 Subject: [PATCH 0051/1523] drm/amd/display: quality improvements for EASF and ISHARP [Why] Update coefficients and LUT tables for scaler and sharpener to improve quality and support different use cases (SDR/HDR) [How] Move scaler coefficients to new file dc_spl_scl_easf_filters.c Remove older coefficients file dc_sp_scl_filters_old.c Update default taps for EASF support Update LLS policy for DON'T CARE case Update cositing offset from 0.5 to 0.25 Add support to adjust sharpness based on level, use case, and scaling ratio ( using discrete levels ) Apply sharpness to all RGB surfaces and both NV12 and P010 video ( in fullscreen only ). Upscale and 1:1 ratios only Enable scaler when sharpening 1:1 ratios Add support for coefficients that are in S1.10 format (convert to S1.12 format) Reviewed-by: Jun Lei Signed-off-by: Jerry Zuo Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 12 + drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 7 - .../gpu/drm/amd/display/dc/dc_spl_translate.c | 5 + .../gpu/drm/amd/display/dc/dc_spl_translate.h | 1 + drivers/gpu/drm/amd/display/dc/dm_helpers.h | 3 + .../dc/dml2/dml21/dml21_translation_helper.c | 8 + .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 618 +++--- .../dc/resource/dcn401/dcn401_resource.c | 7 + drivers/gpu/drm/amd/display/dc/spl/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 958 +++++---- .../drm/amd/display/dc/spl/dc_spl_filters.c | 15 + .../drm/amd/display/dc/spl/dc_spl_filters.h | 15 + .../display/dc/spl/dc_spl_isharp_filters.c | 427 +++- .../display/dc/spl/dc_spl_isharp_filters.h | 33 +- .../display/dc/spl/dc_spl_scl_easf_filters.c | 1725 +++++++++++++++++ .../display/dc/spl/dc_spl_scl_easf_filters.h | 38 + .../amd/display/dc/spl/dc_spl_scl_filters.c | 26 + .../amd/display/dc/spl/dc_spl_scl_filters.h | 39 +- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 12 + 19 files changed, 3201 insertions(+), 750 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b490ae67b6be..165e010fe69c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1286,3 +1286,15 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link) return as_type; } + +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +} + +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 959ae0df1e56..c10567ec1c81 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -763,13 +763,6 @@ enum scanning_type { SCANNING_TYPE_UNDEFINED }; -enum chroma_cositing { - CHROMA_COSITING_NONE, - CHROMA_COSITING_LEFT, - CHROMA_COSITING_TOPLEFT, - CHROMA_COSITING_COUNT -}; - struct dc_crtc_timing_flags { uint32_t INTERLACE :1; uint32_t HSYNC_POSITIVE_POLARITY :1; /* when set to 1, diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 582606319764..49ff59258c8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -170,6 +170,11 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; + /* Check if it is stream is in fullscreen and if its HDR. + * Use this to determine sharpness levels + */ + spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); + spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index c73d640c3632..eaa5c5373b28 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,6 +6,7 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" +#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 34adae7ab6e8..2e4a46f1b499 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -210,4 +210,7 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link); enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid); +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream); +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream); + #endif /* __DM_HELPERS__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 06387b8b0aee..a50fe3ec79c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,6 +788,14 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then + * allow case where ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 505929800426..27cbda1cf8cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -655,6 +655,226 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height); } +/** + * dpp401_dscl_program_easf_v - Program EASF_V + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program vertical EASF registers + * + */ +static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_EASF_V_MODE */ + REG_SET_3(DSCL_EASF_V_MODE, 0, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, + SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); + + if (!scl_data->dscl_prog_data.easf_v_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_V_BF_CNTL */ + REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, + SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, + SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); + /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, + SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, + SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); + /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); + /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, + SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); + /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, + SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, + SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); + /* DSCL_EASF_V_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, + SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, + SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, + SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, + SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, + SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, + SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, + SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, + SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, + SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, + SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, + SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, + SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, + SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, + SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, + SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, + SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, + SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, + SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, + SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, + SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, + SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); + REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, + SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, + SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); + /* DSCL_EASF_V_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, + SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, + SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, + SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, + SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, + SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, + SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, + SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, + SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, + SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, + SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, + SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, + SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, + SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, + SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, + SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); + REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, + SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, + SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf_h - Program EASF_H + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program horizontal EASF registers + * + */ +static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_EASF_H_MODE */ + REG_SET_3(DSCL_EASF_H_MODE, 0, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, + SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); + + if (!scl_data->dscl_prog_data.easf_h_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_H_BF_CNTL */ + REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, + SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); + /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); + /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, + SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); + /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, + SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, + SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); + /* DSCL_EASF_H_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, + SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, + SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, + SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0, + SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1, + SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1, + SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0, + SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2, + SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2, + SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0, + SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3, + SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3, + SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0, + SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4, + SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4, + SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0, + SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5, + SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5, + SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0, + SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6, + SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6, + SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); + REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0, + SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7, + SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); + /* DSCL_EASF_H_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0, + SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0, + SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0, + SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0, + SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1, + SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1, + SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0, + SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2, + SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2, + SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0, + SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3, + SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3, + SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0, + SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4, + SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4, + SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); + REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0, + SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5, + SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); + PERF_TRACE(); +} /** * dpp401_dscl_program_easf - Program EASF * @@ -669,261 +889,19 @@ static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_d struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); - REG_UPDATE(DSCL_SC_MODE, + /* DSCL_SC_MODE */ + REG_SET_2(DSCL_SC_MODE, 0, + SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode, SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); - /* DSCL_EASF_V_MODE */ - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); - /* DSCL_EASF_H_MODE */ - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */ - REG_UPDATE(DSCL_SC_MATRIX_C0C1, - SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0); - REG_UPDATE(DSCL_SC_MATRIX_C0C1, + REG_SET_2(DSCL_SC_MATRIX_C0C1, 0, + SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0, SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, - SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, + REG_SET_2(DSCL_SC_MATRIX_C2C3, 0, + SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2, SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3); + dpp401_dscl_program_easf_v(dpp_base, scl_data); + dpp401_dscl_program_easf_h(dpp_base, scl_data); PERF_TRACE(); } /** @@ -958,10 +936,11 @@ static void dpp401_dscl_set_isharp_filter( REG_UPDATE(ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, 0); + /* LUT data write is auto-indexed. Write index once */ + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, 0); for (level = 0; level < NUM_LEVELS; level++) { filter_data = filter[level]; - REG_SET(ISHARP_DELTA_INDEX, 0, - ISHARP_DELTA_INDEX, level); REG_SET(ISHARP_DELTA_DATA, 0, ISHARP_DELTA_DATA, filter_data); } @@ -981,102 +960,67 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - /* ISHARP_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_EN, scl_data->dscl_prog_data.isharp_en); - /* ISHARP_NOISEDET_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); - /* ISHARP_NOISEDET_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + /* ISHARP_MODE */ + REG_SET_6(ISHARP_MODE, 0, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); + + /* Skip remaining register programming if ISHARP is disabled */ + if (!scl_data->dscl_prog_data.isharp_en) { + PERF_TRACE(); + return; + } + + /* ISHARP_NOISEDET_THRESHOLD */ + REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold, ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - /* ISHARP_NOISEDET_PWL_START_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); - /* ISHARP_NOISEDET_PWL_END_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); - /* ISHARP_NOISEDET_PWL_SLOPE */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + + /* ISHARP_NOISE_GAIN_PWL */ + REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in, ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); - /* ISHARP_LBA_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); + /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */ - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, + REG_SET_3(ISHARP_LBA_PWL_SEG0, 0, + ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0], + ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0], ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, + REG_SET_3(ISHARP_LBA_PWL_SEG1, 0, + ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1], + ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1], ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, + REG_SET_3(ISHARP_LBA_PWL_SEG2, 0, + ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2], + ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2], ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, + REG_SET_3(ISHARP_LBA_PWL_SEG3, 0, + ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3], + ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3], ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, + REG_SET_3(ISHARP_LBA_PWL_SEG4, 0, + ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4], + ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4], ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, - ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, + REG_SET_2(ISHARP_LBA_PWL_SEG5, 0, + ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5], ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]); - /* ISHARP_FMT_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); - /* ISHARP_FMT_NORM */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); /* ISHARP_DELTA_LUT */ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); - /* ISHARP_NLDELTA_SCLIP_EN_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); - /* ISHARP_NLDELTA_SCLIP_PIVOT_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); - /* ISHARP_NLDELTA_SCLIP_SLOPE_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); - /* ISHARP_NLDELTA_SCLIP_EN_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); - /* ISHARP_NLDELTA_SCLIP_PIVOT_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); - /* ISHARP_NLDELTA_SCLIP_SLOPE_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + + /* ISHARP_NLDELTA_SOFT_CLIP */ + REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n, ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..d3808c49d298 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -76,6 +76,9 @@ #include "dml2/dml2_wrapper.h" +#include "spl/dc_spl_scl_easf_filters.h" +#include "spl/dc_spl_isharp_filters.h" + #define DC_LOGGER_INIT(logger) enum dcn401_clk_src_array_id { @@ -2123,6 +2126,10 @@ static bool dcn401_resource_construct( dc->dml2_options.max_segments_per_hubp = 20; dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; + /* SPL */ + spl_init_easf_filter_coeffs(); + spl_init_blur_scale_coeffs(); + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index 89cad60b1a10..af7eaf839970 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_isharp_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index e3e20cd86af6..dad38960d34d 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -5,6 +5,7 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" #include "dc_spl_isharp_filters.h" +#include "dc_spl_scl_easf_filters.h" #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 @@ -352,6 +353,7 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) memset(&spl_out->scl_data.recout, 0, sizeof(struct spl_rect)); } + /* Calculate scaling ratios */ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -399,7 +401,22 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out * spl_out->scl_data.ratios.horz_c, 19); spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( spl_out->scl_data.ratios.vert_c, 19); + + /* + * Coefficient table and some registers are different based on ratio + * that is output/input. Currently we calculate input/output + * Store 1/ratio in recip_ratio for those lookups + */ + spl_out->scl_data.recip_ratios.horz = dc_fixpt_recip( + spl_out->scl_data.ratios.horz); + spl_out->scl_data.recip_ratios.vert = dc_fixpt_recip( + spl_out->scl_data.ratios.vert); + spl_out->scl_data.recip_ratios.horz_c = dc_fixpt_recip( + spl_out->scl_data.ratios.horz_c); + spl_out->scl_data.recip_ratios.vert_c = dc_fixpt_recip( + spl_out->scl_data.ratios.vert_c); } + /* Calculate Viewport size */ static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -417,6 +434,7 @@ static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *s swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); } } + static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, bool horizontal_mirror, bool *orthogonal_rotation, @@ -440,6 +458,7 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, if (horizontal_mirror) *flip_horz_scan_dir = !*flip_horz_scan_dir; } + /* * We completely calculate vp offset, size and inits here based entirely on scaling * ratios and recout for pixel perfect pipe combine. @@ -509,8 +528,8 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, static bool spl_is_yuv420(enum spl_pixel_format format) { - if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) && - (format <= SPL_PIXEL_FORMAT_VIDEO_END)) + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && + (format <= SPL_PIXEL_FORMAT_420BPP10)) return true; return false; @@ -569,11 +588,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ case CHROMA_COSITING_LEFT: init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_v = dc_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_NONE: - init_adj_h = dc_fixpt_from_fraction(sign, 2); - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_h = dc_fixpt_from_fraction(sign, 4); + init_adj_v = dc_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_TOPLEFT: default: @@ -639,6 +658,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ spl_out->scl_data.viewport_c.x += src.x / vpc_div; spl_out->scl_data.viewport_c.y += src.y / vpc_div; } + static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) { /* @@ -665,6 +685,7 @@ static void spl_clamp_viewport(struct spl_rect *viewport) if (viewport->width < MIN_VIEWPORT_SIZE) viewport->width = MIN_VIEWPORT_SIZE; } + static bool spl_dscl_is_420_format(enum spl_pixel_format format) { if (format == SPL_PIXEL_FORMAT_420BPP8 || @@ -673,6 +694,7 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format) else return false; } + static bool spl_dscl_is_video_format(enum spl_pixel_format format) { if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN @@ -681,17 +703,21 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format) else return false; } + static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, + bool enable_isharp, bool enable_easf) { const long long one = dc_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; + /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ if (data->ratios.horz.value == one && data->ratios.vert.value == one && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one - && !spl_in->basic_out.always_scale) + && !spl_in->basic_out.always_scale + && !enable_isharp) return SCL_MODE_SCALING_444_BYPASS; if (!spl_dscl_is_420_format(pixel_format)) { @@ -700,65 +726,200 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, else return SCL_MODE_SCALING_444_RGB_ENABLE; } - if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return SCL_MODE_SCALING_420_LUMA_BYPASS; - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return SCL_MODE_SCALING_420_CHROMA_BYPASS; + + /* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV + * downscale without EASF + */ + if ((!enable_isharp) && (!enable_easf)) { + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return SCL_MODE_SCALING_420_LUMA_BYPASS; + if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) + return SCL_MODE_SCALING_420_CHROMA_BYPASS; + } return SCL_MODE_SCALING_420_YCBCR_ENABLE; } + +static bool spl_choose_lls_policy(enum spl_pixel_format format, + enum spl_transfer_func_type tf_type, + enum spl_transfer_func_predefined tf_predefined_type, + enum linear_light_scaling *lls_pref) +{ + if (spl_is_yuv420(format)) { + *lls_pref = LLS_PREF_NO; + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) + return true; + } else { /* RGB or YUV444 */ + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_BYPASS)) { + *lls_pref = LLS_PREF_YES; + return true; + } + } + *lls_pref = LLS_PREF_NO; + return false; +} + +/* Enable EASF ?*/ +static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) +{ + int vratio = 0; + int hratio = 0; + bool skip_easf = false; + bool lls_enable_easf = true; + + /* + * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer + * function to determine whether to use LINEAR or NONLINEAR scaling + */ + if (spl_in->lls_pref == LLS_PREF_DONT_CARE) + lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, + spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, + &spl_in->lls_pref); + + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + + if (!lls_enable_easf || spl_in->disable_easf) + skip_easf = true; + + /* + * No EASF support for downscaling > 2:1 + * EASF support for upscaling or downscaling up to 2:1 + */ + if ((vratio > 2) || (hratio > 2)) + skip_easf = true; + + /* Check for linear scaling or EASF preferred */ + if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf) + skip_easf = true; + + return skip_easf; +} + +/* Check if video is in fullscreen mode */ +static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_out) +{ + if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) + return true; + return false; +} + +static bool spl_get_isharp_en(struct spl_in *spl_in, + struct spl_out *spl_out) +{ + bool enable_isharp = false; + int vratio = 0; + int hratio = 0; + struct spl_taps taps = spl_out->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in, spl_out); + + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + + /* Return if adaptive sharpness is disabled */ + if (spl_in->adaptive_sharpness.enable == false) + return enable_isharp; + + /* No iSHARP support for downscaling */ + if (vratio > 1 || hratio > 1) + return enable_isharp; + + // Scaling is up to 1:1 (no scaling) or upscaling + + /* + * Apply sharpness to all RGB surfaces and to + * NV12/P010 surfaces if in fullscreen + */ + if (spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) + return enable_isharp; + + /* + * Apply sharpness if supports horizontal taps 4,6 AND + * vertical taps 3, 4, 6 + */ + if ((taps.h_taps == 4 || taps.h_taps == 6) && + (taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6)) + enable_isharp = true; + + return enable_isharp; +} + /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, - const struct spl_taps *in_taps) + const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, + bool *enable_isharp) { int num_part_y, num_part_c; int max_taps_y, max_taps_c; int min_taps_y, min_taps_c; enum lb_memory_config lb_config; + bool skip_easf = false; if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && max_downscale_src_width != 0 && spl_out->scl_data.viewport.width > max_downscale_src_width) return false; + + /* Check if we are using EASF or not */ + skip_easf = enable_easf(spl_in, spl_out); + /* * Set default taps if none are provided * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling * taps = 4 for upscaling */ - if (in_taps->h_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); - else - spl_out->scl_data.taps.h_taps = 4; - } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; - if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); - else - spl_out->scl_data.taps.v_taps = 4; - } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; - if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); - else - spl_out->scl_data.taps.v_taps_c = 4; - } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; - if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + if (skip_easf) { + if (in_taps->h_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) + spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil( + spl_out->scl_data.ratios.horz), 8); + else + spl_out->scl_data.taps.h_taps = 4; + } else + spl_out->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) + spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert, 2)), 8); + else + spl_out->scl_data.taps.v_taps = 4; + } else + spl_out->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) + spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert_c, 2)), 8); + else + spl_out->scl_data.taps.v_taps_c = 4; + } else + spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) + spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil( + spl_out->scl_data.ratios.horz_c), 8); + else + spl_out->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + } else { + if (spl_is_yuv420(spl_in->basic_in.format)) { + spl_out->scl_data.taps.h_taps = 6; + spl_out->scl_data.taps.v_taps = 6; spl_out->scl_data.taps.h_taps_c = 4; - } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) - /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; - else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_out->scl_data.taps.v_taps_c = 4; + } else { /* RGB */ + spl_out->scl_data.taps.h_taps = 6; + spl_out->scl_data.taps.v_taps = 6; + spl_out->scl_data.taps.h_taps_c = 6; + spl_out->scl_data.taps.v_taps_c = 6; + } + } /*Ensure we can support the requested number of vtaps*/ min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); @@ -794,43 +955,103 @@ static bool spl_get_optimal_number_of_taps( if (spl_out->scl_data.taps.v_taps_c > max_taps_c) spl_out->scl_data.taps.v_taps_c = max_taps_c; - if (spl_in->prefer_easf) { - // EASF can be enabled only for taps 3,4,6 - // If optimal no of taps is 5, then set it to 4 - // If optimal no of taps is 7 or 8, then set it to 6 + + if (!skip_easf) { + /* + * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3 + * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV + * + * If LB does not support 3, 4, or 6 taps, then disable EASF_V + * and only enable EASF_H. So for RGB, support 6x2, 4x2 + * and for NL YUV420, support 6x2 for Y and 4x2 for UV + * + * All other cases, have to disable EASF_V and EASF_H + * + * If optimal no of taps is 5, then set it to 4 + * If optimal no of taps is 7 or 8, then fine since max tap is 6 + * + */ if (spl_out->scl_data.taps.v_taps == 5) spl_out->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) - spl_out->scl_data.taps.v_taps = 6; if (spl_out->scl_data.taps.v_taps_c == 5) spl_out->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) - spl_out->scl_data.taps.v_taps_c = 6; if (spl_out->scl_data.taps.h_taps == 5) spl_out->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) - spl_out->scl_data.taps.h_taps = 6; if (spl_out->scl_data.taps.h_taps_c == 5) spl_out->scl_data.taps.h_taps_c = 4; - if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) - spl_out->scl_data.taps.h_taps_c = 6; + if (spl_is_yuv420(spl_in->basic_in.format)) { + if ((spl_out->scl_data.taps.h_taps <= 4) || + (spl_out->scl_data.taps.h_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if ((spl_out->scl_data.taps.v_taps <= 3) || + (spl_out->scl_data.taps.v_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + ASSERT((spl_out->scl_data.taps.v_taps > 1) && + (spl_out->scl_data.taps.v_taps_c > 1)); + } else { /* RGB */ + if (spl_out->scl_data.taps.h_taps <= 3) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if (spl_out->scl_data.taps.v_taps < 3) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + ASSERT(spl_out->scl_data.taps.v_taps > 1); + } + } else { + *enable_easf_v = false; + *enable_easf_h = false; } // end of if prefer_easf - if (!spl_in->basic_out.always_scale) { - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) + + /* Sharpener requires scaler to be enabled, including for 1:1 + * Check if ISHARP can be enabled + * If ISHARP is not enabled, for 1:1, set taps to 1 and disable + * EASF + * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if + * EASF is not enabled + */ + + *enable_isharp = spl_get_isharp_en(spl_in, spl_out); + if (!*enable_isharp && !spl_in->basic_out.always_scale) { + if ((IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_out->scl_data.ratios.vert))) { spl_out->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) spl_out->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) + spl_out->scl_data.taps.h_taps_c = 1; + + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) + spl_out->scl_data.taps.v_taps_c = 1; + + *enable_easf_v = false; + *enable_easf_h = false; + } else { + if ((!*enable_easf_h) && + (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c))) + spl_out->scl_data.taps.h_taps_c = 1; + + if ((!*enable_easf_v) && + (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c))) + spl_out->scl_data.taps.v_taps_c = 1; + } } return true; } + static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { @@ -890,62 +1111,10 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; } -static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - return NULL; - } -} -static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); -} - -static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if ((taps == 3) || (taps == 4) || (taps == 6)) - return spl_get_filter_isharp_bs_4tap_64p(); - else { - /* should never happen, bug */ - return NULL; - } -} -static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.h_taps); - dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.v_taps); -} /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out, + bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; @@ -953,6 +1122,8 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; + bool enable_easf = enable_easf_v || enable_easf_h; + // Set values for recout dscl_prog_data->recout = spl_out->scl_data.recout; // Set values for MPC Size @@ -960,7 +1131,8 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; // SCL_MODE - Set SCL_MODE data - dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, + enable_easf); // SCL_BLACK_COLOR spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); @@ -975,99 +1147,97 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set viewport_c dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data); + spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } -/* Enable EASF ?*/ -static bool enable_easf(int scale_ratio, int taps, - enum linear_light_scaling lls_pref, bool prefer_easf) -{ - // Is downscaling > 6:1 ? - if (scale_ratio > 6) { - // END - No EASF support for downscaling > 6:1 - return false; - } - // Is upscaling or downscaling up to 2:1? - if (scale_ratio <= 2) { - // Is linear scaling or EASF preferred? - if (lls_pref == LLS_PREF_YES || prefer_easf) { - // LB support taps 3, 4, 6 - if (taps == 3 || taps == 4 || taps == 6) { - // END - EASF supported - return true; - } - } - } - // END - EASF not supported - return false; -} -/* Set EASF data */ -static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, - bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format) -{ - if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - else - dscl_prog_data->easf_matrix_mode = 0; +/* Set EASF data */ +static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, + bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format, enum system_setup setup) +{ + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 1; + dscl_prog_data->easf_v_sharp_factor = 0; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) + spl_get_3tap_dntilt_uptilt_offset(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTiltMaxVal ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) + spl_get_3tap_uptilt_maxval(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) + spl_get_3tap_dntilt_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) + spl_get_3tap_uptilt1_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) + spl_get_3tap_uptilt2_slope(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) + spl_get_3tap_uptilt2_offset(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_out->scl_data.taps.v_taps, + spl_out->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1088,13 +1258,41 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1113,11 +1311,11 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } } else dscl_prog_data->easf_v_en = false; @@ -1125,52 +1323,63 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 1; + dscl_prog_data->easf_h_sharp_factor = 0; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_h_bf3_mode = - 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_out->scl_data.taps.h_taps, + spl_out->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1188,12 +1397,40 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1211,25 +1448,36 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient + if (setup == HDR_L) { + dscl_prog_data->easf_matrix_c0 = + 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) + dscl_prog_data->easf_matrix_c1 = + 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) + dscl_prog_data->easf_matrix_c2 = + 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } else { // SDR_L + dscl_prog_data->easf_matrix_c0 = + 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) + dscl_prog_data->easf_matrix_c1 = + 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) + dscl_prog_data->easf_matrix_c2 = + 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1241,27 +1489,43 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + + if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + /* + * 2-bit, BF3 chroma mode correction calculation mode + * Needs to be disabled for YUV420 mode + * Override lookup value + */ + dscl_prog_data->easf_v_bf3_mode = 0; + dscl_prog_data->easf_h_bf3_mode = 0; + } else + dscl_prog_data->easf_matrix_mode = 0; + } + /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (dscl_prog_data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE + if (data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; + else if (data->taps.v_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; + else if (data->taps.v_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, struct fixed31_32 ratio, + enum system_setup setup) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1270,10 +1534,12 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } dscl_prog_data->isharp_en = 1; // ISHARP_EN - dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (dscl_prog_data->taps.h_taps == 6) - spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE + if (data->taps.h_taps == 6) { + dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ + spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* ISHARP_NOISEDET_MODE */ + } else + dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1282,50 +1548,67 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ + if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - switch (adp_sharpness.sharpness) { - case SHARPNESS_LOW: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); - break; - case SHARPNESS_MID: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); - break; - case SHARPNESS_HIGH: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; - default: - BREAK_TO_DEBUGGER(); + if (setup == SDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 312; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1EC; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -20 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } + spl_build_isharp_1dlut_from_reference_curve(ratio, setup); + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut( + adp_sharpness.sharpness); + // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1346,59 +1629,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, // Set the values as per lookup table spl_set_blur_scale_data(dscl_prog_data, data); } -static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, - int vscale_ratio, int hscale_ratio, struct spl_taps taps, - enum spl_pixel_format format) -{ - bool enable_isharp = false; - - if (adp_sharpness.enable == false) - return enable_isharp; // Return if adaptive sharpness is disabled - // Is downscaling ? - if (vscale_ratio > 1 || hscale_ratio > 1) { - // END - No iSHARP support for downscaling - return enable_isharp; - } - // Scaling is up to 1:1 (no scaling) or upscaling - - /* Only apply sharpness to NV12 and not P010 */ - if (format != SPL_PIXEL_FORMAT_420BPP8) - return enable_isharp; - - // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 - if (taps.h_taps == 4 || taps.h_taps == 6 || - taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { - // END - iSHARP supported - enable_isharp = true; - } - return enable_isharp; -} - -static bool spl_choose_lls_policy(enum spl_pixel_format format, - enum spl_transfer_func_type tf_type, - enum spl_transfer_func_predefined tf_predefined_type, - enum linear_light_scaling *lls_pref) -{ - if (spl_is_yuv420(format)) { - *lls_pref = LLS_PREF_NO; - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) - return true; - } else { /* RGB or YUV444 */ - if (tf_type == SPL_TF_TYPE_PREDEFINED) { - if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || - (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) - *lls_pref = LLS_PREF_NO; - else - *lls_pref = LLS_PREF_YES; - return true; - } else if (tf_type == SPL_TF_TYPE_BYPASS) { - *lls_pref = LLS_PREF_YES; - return true; - } - } - *lls_pref = LLS_PREF_NO; - return false; -} /* Calculate scaler parameters */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) @@ -1406,8 +1636,13 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; - bool lls_enable_easf = true; + int vratio = 0; + int hratio = 0; const struct spl_scaler_data *data = &spl_out->scl_data; + struct fixed31_32 isharp_scale_ratio; + enum system_setup setup; + bool enable_isharp = false; + // All SPL calls /* recout calculation */ /* depends on h_active */ @@ -1419,7 +1654,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality); + spl_out, &spl_in->scaling_quality, &enable_easf_v, + &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario @@ -1434,37 +1670,33 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) if (!res) return res; - /* - * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer - * function to determine whether to use LINEAR or NONLINEAR scaling - */ - if (spl_in->lls_pref == LLS_PREF_DONT_CARE) - lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, - spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, - &spl_in->lls_pref); - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out); + spl_set_dscl_prog_data(spl_in, spl_out, enable_easf_v, enable_easf_h, enable_isharp); - int vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - int hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); - if (!lls_enable_easf || spl_in->disable_easf) { - enable_easf_v = false; - enable_easf_h = false; + if (spl_in->lls_pref == LLS_PREF_YES) { + if (spl_in->is_hdr_on) + setup = HDR_L; + else + setup = SDR_L; } else { - /* Enable EASF on vertical? */ - enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); - /* Enable EASF on horizontal? */ - enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); + if (spl_in->is_hdr_on) + setup = HDR_NL; + else + setup = SDR_NL; } // Set EASF - spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format); + spl_set_easf_data(spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format, setup); // Set iSHARP - bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, - spl_out->scl_data.taps, spl_in->basic_in.format); + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + if (vratio <= hratio) + isharp_scale_ratio = spl_out->scl_data.recip_ratios.vert; + else + isharp_scale_ratio = spl_out->scl_data.recip_ratios.horz; + spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c new file mode 100644 index 000000000000..99238644e0a1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_filters.h" + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps) +{ + int num_entries = NUM_PHASES_COEFF * num_taps; + int i; + + for (i = 0; i < num_entries; i++) + *(s1_12_filter + i) = *(s1_10_filter + i) * 4; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h new file mode 100644 index 000000000000..20439cdbdb10 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_FILTERS_H__ +#define __DC_SPL_FILTERS_H__ + +#include "dc_spl_types.h" + +#define NUM_PHASES_COEFF 33 + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps); + +#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 8bc838c7c3c5..a5e544406e91 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc_spl_types.h" +#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -231,6 +232,53 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 3.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 40.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_3p0x[32] = { +0x03010000, +0x0F0B0805, +0x211E1813, +0x2B292624, +0x3533302E, +0x3E3C3A37, +0x46444240, +0x4D4B4A48, +0x5352504F, +0x59575655, +0x5D5C5B5A, +0x61605F5E, +0x64646362, +0x66666565, +0x68686767, +0x68686868, +0x68686868, +0x67676868, +0x65656666, +0x62636464, +0x5E5F6061, +0x5A5B5C5D, +0x55565759, +0x4F505253, +0x484A4B4D, +0x40424446, +0x373A3C3E, +0x2E303335, +0x2426292B, +0x191B1E21, +0x0D101316, +0x0003060A, +}; + +//======================================== // Wide scaler coefficients //======================================================== // gen_scaler_coeffs.m @@ -285,7 +333,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // Blur & Scale LPF // S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -320,6 +368,228 @@ static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; +//======================================================== +// gen_BlurScale_coeffs.m +// 25-Apr-2022 +// 4 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_4tap_64p[132] = { +0x00E5, 0x0237, 0x00E4, 0x0000, +0x00DE, 0x0237, 0x00EB, 0x0000, +0x00D7, 0x0236, 0x00F2, 0x0001, +0x00D0, 0x0235, 0x00FA, 0x0001, +0x00C9, 0x0234, 0x0101, 0x0002, +0x00C2, 0x0233, 0x0108, 0x0003, +0x00BB, 0x0232, 0x0110, 0x0003, +0x00B5, 0x0230, 0x0117, 0x0004, +0x00AE, 0x022E, 0x011F, 0x0005, +0x00A8, 0x022C, 0x0126, 0x0006, +0x00A2, 0x022A, 0x012D, 0x0007, +0x009C, 0x0228, 0x0134, 0x0008, +0x0096, 0x0225, 0x013C, 0x0009, +0x0090, 0x0222, 0x0143, 0x000B, +0x008A, 0x021F, 0x014B, 0x000C, +0x0085, 0x021C, 0x0151, 0x000E, +0x007F, 0x0218, 0x015A, 0x000F, +0x007A, 0x0215, 0x0160, 0x0011, +0x0074, 0x0211, 0x0168, 0x0013, +0x006F, 0x020D, 0x016F, 0x0015, +0x006A, 0x0209, 0x0176, 0x0017, +0x0065, 0x0204, 0x017E, 0x0019, +0x0060, 0x0200, 0x0185, 0x001B, +0x005C, 0x01FB, 0x018C, 0x001D, +0x0057, 0x01F6, 0x0193, 0x0020, +0x0053, 0x01F1, 0x019A, 0x0022, +0x004E, 0x01EC, 0x01A1, 0x0025, +0x004A, 0x01E6, 0x01A8, 0x0028, +0x0046, 0x01E1, 0x01AF, 0x002A, +0x0042, 0x01DB, 0x01B6, 0x002D, +0x003F, 0x01D5, 0x01BB, 0x0031, +0x003B, 0x01CF, 0x01C2, 0x0034, +0x0037, 0x01C9, 0x01C9, 0x0037, +}; +//======================================================== +// gen_BlurScale_coeffs.m +// 09-Jun-2022 +// 3 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_3tap_64p[99] = { +0x0200, 0x0200, 0x0000, +0x01F6, 0x0206, 0x0004, +0x01EC, 0x020B, 0x0009, +0x01E2, 0x0211, 0x000D, +0x01D8, 0x0216, 0x0012, +0x01CE, 0x021C, 0x0016, +0x01C4, 0x0221, 0x001B, +0x01BA, 0x0226, 0x0020, +0x01B0, 0x022A, 0x0026, +0x01A6, 0x022F, 0x002B, +0x019C, 0x0233, 0x0031, +0x0192, 0x0238, 0x0036, +0x0188, 0x023C, 0x003C, +0x017E, 0x0240, 0x0042, +0x0174, 0x0244, 0x0048, +0x016A, 0x0248, 0x004E, +0x0161, 0x024A, 0x0055, +0x0157, 0x024E, 0x005B, +0x014D, 0x0251, 0x0062, +0x0144, 0x0253, 0x0069, +0x013A, 0x0256, 0x0070, +0x0131, 0x0258, 0x0077, +0x0127, 0x025B, 0x007E, +0x011E, 0x025C, 0x0086, +0x0115, 0x025E, 0x008D, +0x010B, 0x0260, 0x0095, +0x0102, 0x0262, 0x009C, +0x00F9, 0x0263, 0x00A4, +0x00F0, 0x0264, 0x00AC, +0x00E7, 0x0265, 0x00B4, +0x00DF, 0x0264, 0x00BD, +0x00D6, 0x0265, 0x00C5, +0x00CD, 0x0266, 0x00CD, +}; + +/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ +static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; +static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; +static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_nl[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 2, 1}, + {11, 10, 175, 100}, + {1075, 1000, 15, 10}, + {105, 100, 125, 100}, + {1025, 1000, 1, 1}, + {1, 1, 75, 100}, + }, + { /* HIGH */ + {1125, 1000, 35, 10}, + {11, 10, 32, 10}, + {1075, 1000, 29, 10}, + {105, 100, 26, 10}, + {1025, 1000, 23, 10}, + {1, 1, 2, 1}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_l[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 15, 10}, + {11, 10, 135, 100}, + {1075, 1000, 12, 10}, + {105, 100, 105, 100}, + {1025, 1000, 9, 10}, + {1, 1, 75, 100}, + }, + { /* HIGH */ + {1125, 1000, 25, 10}, + {11, 10, 23, 10}, + {1075, 1000, 21, 10}, + {105, 100, 19, 10}, + {1025, 1000, 17, 10}, + {1, 1, 15, 10}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_nl[3][6] = { + { /* LOW */ + {1125, 1000, 5, 10}, + {11, 10, 4, 10}, + {1075, 1000, 3, 10}, + {105, 100, 2, 10}, + {1025, 1000, 1, 10}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 1, 1}, + {11, 10, 9, 10}, + {1075, 1000, 8, 10}, + {105, 100, 7, 10}, + {1025, 1000, 6, 10}, + {1, 1, 5, 10}, + }, + { /* HIGH */ + {1125, 1000, 15, 10}, + {11, 10, 14, 10}, + {1075, 1000, 13, 10}, + {105, 100, 12, 10}, + {1025, 1000, 11, 10}, + {1, 1, 1, 1}, + }, +}; + +struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_l[3][6] = { + { /* LOW */ + {1125, 1000, 75, 100}, + {11, 10, 6, 10}, + {1075, 1000, 45, 100}, + {105, 100, 3, 10}, + {1025, 1000, 15, 100}, + {1, 1, 0, 1}, + }, + { /* MID */ + {1125, 1000, 15, 10}, + {11, 10, 135, 100}, + {1075, 1000, 12, 10}, + {105, 100, 105, 100}, + {1025, 1000, 9, 10}, + {1, 1, 75, 100}, + }, + { /* HIGH */ + {1125, 1000, 25, 10}, + {11, 10, 23, 10}, + {1075, 1000, 21, 10}, + {105, 100, 19, 10}, + {1025, 1000, 17, 10}, + {1, 1, 15, 10}, + }, +}; + +/* Pre-generated 1DLUT for LOW for given setup and sharpness level */ +uint32_t filter_isharp_1D_lut_pregen[3][32] = { + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -340,11 +610,162 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) +{ + return filter_isharp_1D_lut_3p0x; +} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) { - return filter_isharp_bs_4tap_64p; + return filter_isharp_bs_4tap_in_6_64p_s1_12; } +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +{ + return filter_isharp_bs_4tap_64p_s1_12; +} +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) +{ + return filter_isharp_bs_3tap_64p_s1_12; +} + +void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup) +{ + uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; + struct fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; + int i, j; + struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; + int num_sharp_ramp_levels; + int size_1dlut; + int sharp_calc_int; + uint32_t filter_pregen_store[32]; + + /* + * Given scaling ratio and current system setup, build pregenerated + * 1DLUT tables for three sharpness levels - LOW, MID, HIGH + */ + for (i = 0; i < 3; i++) { + /* + * Based on setup ( HDR/SDR, L/NL ), get base scale ratio to + * sharpness curve + */ + switch (setup) { + case HDR_L: + setup_lookup_ptr = scale_to_sharp_hdr_l[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_l[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case HDR_NL: + setup_lookup_ptr = scale_to_sharp_hdr_nl[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_nl[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case SDR_L: + setup_lookup_ptr = scale_to_sharp_sdr_l[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_l[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + case SDR_NL: + default: + setup_lookup_ptr = scale_to_sharp_sdr_nl[i]; + num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_nl[i])/ + sizeof(struct scale_ratio_to_sharpness_level_lookup); + break; + } + + /* + * Compare desired scaling ratio and find adjusted sharpness from + * base scale ratio to sharpness curve + */ + j = 0; + sharp_level = dc_fixpt_zero; + while (j < num_sharp_ramp_levels) { + ratio_level = dc_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, + setup_lookup_ptr->ratio_denom); + if (ratio.value >= ratio_level.value) { + sharp_level = dc_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, + setup_lookup_ptr->sharpness_denom); + break; + } + setup_lookup_ptr++; + j++; + } + + /* + * Calculate LUT_128_gained with this equation: + * + * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) + * where LUT_128[i] is contents of 3p0x isharp 1dlut + * where sharpLevel is desired sharpness level + * where iGain is base sharpness level 3.0 + * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level + */ + byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; + byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; + size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); + memset(byte_ptr_1dlut_dst, 0, size_1dlut); + for (j = 0; j < size_1dlut; j++) { + sharp_base = dc_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = dc_fixpt_mul(sharp_base, sharp_level); + sharp_calc = dc_fixpt_div(sharp_calc, dc_fixpt_from_int(3)); + sharp_calc = dc_fixpt_min(dc_fixpt_from_int(255), sharp_calc); + sharp_calc = dc_fixpt_add(sharp_calc, dc_fixpt_from_fraction(1, 2)); + sharp_calc_int = dc_fixpt_floor(sharp_calc); + if (sharp_calc_int > 255) + sharp_calc_int = 255; + *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; + + byte_ptr_1dlut_src++; + byte_ptr_1dlut_dst++; + } + + /* Compare if filter has change, if so update */ + if (memcmp((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut) != 0) + memcpy((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut); + } +} + +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness) +{ + return filter_isharp_1D_lut_pregen[sharpness]; +} + +void spl_init_blur_scale_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, + filter_isharp_bs_3tap_64p_s1_12, 3); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, + filter_isharp_bs_4tap_64p_s1_12, 4); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, + filter_isharp_bs_4tap_in_6_64p_s1_12, 6); +} + +#ifdef CONFIG_DRM_AMD_DC_FP +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) +{ + if (taps == 3) + return spl_get_filter_isharp_bs_3tap_64p(); + else if (taps == 4) + return spl_get_filter_isharp_bs_4tap_64p(); + else if (taps == 6) + return spl_get_filter_isharp_bs_4tap_in_6_64p(); + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_blur_scale_h = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); + + dscl_prog_data->filter_blur_scale_v = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); +} +#endif + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 1aaf4c50c1bc..c8b7cd6404dd 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -12,6 +12,37 @@ const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); + +struct scale_ratio_to_sharpness_level_lookup { + unsigned int ratio_numer; + unsigned int ratio_denom; + unsigned int sharpness_numer; + unsigned int sharpness_denom; +}; + +struct sharpness_level_mapping { + unsigned int level; + unsigned int level_numer; + unsigned int level_denom; +}; + +enum system_setup { + SDR_NL = 0, + SDR_L, + HDR_NL, + HDR_L +}; + +void spl_init_blur_scale_coeffs(void); +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data); + +void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup); +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c new file mode 100644 index 000000000000..83dd3435ebcc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -0,0 +1,1725 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_filters.h" +#include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.3_p_10qb_ +// 3 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EC, 0x020B, 0x0009, + 0x01E2, 0x0211, 0x000D, + 0x01D8, 0x0216, 0x0012, + 0x01CE, 0x021C, 0x0016, + 0x01C4, 0x0221, 0x001B, + 0x01BA, 0x0226, 0x0020, + 0x01B0, 0x022A, 0x0026, + 0x01A6, 0x022F, 0x002B, + 0x019C, 0x0233, 0x0031, + 0x0192, 0x0238, 0x0036, + 0x0188, 0x023C, 0x003C, + 0x017E, 0x0240, 0x0042, + 0x0174, 0x0244, 0x0048, + 0x016A, 0x0248, 0x004E, + 0x0161, 0x024A, 0x0055, + 0x0157, 0x024E, 0x005B, + 0x014D, 0x0251, 0x0062, + 0x0144, 0x0253, 0x0069, + 0x013A, 0x0256, 0x0070, + 0x0131, 0x0258, 0x0077, + 0x0127, 0x025B, 0x007E, + 0x011E, 0x025C, 0x0086, + 0x0115, 0x025E, 0x008D, + 0x010B, 0x0260, 0x0095, + 0x0102, 0x0262, 0x009C, + 0x00F9, 0x0263, 0x00A4, + 0x00F0, 0x0264, 0x00AC, + 0x00E7, 0x0265, 0x00B4, + 0x00DF, 0x0264, 0x00BD, + 0x00D6, 0x0265, 0x00C5, + 0x00CD, 0x0266, 0x00CD, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.4_p_10qb_ +// 3 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EB, 0x020E, 0x0007, + 0x01E1, 0x0214, 0x000B, + 0x01D7, 0x021A, 0x000F, + 0x01CD, 0x0220, 0x0013, + 0x01C2, 0x0226, 0x0018, + 0x01B8, 0x022C, 0x001C, + 0x01AE, 0x0231, 0x0021, + 0x01A3, 0x0237, 0x0026, + 0x0199, 0x023C, 0x002B, + 0x018F, 0x0240, 0x0031, + 0x0185, 0x0245, 0x0036, + 0x017A, 0x024A, 0x003C, + 0x0170, 0x024F, 0x0041, + 0x0166, 0x0253, 0x0047, + 0x015C, 0x0257, 0x004D, + 0x0152, 0x025A, 0x0054, + 0x0148, 0x025E, 0x005A, + 0x013E, 0x0261, 0x0061, + 0x0134, 0x0264, 0x0068, + 0x012B, 0x0266, 0x006F, + 0x0121, 0x0269, 0x0076, + 0x0117, 0x026C, 0x007D, + 0x010E, 0x026E, 0x0084, + 0x0104, 0x0270, 0x008C, + 0x00FB, 0x0271, 0x0094, + 0x00F2, 0x0272, 0x009C, + 0x00E9, 0x0273, 0x00A4, + 0x00E0, 0x0274, 0x00AC, + 0x00D7, 0x0275, 0x00B4, + 0x00CE, 0x0275, 0x00BD, + 0x00C5, 0x0276, 0x00C5, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.5_p_10qb_ +// 3 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F5, 0x0209, 0x0002, + 0x01EA, 0x0211, 0x0005, + 0x01DF, 0x021A, 0x0007, + 0x01D4, 0x0222, 0x000A, + 0x01C9, 0x022A, 0x000D, + 0x01BE, 0x0232, 0x0010, + 0x01B3, 0x0239, 0x0014, + 0x01A8, 0x0241, 0x0017, + 0x019D, 0x0248, 0x001B, + 0x0192, 0x024F, 0x001F, + 0x0187, 0x0255, 0x0024, + 0x017C, 0x025C, 0x0028, + 0x0171, 0x0262, 0x002D, + 0x0166, 0x0268, 0x0032, + 0x015B, 0x026E, 0x0037, + 0x0150, 0x0273, 0x003D, + 0x0146, 0x0278, 0x0042, + 0x013B, 0x027D, 0x0048, + 0x0130, 0x0282, 0x004E, + 0x0126, 0x0286, 0x0054, + 0x011B, 0x028A, 0x005B, + 0x0111, 0x028D, 0x0062, + 0x0107, 0x0290, 0x0069, + 0x00FD, 0x0293, 0x0070, + 0x00F3, 0x0296, 0x0077, + 0x00E9, 0x0298, 0x007F, + 0x00DF, 0x029A, 0x0087, + 0x00D5, 0x029C, 0x008F, + 0x00CC, 0x029D, 0x0097, + 0x00C3, 0x029E, 0x009F, + 0x00BA, 0x029E, 0x00A8, + 0x00B1, 0x029E, 0x00B1, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.6_p_10qb_ +// 3 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F4, 0x020B, 0x0001, + 0x01E8, 0x0216, 0x0002, + 0x01DC, 0x0221, 0x0003, + 0x01D0, 0x022B, 0x0005, + 0x01C4, 0x0235, 0x0007, + 0x01B8, 0x0240, 0x0008, + 0x01AC, 0x0249, 0x000B, + 0x01A0, 0x0253, 0x000D, + 0x0194, 0x025C, 0x0010, + 0x0188, 0x0265, 0x0013, + 0x017C, 0x026E, 0x0016, + 0x0170, 0x0277, 0x0019, + 0x0164, 0x027F, 0x001D, + 0x0158, 0x0287, 0x0021, + 0x014C, 0x028F, 0x0025, + 0x0140, 0x0297, 0x0029, + 0x0135, 0x029D, 0x002E, + 0x0129, 0x02A4, 0x0033, + 0x011D, 0x02AB, 0x0038, + 0x0112, 0x02B0, 0x003E, + 0x0107, 0x02B5, 0x0044, + 0x00FC, 0x02BA, 0x004A, + 0x00F1, 0x02BF, 0x0050, + 0x00E6, 0x02C3, 0x0057, + 0x00DB, 0x02C7, 0x005E, + 0x00D1, 0x02CA, 0x0065, + 0x00C7, 0x02CC, 0x006D, + 0x00BD, 0x02CE, 0x0075, + 0x00B3, 0x02D0, 0x007D, + 0x00A9, 0x02D2, 0x0085, + 0x00A0, 0x02D2, 0x008E, + 0x0097, 0x02D2, 0x0097, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.7_p_10qb_ +// 3 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F3, 0x020D, 0x0000, + 0x01E5, 0x021B, 0x0000, + 0x01D8, 0x0228, 0x0000, + 0x01CB, 0x0235, 0x0000, + 0x01BD, 0x0243, 0x0000, + 0x01B0, 0x024F, 0x0001, + 0x01A2, 0x025C, 0x0002, + 0x0195, 0x0268, 0x0003, + 0x0187, 0x0275, 0x0004, + 0x017A, 0x0280, 0x0006, + 0x016D, 0x028C, 0x0007, + 0x015F, 0x0298, 0x0009, + 0x0152, 0x02A2, 0x000C, + 0x0145, 0x02AD, 0x000E, + 0x0138, 0x02B7, 0x0011, + 0x012B, 0x02C0, 0x0015, + 0x011E, 0x02CA, 0x0018, + 0x0111, 0x02D3, 0x001C, + 0x0105, 0x02DB, 0x0020, + 0x00F8, 0x02E3, 0x0025, + 0x00EC, 0x02EA, 0x002A, + 0x00E0, 0x02F1, 0x002F, + 0x00D5, 0x02F6, 0x0035, + 0x00C9, 0x02FC, 0x003B, + 0x00BE, 0x0301, 0x0041, + 0x00B3, 0x0305, 0x0048, + 0x00A8, 0x0309, 0x004F, + 0x009E, 0x030C, 0x0056, + 0x0094, 0x030E, 0x005E, + 0x008A, 0x0310, 0x0066, + 0x0081, 0x0310, 0x006F, + 0x0077, 0x0312, 0x0077, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.8_p_10qb_ +// 3 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F1, 0x0210, 0x0FFF, + 0x01E2, 0x0220, 0x0FFE, + 0x01D2, 0x0232, 0x0FFC, + 0x01C3, 0x0241, 0x0FFC, + 0x01B4, 0x0251, 0x0FFB, + 0x01A4, 0x0262, 0x0FFA, + 0x0195, 0x0271, 0x0FFA, + 0x0186, 0x0281, 0x0FF9, + 0x0176, 0x0291, 0x0FF9, + 0x0167, 0x02A0, 0x0FF9, + 0x0158, 0x02AE, 0x0FFA, + 0x0149, 0x02BD, 0x0FFA, + 0x013A, 0x02CB, 0x0FFB, + 0x012C, 0x02D7, 0x0FFD, + 0x011D, 0x02E5, 0x0FFE, + 0x010F, 0x02F1, 0x0000, + 0x0101, 0x02FD, 0x0002, + 0x00F3, 0x0308, 0x0005, + 0x00E5, 0x0313, 0x0008, + 0x00D8, 0x031D, 0x000B, + 0x00CB, 0x0326, 0x000F, + 0x00BE, 0x032F, 0x0013, + 0x00B2, 0x0337, 0x0017, + 0x00A6, 0x033E, 0x001C, + 0x009A, 0x0345, 0x0021, + 0x008F, 0x034A, 0x0027, + 0x0084, 0x034F, 0x002D, + 0x0079, 0x0353, 0x0034, + 0x006F, 0x0356, 0x003B, + 0x0065, 0x0358, 0x0043, + 0x005C, 0x0359, 0x004B, + 0x0053, 0x035A, 0x0053, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.9_p_10qb_ +// 3 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EE, 0x0214, 0x0FFE, + 0x01DC, 0x0228, 0x0FFC, + 0x01CA, 0x023C, 0x0FFA, + 0x01B9, 0x024F, 0x0FF8, + 0x01A7, 0x0262, 0x0FF7, + 0x0195, 0x0276, 0x0FF5, + 0x0183, 0x028A, 0x0FF3, + 0x0172, 0x029C, 0x0FF2, + 0x0160, 0x02AF, 0x0FF1, + 0x014F, 0x02C2, 0x0FEF, + 0x013E, 0x02D4, 0x0FEE, + 0x012D, 0x02E5, 0x0FEE, + 0x011C, 0x02F7, 0x0FED, + 0x010C, 0x0307, 0x0FED, + 0x00FB, 0x0318, 0x0FED, + 0x00EC, 0x0327, 0x0FED, + 0x00DC, 0x0336, 0x0FEE, + 0x00CD, 0x0344, 0x0FEF, + 0x00BE, 0x0352, 0x0FF0, + 0x00B0, 0x035E, 0x0FF2, + 0x00A2, 0x036A, 0x0FF4, + 0x0095, 0x0375, 0x0FF6, + 0x0088, 0x037F, 0x0FF9, + 0x007B, 0x0388, 0x0FFD, + 0x006F, 0x0391, 0x0000, + 0x0064, 0x0397, 0x0005, + 0x0059, 0x039D, 0x000A, + 0x004E, 0x03A3, 0x000F, + 0x0045, 0x03A6, 0x0015, + 0x003B, 0x03A9, 0x001C, + 0x0033, 0x03AA, 0x0023, + 0x002A, 0x03AC, 0x002A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_1_p_10qb_ +// 3 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EB, 0x0217, 0x0FFE, + 0x01D5, 0x022F, 0x0FFC, + 0x01C0, 0x0247, 0x0FF9, + 0x01AB, 0x025E, 0x0FF7, + 0x0196, 0x0276, 0x0FF4, + 0x0181, 0x028D, 0x0FF2, + 0x016C, 0x02A5, 0x0FEF, + 0x0158, 0x02BB, 0x0FED, + 0x0144, 0x02D1, 0x0FEB, + 0x0130, 0x02E8, 0x0FE8, + 0x011C, 0x02FE, 0x0FE6, + 0x0109, 0x0313, 0x0FE4, + 0x00F6, 0x0328, 0x0FE2, + 0x00E4, 0x033C, 0x0FE0, + 0x00D2, 0x034F, 0x0FDF, + 0x00C0, 0x0363, 0x0FDD, + 0x00B0, 0x0374, 0x0FDC, + 0x009F, 0x0385, 0x0FDC, + 0x0090, 0x0395, 0x0FDB, + 0x0081, 0x03A4, 0x0FDB, + 0x0072, 0x03B3, 0x0FDB, + 0x0064, 0x03C0, 0x0FDC, + 0x0057, 0x03CC, 0x0FDD, + 0x004B, 0x03D6, 0x0FDF, + 0x003F, 0x03E0, 0x0FE1, + 0x0034, 0x03E8, 0x0FE4, + 0x002A, 0x03EF, 0x0FE7, + 0x0020, 0x03F5, 0x0FEB, + 0x0017, 0x03FA, 0x0FEF, + 0x000F, 0x03FD, 0x0FF4, + 0x0007, 0x03FF, 0x0FFA, + 0x0000, 0x0400, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.3_p_10qb_ +// 4 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { + 0x0104, 0x01F8, 0x0104, 0x0000, + 0x00FE, 0x01F7, 0x010A, 0x0001, + 0x00F8, 0x01F6, 0x010F, 0x0003, + 0x00F2, 0x01F5, 0x0114, 0x0005, + 0x00EB, 0x01F4, 0x011B, 0x0006, + 0x00E5, 0x01F3, 0x0120, 0x0008, + 0x00DF, 0x01F2, 0x0125, 0x000A, + 0x00DA, 0x01F0, 0x012A, 0x000C, + 0x00D4, 0x01EE, 0x0130, 0x000E, + 0x00CE, 0x01ED, 0x0135, 0x0010, + 0x00C8, 0x01EB, 0x013A, 0x0013, + 0x00C2, 0x01E9, 0x0140, 0x0015, + 0x00BD, 0x01E7, 0x0145, 0x0017, + 0x00B7, 0x01E5, 0x014A, 0x001A, + 0x00B1, 0x01E2, 0x0151, 0x001C, + 0x00AC, 0x01E0, 0x0155, 0x001F, + 0x00A7, 0x01DD, 0x015A, 0x0022, + 0x00A1, 0x01DB, 0x015F, 0x0025, + 0x009C, 0x01D8, 0x0165, 0x0027, + 0x0097, 0x01D5, 0x016A, 0x002A, + 0x0092, 0x01D2, 0x016E, 0x002E, + 0x008C, 0x01CF, 0x0174, 0x0031, + 0x0087, 0x01CC, 0x0179, 0x0034, + 0x0083, 0x01C9, 0x017D, 0x0037, + 0x007E, 0x01C5, 0x0182, 0x003B, + 0x0079, 0x01C2, 0x0187, 0x003E, + 0x0074, 0x01BE, 0x018C, 0x0042, + 0x0070, 0x01BA, 0x0190, 0x0046, + 0x006B, 0x01B7, 0x0195, 0x0049, + 0x0066, 0x01B3, 0x019A, 0x004D, + 0x0062, 0x01AF, 0x019E, 0x0051, + 0x005E, 0x01AB, 0x01A2, 0x0055, + 0x005A, 0x01A6, 0x01A6, 0x005A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.4_p_10qb_ +// 4 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { + 0x00FB, 0x0209, 0x00FC, 0x0000, + 0x00F5, 0x0209, 0x0101, 0x0001, + 0x00EE, 0x0208, 0x0108, 0x0002, + 0x00E8, 0x0207, 0x010E, 0x0003, + 0x00E2, 0x0206, 0x0114, 0x0004, + 0x00DB, 0x0205, 0x011A, 0x0006, + 0x00D5, 0x0204, 0x0120, 0x0007, + 0x00CF, 0x0203, 0x0125, 0x0009, + 0x00C9, 0x0201, 0x012C, 0x000A, + 0x00C3, 0x01FF, 0x0132, 0x000C, + 0x00BD, 0x01FD, 0x0138, 0x000E, + 0x00B7, 0x01FB, 0x013E, 0x0010, + 0x00B1, 0x01F9, 0x0144, 0x0012, + 0x00AC, 0x01F7, 0x0149, 0x0014, + 0x00A6, 0x01F4, 0x0150, 0x0016, + 0x00A0, 0x01F2, 0x0156, 0x0018, + 0x009B, 0x01EF, 0x015C, 0x001A, + 0x0095, 0x01EC, 0x0162, 0x001D, + 0x0090, 0x01E9, 0x0168, 0x001F, + 0x008B, 0x01E6, 0x016D, 0x0022, + 0x0085, 0x01E3, 0x0173, 0x0025, + 0x0080, 0x01DF, 0x0179, 0x0028, + 0x007B, 0x01DC, 0x017E, 0x002B, + 0x0076, 0x01D8, 0x0184, 0x002E, + 0x0071, 0x01D4, 0x018A, 0x0031, + 0x006D, 0x01D1, 0x018E, 0x0034, + 0x0068, 0x01CD, 0x0193, 0x0038, + 0x0063, 0x01C8, 0x019A, 0x003B, + 0x005F, 0x01C4, 0x019E, 0x003F, + 0x005B, 0x01C0, 0x01A3, 0x0042, + 0x0056, 0x01BB, 0x01A9, 0x0046, + 0x0052, 0x01B7, 0x01AD, 0x004A, + 0x004E, 0x01B2, 0x01B2, 0x004E, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.5_p_10qb_ +// 4 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { + 0x00E5, 0x0236, 0x00E5, 0x0000, + 0x00DE, 0x0235, 0x00ED, 0x0000, + 0x00D7, 0x0235, 0x00F4, 0x0000, + 0x00D0, 0x0235, 0x00FB, 0x0000, + 0x00C9, 0x0234, 0x0102, 0x0001, + 0x00C2, 0x0233, 0x010A, 0x0001, + 0x00BC, 0x0232, 0x0111, 0x0001, + 0x00B5, 0x0230, 0x0119, 0x0002, + 0x00AE, 0x022F, 0x0121, 0x0002, + 0x00A8, 0x022D, 0x0128, 0x0003, + 0x00A2, 0x022B, 0x012F, 0x0004, + 0x009B, 0x0229, 0x0137, 0x0005, + 0x0095, 0x0226, 0x013F, 0x0006, + 0x008F, 0x0224, 0x0146, 0x0007, + 0x0089, 0x0221, 0x014E, 0x0008, + 0x0083, 0x021E, 0x0155, 0x000A, + 0x007E, 0x021B, 0x015C, 0x000B, + 0x0078, 0x0217, 0x0164, 0x000D, + 0x0072, 0x0213, 0x016D, 0x000E, + 0x006D, 0x0210, 0x0173, 0x0010, + 0x0068, 0x020C, 0x017A, 0x0012, + 0x0063, 0x0207, 0x0182, 0x0014, + 0x005E, 0x0203, 0x0189, 0x0016, + 0x0059, 0x01FE, 0x0191, 0x0018, + 0x0054, 0x01F9, 0x0198, 0x001B, + 0x0050, 0x01F4, 0x019F, 0x001D, + 0x004B, 0x01EF, 0x01A6, 0x0020, + 0x0047, 0x01EA, 0x01AC, 0x0023, + 0x0043, 0x01E4, 0x01B3, 0x0026, + 0x003F, 0x01DF, 0x01B9, 0x0029, + 0x003B, 0x01D9, 0x01C0, 0x002C, + 0x0037, 0x01D3, 0x01C6, 0x0030, + 0x0033, 0x01CD, 0x01CD, 0x0033, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.6_p_10qb_ +// 4 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { + 0x00C8, 0x026F, 0x00C9, 0x0000, + 0x00C0, 0x0270, 0x00D1, 0x0FFF, + 0x00B8, 0x0270, 0x00D9, 0x0FFF, + 0x00B1, 0x0270, 0x00E1, 0x0FFE, + 0x00A9, 0x026F, 0x00EB, 0x0FFD, + 0x00A2, 0x026E, 0x00F3, 0x0FFD, + 0x009A, 0x026D, 0x00FD, 0x0FFC, + 0x0093, 0x026C, 0x0105, 0x0FFC, + 0x008C, 0x026A, 0x010F, 0x0FFB, + 0x0085, 0x0268, 0x0118, 0x0FFB, + 0x007E, 0x0265, 0x0122, 0x0FFB, + 0x0078, 0x0263, 0x012A, 0x0FFB, + 0x0071, 0x0260, 0x0134, 0x0FFB, + 0x006B, 0x025C, 0x013E, 0x0FFB, + 0x0065, 0x0259, 0x0147, 0x0FFB, + 0x005F, 0x0255, 0x0151, 0x0FFB, + 0x0059, 0x0251, 0x015A, 0x0FFC, + 0x0054, 0x024D, 0x0163, 0x0FFC, + 0x004E, 0x0248, 0x016D, 0x0FFD, + 0x0049, 0x0243, 0x0176, 0x0FFE, + 0x0044, 0x023E, 0x017F, 0x0FFF, + 0x003F, 0x0238, 0x0189, 0x0000, + 0x003A, 0x0232, 0x0193, 0x0001, + 0x0036, 0x022C, 0x019C, 0x0002, + 0x0031, 0x0226, 0x01A5, 0x0004, + 0x002D, 0x021F, 0x01AF, 0x0005, + 0x0029, 0x0218, 0x01B8, 0x0007, + 0x0025, 0x0211, 0x01C1, 0x0009, + 0x0022, 0x020A, 0x01C9, 0x000B, + 0x001E, 0x0203, 0x01D2, 0x000D, + 0x001B, 0x01FB, 0x01DA, 0x0010, + 0x0018, 0x01F3, 0x01E3, 0x0012, + 0x0015, 0x01EB, 0x01EB, 0x0015, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.7_p_10qb_ +// 4 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { + 0x00A3, 0x02B9, 0x00A4, 0x0000, + 0x009A, 0x02BA, 0x00AD, 0x0FFF, + 0x0092, 0x02BA, 0x00B6, 0x0FFE, + 0x0089, 0x02BA, 0x00C1, 0x0FFC, + 0x0081, 0x02B9, 0x00CB, 0x0FFB, + 0x0079, 0x02B8, 0x00D5, 0x0FFA, + 0x0071, 0x02B7, 0x00DF, 0x0FF9, + 0x0069, 0x02B5, 0x00EA, 0x0FF8, + 0x0062, 0x02B3, 0x00F4, 0x0FF7, + 0x005B, 0x02B0, 0x00FF, 0x0FF6, + 0x0054, 0x02AD, 0x010B, 0x0FF4, + 0x004D, 0x02A9, 0x0117, 0x0FF3, + 0x0046, 0x02A5, 0x0123, 0x0FF2, + 0x0040, 0x02A1, 0x012D, 0x0FF2, + 0x003A, 0x029C, 0x0139, 0x0FF1, + 0x0034, 0x0297, 0x0145, 0x0FF0, + 0x002F, 0x0292, 0x0150, 0x0FEF, + 0x0029, 0x028C, 0x015C, 0x0FEF, + 0x0024, 0x0285, 0x0169, 0x0FEE, + 0x001F, 0x027F, 0x0174, 0x0FEE, + 0x001B, 0x0278, 0x017F, 0x0FEE, + 0x0016, 0x0270, 0x018D, 0x0FED, + 0x0012, 0x0268, 0x0199, 0x0FED, + 0x000E, 0x0260, 0x01A4, 0x0FEE, + 0x000B, 0x0258, 0x01AF, 0x0FEE, + 0x0007, 0x024F, 0x01BC, 0x0FEE, + 0x0004, 0x0246, 0x01C7, 0x0FEF, + 0x0001, 0x023D, 0x01D3, 0x0FEF, + 0x0FFE, 0x0233, 0x01DF, 0x0FF0, + 0x0FFC, 0x0229, 0x01EA, 0x0FF1, + 0x0FFA, 0x021F, 0x01F4, 0x0FF3, + 0x0FF8, 0x0215, 0x01FF, 0x0FF4, + 0x0FF6, 0x020A, 0x020A, 0x0FF6, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.8_p_10qb_ +// 4 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { + 0x0075, 0x0315, 0x0076, 0x0000, + 0x006C, 0x0316, 0x007F, 0x0FFF, + 0x0062, 0x0316, 0x008A, 0x0FFE, + 0x0059, 0x0315, 0x0096, 0x0FFC, + 0x0050, 0x0314, 0x00A1, 0x0FFB, + 0x0048, 0x0312, 0x00AD, 0x0FF9, + 0x0040, 0x0310, 0x00B8, 0x0FF8, + 0x0038, 0x030D, 0x00C5, 0x0FF6, + 0x0030, 0x030A, 0x00D1, 0x0FF5, + 0x0029, 0x0306, 0x00DE, 0x0FF3, + 0x0022, 0x0301, 0x00EB, 0x0FF2, + 0x001C, 0x02FC, 0x00F8, 0x0FF0, + 0x0015, 0x02F7, 0x0106, 0x0FEE, + 0x0010, 0x02F1, 0x0112, 0x0FED, + 0x000A, 0x02EA, 0x0121, 0x0FEB, + 0x0005, 0x02E3, 0x012F, 0x0FE9, + 0x0000, 0x02DB, 0x013D, 0x0FE8, + 0x0FFB, 0x02D3, 0x014C, 0x0FE6, + 0x0FF7, 0x02CA, 0x015A, 0x0FE5, + 0x0FF3, 0x02C1, 0x0169, 0x0FE3, + 0x0FF0, 0x02B7, 0x0177, 0x0FE2, + 0x0FEC, 0x02AD, 0x0186, 0x0FE1, + 0x0FE9, 0x02A2, 0x0196, 0x0FDF, + 0x0FE7, 0x0297, 0x01A4, 0x0FDE, + 0x0FE4, 0x028C, 0x01B3, 0x0FDD, + 0x0FE2, 0x0280, 0x01C2, 0x0FDC, + 0x0FE0, 0x0274, 0x01D0, 0x0FDC, + 0x0FDF, 0x0268, 0x01DE, 0x0FDB, + 0x0FDD, 0x025B, 0x01EE, 0x0FDA, + 0x0FDC, 0x024E, 0x01FC, 0x0FDA, + 0x0FDB, 0x0241, 0x020A, 0x0FDA, + 0x0FDB, 0x0233, 0x0218, 0x0FDA, + 0x0FDA, 0x0226, 0x0226, 0x0FDA, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.9_p_10qb_ +// 4 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { + 0x003F, 0x0383, 0x003E, 0x0000, + 0x0034, 0x0383, 0x004A, 0x0FFF, + 0x002B, 0x0383, 0x0054, 0x0FFE, + 0x0021, 0x0381, 0x0061, 0x0FFD, + 0x0019, 0x037F, 0x006C, 0x0FFC, + 0x0010, 0x037C, 0x0079, 0x0FFB, + 0x0008, 0x0378, 0x0086, 0x0FFA, + 0x0001, 0x0374, 0x0093, 0x0FF8, + 0x0FFA, 0x036E, 0x00A1, 0x0FF7, + 0x0FF3, 0x0368, 0x00B0, 0x0FF5, + 0x0FED, 0x0361, 0x00BF, 0x0FF3, + 0x0FE8, 0x035A, 0x00CD, 0x0FF1, + 0x0FE2, 0x0352, 0x00DC, 0x0FF0, + 0x0FDE, 0x0349, 0x00EB, 0x0FEE, + 0x0FD9, 0x033F, 0x00FC, 0x0FEC, + 0x0FD5, 0x0335, 0x010D, 0x0FE9, + 0x0FD2, 0x032A, 0x011D, 0x0FE7, + 0x0FCF, 0x031E, 0x012E, 0x0FE5, + 0x0FCC, 0x0312, 0x013F, 0x0FE3, + 0x0FCA, 0x0305, 0x0150, 0x0FE1, + 0x0FC8, 0x02F8, 0x0162, 0x0FDE, + 0x0FC6, 0x02EA, 0x0174, 0x0FDC, + 0x0FC5, 0x02DC, 0x0185, 0x0FDA, + 0x0FC4, 0x02CD, 0x0197, 0x0FD8, + 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, + 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, + 0x0FC3, 0x029F, 0x01CD, 0x0FD1, + 0x0FC3, 0x028E, 0x01E0, 0x0FCF, + 0x0FC3, 0x027E, 0x01F2, 0x0FCD, + 0x0FC4, 0x026D, 0x0203, 0x0FCC, + 0x0FC5, 0x025C, 0x0215, 0x0FCA, + 0x0FC6, 0x024B, 0x0227, 0x0FC8, + 0x0FC7, 0x0239, 0x0239, 0x0FC7, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_1_p_10qb_ +// 4 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { + 0x0000, 0x0400, 0x0000, 0x0000, + 0x0FF6, 0x03FF, 0x000B, 0x0000, + 0x0FED, 0x03FE, 0x0015, 0x0000, + 0x0FE4, 0x03FB, 0x0022, 0x0FFF, + 0x0FDC, 0x03F7, 0x002E, 0x0FFF, + 0x0FD5, 0x03F2, 0x003B, 0x0FFE, + 0x0FCE, 0x03EC, 0x0048, 0x0FFE, + 0x0FC8, 0x03E5, 0x0056, 0x0FFD, + 0x0FC3, 0x03DC, 0x0065, 0x0FFC, + 0x0FBE, 0x03D3, 0x0075, 0x0FFA, + 0x0FB9, 0x03C9, 0x0085, 0x0FF9, + 0x0FB6, 0x03BE, 0x0094, 0x0FF8, + 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, + 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, + 0x0FAD, 0x0397, 0x00CA, 0x0FF2, + 0x0FAB, 0x0389, 0x00DC, 0x0FF0, + 0x0FAA, 0x0379, 0x00EF, 0x0FEE, + 0x0FA9, 0x0369, 0x0102, 0x0FEC, + 0x0FA9, 0x0359, 0x0115, 0x0FE9, + 0x0FA9, 0x0348, 0x0129, 0x0FE6, + 0x0FA9, 0x0336, 0x013D, 0x0FE4, + 0x0FA9, 0x0323, 0x0153, 0x0FE1, + 0x0FAA, 0x0310, 0x0168, 0x0FDE, + 0x0FAC, 0x02FD, 0x017C, 0x0FDB, + 0x0FAD, 0x02E9, 0x0192, 0x0FD8, + 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, + 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, + 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, + 0x0FB5, 0x0296, 0x01E9, 0x0FCC, + 0x0FB8, 0x0281, 0x01FE, 0x0FC9, + 0x0FBA, 0x026C, 0x0214, 0x0FC6, + 0x0FBD, 0x0256, 0x022A, 0x0FC3, + 0x0FC0, 0x0240, 0x0240, 0x0FC0, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.3_p_10qb_ +// 6 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { + 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, + 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, + 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, + 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, + 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, + 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, + 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, + 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, + 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, + 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, + 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, + 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, + 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, + 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, + 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, + 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, + 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, + 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, + 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, + 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, + 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, + 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, + 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, + 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, + 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, + 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, + 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, + 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, + 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, + 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, + 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, + 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, + 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.4_p_10qb_ +// 6 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { + 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, + 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, + 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, + 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, + 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, + 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, + 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, + 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, + 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, + 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, + 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, + 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, + 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, + 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, + 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, + 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, + 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, + 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, + 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, + 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, + 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, + 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, + 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, + 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, + 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, + 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, + 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, + 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, + 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, + 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, + 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, + 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, + 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.5_p_10qb_ +// 6 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { + 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, + 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, + 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, + 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, + 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, + 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, + 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, + 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, + 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, + 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, + 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, + 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, + 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, + 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, + 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, + 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, + 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, + 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, + 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, + 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, + 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, + 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, + 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, + 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2, + 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, + 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, + 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, + 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, + 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, + 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, + 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, + 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, + 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.6_p_10qb_ +// 6 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { + 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, + 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, + 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, + 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, + 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, + 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, + 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, + 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, + 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, + 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, + 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, + 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, + 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, + 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, + 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, + 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, + 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, + 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, + 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, + 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, + 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, + 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, + 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, + 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, + 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, + 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, + 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, + 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, + 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, + 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, + 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, + 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, + 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.7_p_10qb_ +// 6 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { + 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, + 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, + 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, + 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, + 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, + 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, + 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, + 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, + 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, + 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, + 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, + 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, + 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, + 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF, + 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, + 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, + 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE, + 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, + 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, + 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, + 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, + 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, + 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, + 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, + 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, + 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, + 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, + 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, + 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, + 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, + 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, + 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, + 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.8_p_10qb_ +// 6 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { + 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, + 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, + 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, + 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, + 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, + 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, + 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, + 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, + 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, + 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, + 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, + 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, + 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, + 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, + 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, + 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, + 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, + 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, + 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, + 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, + 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, + 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, + 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, + 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, + 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, + 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, + 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, + 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, + 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, + 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, + 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, + 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, + 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.9_p_10qb_ +// 6 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { + 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, + 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, + 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, + 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, + 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, + 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, + 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003, + 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, + 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, + 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, + 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, + 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, + 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, + 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, + 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, + 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, + 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B, + 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, + 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, + 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, + 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, + 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, + 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, + 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, + 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, + 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, + 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, + 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, + 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, + 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, + 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_1_p_10qb_ +// 6 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { + 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, + 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, + 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, + 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, + 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, + 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, + 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, + 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, + 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, + 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, + 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, + 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, + 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, + 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, + 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, + 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, + 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, + 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, + 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, + 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, + 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, + 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, + 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, + 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, + 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, + 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, + 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, + 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, + 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, + 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, + 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, + 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, + 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, +}; + +/* Converted scaler coeff tables from S1.10 to S1.12 */ +static uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; +static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; +static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; + +struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x251F}, + {5, 10, 0x291F}, + {6, 10, 0xA51F}, + {7, 10, 0xA51F}, + {8, 10, 0xAA66}, + {9, 10, 0xA51F}, + {1, 1, 0xA640}, + {-1, -1, 0xA640}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x9600}, + {5, 10, 0xA460}, + {6, 10, 0xA8E0}, + {7, 10, 0xAC00}, + {8, 10, 0xAD20}, + {9, 10, 0xAFC0}, + {1, 1, 0xB058}, + {-1, -1, 0xB058}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x9900}, + {7, 10, 0xA100}, + {8, 10, 0xA8C0}, + {9, 10, 0xAB20}, + {1, 1, 0xAC00}, + {-1, -1, 0xAC00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4100}, + {9, 10, 0x9F00}, + {1, 1, 0xA4C0}, + {-1, -1, 0xA8D8}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4000}, + {9, 10, 0x24FE}, + {1, 1, 0x2D64}, + {-1, -1, 0x3ADB}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x3886}, + {9, 10, 0x3940}, + {1, 1, 0x3A4E}, + {-1, -1, 0x3B66}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x36F4}, + {9, 10, 0x359C}, + {1, 1, 0x3360}, + {-1, -1, 0x2F20}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x359C}, + {1, 1, 0x31F0}, + {-1, -1, 0x1F00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x9F00}, + {1, 1, 0xA400}, + {-1, -1, 0x9E00}, +}; + +void spl_init_easf_filter_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, + easf_filter_3tap_64p_ratio_0_30_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, + easf_filter_3tap_64p_ratio_0_40_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, + easf_filter_3tap_64p_ratio_0_50_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, + easf_filter_3tap_64p_ratio_0_60_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, + easf_filter_3tap_64p_ratio_0_70_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, + easf_filter_3tap_64p_ratio_0_80_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, + easf_filter_3tap_64p_ratio_0_90_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, + easf_filter_3tap_64p_ratio_1_00_s1_12, 3); + + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, + easf_filter_4tap_64p_ratio_0_30_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, + easf_filter_4tap_64p_ratio_0_40_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, + easf_filter_4tap_64p_ratio_0_50_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, + easf_filter_4tap_64p_ratio_0_60_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, + easf_filter_4tap_64p_ratio_0_70_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, + easf_filter_4tap_64p_ratio_0_80_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, + easf_filter_4tap_64p_ratio_0_90_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, + easf_filter_4tap_64p_ratio_1_00_s1_12, 4); + + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, + easf_filter_6tap_64p_ratio_0_30_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, + easf_filter_6tap_64p_ratio_0_40_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, + easf_filter_6tap_64p_ratio_0_50_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, + easf_filter_6tap_64p_ratio_0_60_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, + easf_filter_6tap_64p_ratio_0_70_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, + easf_filter_6tap_64p_ratio_0_80_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, + easf_filter_6tap_64p_ratio_0_90_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, + easf_filter_6tap_64p_ratio_1_00_s1_12, 6); +} + +uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_3tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_3tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_3tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_3tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_3tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + return easf_filter_3tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_3tap_64p_ratio_0_90_s1_12; + else + return easf_filter_3tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_4tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_4tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_4tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_4tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_4tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + return easf_filter_4tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_4tap_64p_ratio_0_90_s1_12; + else + return easf_filter_4tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + return easf_filter_6tap_64p_ratio_0_30_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + return easf_filter_6tap_64p_ratio_0_40_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + return easf_filter_6tap_64p_ratio_0_50_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + return easf_filter_6tap_64p_ratio_0_60_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + return easf_filter_6tap_64p_ratio_0_70_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + return easf_filter_6tap_64p_ratio_0_80_s1_12; + else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + return easf_filter_6tap_64p_ratio_0_90_s1_12; + else + return easf_filter_6tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 6) + return spl_get_easf_filter_6tap_64p(ratio); + else if (taps == 4) + return spl_get_easf_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_easf_filter_3tap_64p(ratio); + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h) +{ + /* + * Old coefficients calculated scaling ratio = input / output + * New coefficients are calculated based on = output / input + */ + if (enable_easf_h) { + dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps, data->recip_ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps_c, data->recip_ratios.horz_c); + } else { + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + } + if (enable_easf_v) { + dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps, data->recip_ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps_c, data->recip_ratios.vert_c); + } else { + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, data->ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); + } +} + +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, + struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, + unsigned int num_entries) +{ + unsigned int count = 0; + uint32_t value = 0; + struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; + + lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); + value = lookup_table_index_ptr->reg_value; + + while (count < num_entries) { + + lookup_table_index_ptr = (lookup_table_base_ptr + count); + if (lookup_table_index_ptr->numer < 0) + break; + + if (ratio.value < dc_fixpt_from_fraction( + lookup_table_index_ptr->numer, + lookup_table_index_ptr->denom).value) { + value = lookup_table_index_ptr->reg_value; + break; + } + + count++; + } + return value; +} +uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_v_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_h_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_uptilt_offset_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt_maxval_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt1_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt1_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_offset_lookup, num_entries); + } else + value = 0; + return value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h new file mode 100644 index 000000000000..542b5ce1a385 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ +#define __DC_SPL_SCL_EASF_FILTERS_H__ + +#include "dc_spl_types.h" + +struct scale_ratio_to_reg_value_lookup { + int numer; + int denom; + const uint32_t reg_value; +}; + +void spl_init_easf_filter_coeffs(void); +uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio); +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h); + +uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio); + +#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index c174b2e8a150..156f8171e44f 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -1423,3 +1423,29 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } + +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 6d96aca53b24..27590846d92a 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -17,43 +17,6 @@ const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_get_filter_3tap_16p_upscale(void); -const uint16_t *spl_get_filter_3tap_16p_116(void); -const uint16_t *spl_get_filter_3tap_16p_149(void); -const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p_upscale(void); -const uint16_t *spl_get_filter_4tap_16p_116(void); -const uint16_t *spl_get_filter_4tap_16p_149(void); -const uint16_t *spl_get_filter_4tap_16p_183(void); - -const uint16_t *spl_get_filter_3tap_64p_upscale(void); -const uint16_t *spl_get_filter_3tap_64p_116(void); -const uint16_t *spl_get_filter_3tap_64p_149(void); -const uint16_t *spl_get_filter_3tap_64p_183(void); - -const uint16_t *spl_get_filter_4tap_64p_upscale(void); -const uint16_t *spl_get_filter_4tap_64p_116(void); -const uint16_t *spl_get_filter_4tap_64p_149(void); -const uint16_t *spl_get_filter_4tap_64p_183(void); - -const uint16_t *spl_get_filter_5tap_64p_upscale(void); -const uint16_t *spl_get_filter_5tap_64p_116(void); -const uint16_t *spl_get_filter_5tap_64p_149(void); -const uint16_t *spl_get_filter_5tap_64p_183(void); - -const uint16_t *spl_get_filter_6tap_64p_upscale(void); -const uint16_t *spl_get_filter_6tap_64p_116(void); -const uint16_t *spl_get_filter_6tap_64p_149(void); -const uint16_t *spl_get_filter_6tap_64p_183(void); - -const uint16_t *spl_get_filter_7tap_64p_upscale(void); -const uint16_t *spl_get_filter_7tap_64p_116(void); -const uint16_t *spl_get_filter_7tap_64p_149(void); -const uint16_t *spl_get_filter_7tap_64p_183(void); - -const uint16_t *spl_get_filter_8tap_64p_upscale(void); -const uint16_t *spl_get_filter_8tap_64p_116(void); -const uint16_t *spl_get_filter_8tap_64p_149(void); -const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 201201d3f55b..e54da5ea4ae8 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -81,6 +81,8 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, + SPL_PIXEL_FORMAT_422BPP8, + SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -120,6 +122,13 @@ enum spl_color_space { SPL_COLOR_SPACE_YCBCR709_BLACK, }; +enum chroma_cositing { + CHROMA_COSITING_NONE, + CHROMA_COSITING_LEFT, + CHROMA_COSITING_TOPLEFT, + CHROMA_COSITING_COUNT +}; + // Scratch space for calculating scaler params struct spl_scaler_data { int h_active; @@ -129,6 +138,7 @@ struct spl_scaler_data { struct spl_rect viewport_c; struct spl_rect recout; struct spl_ratios ratios; + struct spl_ratios recip_ratios; struct spl_inits inits; }; @@ -485,6 +495,8 @@ struct spl_in { bool prefer_easf; bool disable_easf; struct spl_debug debug; + bool is_fullscreen; + bool is_hdr_on; }; // end of SPL inputs From 0961367cb5208850a123d235d147f92f586b2491 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 25 Jun 2024 16:40:51 -0400 Subject: [PATCH 0052/1523] drm/amd/display: Don't consider cursor for no plane case in DML1 [Description] For no plane scenarios we should not consider cursor as there cannot be any cursor if there's no planes. This fixes an issue where dc_commit_streams fails due to prefetch bandwidth requirements (the display config + dummy planes + cursor causes the prefetch bandwidth to exceed what is possible). Reviewed-by: Chaitanya Dhere Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 8a8efe408a9d..efe337ebf7c8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1562,6 +1562,8 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc, pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; pipes[pipe_cnt].pipe.src.source_format = dm_444_32; + pipes[pipe_cnt].pipe.src.cur0_src_width = 0; + pipes[pipe_cnt].pipe.src.cur1_src_width = 0; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/ From 98579743c4561acc3b1c7d2f3fcd46b2160db5ba Mon Sep 17 00:00:00 2001 From: Ryan Seto Date: Wed, 26 Jun 2024 14:53:26 -0400 Subject: [PATCH 0053/1523] drm/amd/display: Added logging for automated DPM testing [Why] Added clock logs to automate DPM testing [How] Added logs and helper functions to output clocks Co-authored-by: Ryan Seto Reviewed-by: Alvin Lee Signed-off-by: Jerry Zuo Signed-off-by: Ryan Seto Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 250 ++++++++++++++---- drivers/gpu/drm/amd/display/dc/core/dc.c | 9 +- .../dc/dml2/dml21/dml21_translation_helper.c | 27 ++ .../dc/dml2/dml21/dml21_translation_helper.h | 1 + .../amd/display/dc/dml2/dml21/dml21_utils.c | 2 + .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 1 + .../amd/display/dc/inc/hw/clk_mgr_internal.h | 4 +- 7 files changed, 237 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 45fe17a46890..c453c5f15ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -14,6 +14,7 @@ #include "core_types.h" #include "dm_helpers.h" #include "link.h" +#include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn401_smu14_driver_if.h" @@ -29,6 +30,7 @@ #define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F #define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72 #define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75 +#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051 #define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL #define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL @@ -302,6 +304,197 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_build_wm_range_table(clk_mgr_base); } +static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dprefclk_did = 0; + uint32_t dcfclk_did = 0; + uint32_t dtbclk_did = 0; + uint32_t dispclk_did = 0; + uint32_t dppclk_did = 0; + uint32_t fclk_did = 0; + uint32_t target_div = 0; + + /* DFS Slice 0 is used for DISPCLK */ + dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); + /* DFS Slice 1 is used for DPPCLK */ + dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); + /* DFS Slice 2 is used for DPREFCLK */ + dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); + /* DFS Slice 3 is used for DCFCLK */ + dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); + /* DFS Slice 4 is used for DTBCLK */ + dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); + /* DFS Slice _ is used for FCLK */ + fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL); + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dispclk_did); + //Get dispclk in khz + regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dppclk_did); + //Get dppclk in khz + regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DPREFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dprefclk_did); + //Get dprefclk in khz + regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DCFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dcfclk_did); + //Get dcfclk in khz + regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dtbclk_did); + //Get dtbclk in khz + regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(fclk_did); + //Get fclk in khz + regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; +} + +static bool dcn401_check_native_scaling(struct pipe_ctx *pipe) +{ + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn401_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + unsigned int mall_ss_size_bytes; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + msleep(5); + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; + + struct clk_log_info log_info = {0}; + struct clk_state_registers_and_bypass clk_register_dump; + + dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info); + + // Overrides for these clocks in case there is no p_state change support + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; + + num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + + if (!new_clocks->p_state_change_support) + dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; + + if (!new_clocks->fclk_p_state_change_support) + fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000; + + + //////////////////////////////////////////////////////////////////////////// + // IMPORTANT: When adding more clocks to these logs, do NOT put a newline + // anywhere other than at the very end of the string. + // + // Formatting example (make sure to have " - " between each entry): + // + // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" + //////////////////////////////////////////////////////////////////////////// + if (active_pipe_count > 0 && + new_clocks->dramclk_khz > 0 && + new_clocks->fclk_khz > 0 && + new_clocks->dcfclk_khz > 0 && + new_clocks->dppclk_khz > 0) { + + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total + * (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " + "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " + "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n", + dramclk_khz_override, + fclk_khz_override, + new_clocks->dcfclk_khz, + new_clocks->dppclk_khz, + clk_register_dump.dispclk, + clk_register_dump.dppclk, + clk_register_dump.dprefclk, + clk_register_dump.dcfclk, + clk_register_dump.dtbclk, + clk_register_dump.fclk, + pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2], + mall_ss_size_bytes, + p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3], + disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0], + disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1], + disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2], + disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]); + } +} + static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, struct dc_state *context, int ref_dtbclk_khz) @@ -1194,6 +1387,10 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* execute sequence */ dcn401_execute_block_sequence(clk_mgr_base, num_steps); + + if (dc->config.enable_auto_dpm_test_logs) + dcn401_auto_dpm_test_log(&context->bw_ctx.bw.dcn.clk, TO_CLK_MGR_INTERNAL(clk_mgr_base), context); + } @@ -1218,59 +1415,6 @@ static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_m return dc_fixpt_floor(pll_req); } -static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, - struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - uint32_t dprefclk_did = 0; - uint32_t dcfclk_did = 0; - uint32_t dtbclk_did = 0; - uint32_t dispclk_did = 0; - uint32_t dppclk_did = 0; - uint32_t target_div = 0; - - /* DFS Slice 0 is used for DISPCLK */ - dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); - /* DFS Slice 1 is used for DPPCLK */ - dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); - /* DFS Slice 2 is used for DPREFCLK */ - dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); - /* DFS Slice 3 is used for DCFCLK */ - dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); - /* DFS Slice 4 is used for DTBCLK */ - dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dispclk_did); - //Get dispclk in khz - regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dppclk_did); - //Get dppclk in khz - regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DPREFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dprefclk_did); - //Get dprefclk in khz - regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DCFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dcfclk_did); - //Get dcfclk in khz - regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DTBCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dtbclk_did); - //Get dtbclk in khz - regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; -} - static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr) { struct dc_bios *bp = clk_mgr->base.ctx->dc_bios; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 85a2ef82afa5..387b392f4c0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1254,7 +1254,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); dc_update_visual_confirm_color(dc, context, pipe); } @@ -3704,7 +3705,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -3839,7 +3841,8 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index a50fe3ec79c1..7c73efe19525 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1172,3 +1172,30 @@ void dml21_get_pipe_mcache_config( mcache_pipe_config->plane1_enabled = dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); } + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_uclk_pstate_support_method_fw_drr: + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 4cc0a1fbb93d..97a8f51b7780 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -26,4 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index d276458e50fd..622c98f4b7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -316,6 +316,8 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog); } static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index d5fefce3e74b..c55d7279fe51 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -180,6 +180,7 @@ struct clk_state_registers_and_bypass { uint32_t dispclk; uint32_t dppclk; uint32_t dtbclk; + uint32_t fclk; uint32_t dppclk_bypass; uint32_t dcfclk_bypass; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 12282f96dfe1..c2dd061892f4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -191,7 +191,8 @@ enum dentist_divider_range { CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK2_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK3_DFS_CNTL, CLK01, 0), \ - CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0) + CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0), \ + CLK_SR_DCN401(CLK2_CLK2_DFS_CNTL, CLK20, 0) #define CLK_COMMON_MASK_SH_LIST_DCN401(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN321(mask_sh) @@ -235,6 +236,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; uint32_t CLK1_CLK1_CURRENT_CNT; From e8d77cfdd2698039667432b8569bac34d13508c1 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 25 Jun 2024 13:23:36 -0400 Subject: [PATCH 0054/1523] drm/amd/display: Replace assert with error message in dp_retrieve_lttpr_cap() [Why] When assert in dp_retrieve_lttpr_cap() is hit, dmesg has traces like: RIP: 0010:dp_retrieve_lttpr_cap+0xcc/0x1a0 [amdgpu] Call Trace: dp_retrieve_lttpr_cap+0xcc/0x1a0 [amdgpu] report_bug+0x1e8/0x240 handle_bug+0x46/0x80 link_detect+0x35/0x580 [amdgpu] It happens when LTTPRs fail to increment dpcd repeater count. We have a recovery action in place for such cases. Assert is misleading, an indicative error in dmesg is more useful. [How] Remove ASSERT and use DC_LOG_ERROR instead. Reviewed-by: Michael Strauss Signed-off-by: Jerry Zuo Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 46bb7a855bc2..c257e733044a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1541,7 +1541,11 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { - ASSERT(0); + /* If you see this message consistently, either the host platform has FIXED_VS flag + * incorrectly configured or the sink device is returning an invalid count. + */ + DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } From bc2fe69f16c7122b5dabc294aa2d6065d8da2169 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 13:06:43 -0600 Subject: [PATCH 0055/1523] drm/amd/display: Revert "Check HDCP returned status" This reverts commit 5d93060d430b359e16e7c555c8f151ead1ac614b due to a power consumption regression. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/modules/hdcp/hdcp1_execution.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1e495e884484..8bc377560787 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -432,18 +432,18 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, goto out; } - if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, + mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, &input->bstatus_read, &status, - hdcp, "bstatus_read")) - goto out; - if (!mod_hdcp_execute_and_set(check_link_integrity_dp, + hdcp, "bstatus_read"); + + mod_hdcp_execute_and_set(check_link_integrity_dp, &input->link_integrity_check, &status, - hdcp, "link_integrity_check")) - goto out; - if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, + hdcp, "link_integrity_check"); + + mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, - hdcp, "reauth_request_check")) - goto out; + hdcp, "reauth_request_check"); + out: return status; } From 3c9154310af71802de2f40c5689b66009a407f37 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 25 Jun 2024 15:22:25 -0400 Subject: [PATCH 0056/1523] drm/amd/display: fix dscclk programming sequence on DCN401 [why] The mux to switch between refclk and dto_dsc_clk is non double buffered. However dto dsc clk's phase and modulo divider registers are currently configured as double buffered update. This causes a problem when we switch to use dto dsc clk and program phase and modulo in the same sequence. In this sequence dsc clk is switched to dto but the clock divider programming doesn't take effect until next frame. When we try to program DSCC registers, SMN bus will hang because dto dsc clk divider phase is set to 0. [how] Configure phase and modulo to take effect immediately. Always switch to dto dsc clk before DSC clock is unagted. Switch back to refclk after DSC clock is gated. Acked-by: Rodrigo Siqueira Reviewed-by: Jerry Zuo Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn20/dcn20_dccg.h | 6 +-- .../amd/display/dc/dccg/dcn401/dcn401_dccg.c | 32 ++++++++++----- .../amd/display/dc/dccg/dcn401/dcn401_dccg.h | 4 -- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 21 +++------- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +-- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 5 +-- .../gpu/drm/amd/display/dc/link/link_dpms.c | 41 ++++++++++++------- 8 files changed, 58 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 1e0292861244..6ac2bd86c4db 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -346,11 +346,7 @@ type SYMCLK32_LE3_SRC_SEL;\ type SYMCLK32_LE2_EN;\ type SYMCLK32_LE3_EN;\ - type DP_DTO_ENABLE[MAX_PIPES];\ - type DSCCLK0_DTO_DB_EN;\ - type DSCCLK1_DTO_DB_EN;\ - type DSCCLK2_DTO_DB_EN;\ - type DSCCLK3_DTO_DB_EN; + type DP_DTO_ENABLE[MAX_PIPES]; struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 07f1f396ba52..0b889004509a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -730,35 +730,35 @@ void dccg401_init(struct dccg *dccg) } } -static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, bool enable) +static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - uint32_t phase = enable ? 1 : 0; switch (inst) { case 0: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1, DSCCLK0_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, - DSCCLK0_DTO_PHASE, phase, + DSCCLK0_DTO_PHASE, 1, DSCCLK0_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); + break; case 1: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1, DSCCLK1_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, - DSCCLK1_DTO_PHASE, phase, + DSCCLK1_DTO_PHASE, 1, DSCCLK1_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1, DSCCLK2_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, - DSCCLK2_DTO_PHASE, phase, + DSCCLK2_DTO_PHASE, 1, DSCCLK2_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1, DSCCLK3_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, - DSCCLK3_DTO_PHASE, phase, + DSCCLK3_DTO_PHASE, 1, DSCCLK3_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); break; default: BREAK_TO_DEBUGGER(); @@ -774,15 +774,27 @@ static void dccg401_set_ref_dscclk(struct dccg *dccg, switch (dsc_inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 0); break; case 1: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 0); break; case 2: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 0); break; case 3: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); break; default: return; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h index 8bcddc836347..a196ce9e8127 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h @@ -117,10 +117,6 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_DB_EN, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 2532ad410cb5..ea9bedf65d84 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2186,9 +2186,9 @@ static void post_unlock_reset_opp(struct dc *dc, * yet power gated. */ dsc->funcs->dsc_wait_disconnect_pending_clear(dsc); + dsc->funcs->dsc_disable(dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, dsc->inst); - dsc->funcs->dsc_disable(dsc); } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 05d8f81daa06..4534843ba66a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1029,24 +1029,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } - dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; - dsc_cfg.pic_width *= opp_cnt; - optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED; - /* Enable DSC in OPTC */ DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst); pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg, @@ -1060,13 +1056,9 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* only disconnect DSC block, DSC is disabled when OPP head pipe is reset */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); + dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); } } @@ -1137,10 +1129,7 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; - struct dccg *dccg = dc->res_pool->dccg; - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, false); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 2c50c0f745a0..b9378f18c020 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1542,7 +1542,6 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, struct pipe_ctx *old_pipe; struct pipe_ctx *new_pipe; struct pipe_ctx *old_opp_heads[MAX_PIPES]; - struct dccg *dccg = dc->res_pool->dccg; struct pipe_ctx *old_otg_master; int old_opp_head_count = 0; @@ -1568,12 +1567,9 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, for (i = 0; i < old_opp_head_count; i++) { old_pipe = old_opp_heads[i]; new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx]; - if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) { - dccg->funcs->set_dto_dscclk(dccg, - old_pipe->stream_res.dsc->inst, false); + if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) old_pipe->stream_res.dsc->funcs->dsc_disconnect( old_pipe->stream_res.dsc); - } } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 4fb1aacee894..d619eb229a62 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -211,10 +211,7 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); - void (*set_dto_dscclk)( - struct dccg *dccg, - uint32_t dsc_inst, - bool enable); + void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 65607589495f..d6550b904b16 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -817,17 +817,17 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -879,19 +879,32 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - pipe_ctx->stream_res.dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); - if (dccg->funcs->set_ref_dscclk) - dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); - pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); + for (odm_pipe = pipe_ctx; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); + /* + * TODO - dsc_disconnect is a double buffered register. + * by the time we call dsc_disable, dsc may still remain + * connected to OPP. In this case OPTC will no longer + * get correct pixel data because DSCC is off. However + * we also can't wait for the disconnect pending + * complete, because this function can be called + * with/without OTG master lock acquired. When the lock + * is acquired we will never get pending complete until + * we release the lock later. So there is no easy way to + * solve this problem especially when the lock is + * acquired. DSC is a front end hw block it should be + * programmed as part of front end sequence, where the + * commit sequence without lock and update sequence + * with lock are completely separated. However because + * we are programming dsc as part of back end link + * programming sequence, we don't know if front end OPTC + * master lock is acquired. The back end should be + * agnostic to front end lock. DSC programming shouldn't + * belong to this sequence. + */ + odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); - odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } } From 5fc77c26970d443f1c020ee8a5d475ad6b81e15f Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 31 May 2024 11:37:15 -0400 Subject: [PATCH 0057/1523] drm/amd/display: apply vmin optimization even if it doesn't reach vmin level [why] Based on power measurement result, in most cases when display clock is higher than Vmin display clock, lowering display clock using dynamic ODM will improve overall power consumption by 0 to 4 watts even if we can't reach Vmin. [how] Allow vmin optimization applied even if dispclk can't reach Vmin. Reviewed-by: Jun Lei Signed-off-by: Jerry Zuo Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 14 +++++++++----- .../display/dc/dml2/dml21/src/dml2_top/dml_top.c | 13 +++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 603036df68ba..60a9faf81d3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -591,6 +591,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -611,28 +613,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index 2fb3e2f45e07..b25e9230adea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -268,9 +268,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* From c83ecc0bee94995117329fccbfa6e8b43ce56044 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Thu, 27 Jun 2024 17:41:36 -0400 Subject: [PATCH 0058/1523] drm/amd/display: Implement bias and scale pre scl why: New scaler needs the input to be full range color space. This will also fix issues that come up due to not having a predefined limited color space matrix for certain color spaces how: Use bias and scale HW to expand the range of limited color spaces to full before the scaler Reviewed-by: Krunoslav Kovac Signed-off-by: Jerry Zuo Signed-off-by: Relja Vojvodic Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 6 ++--- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- .../drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c | 27 ++++++++++++++++++- .../drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h | 3 +++ .../amd/display/dc/dpp/dcn401/dcn401_dpp.c | 3 ++- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 +-- .../gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 13 ++++----- 7 files changed, 43 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 87e36d51c56d..9e42a0128baa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -901,12 +901,12 @@ void hwss_program_bias_and_scale(union block_sequence_params *params) struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); - if (dpp->funcs->dpp_program_bias_and_scale) + if (dpp->funcs->dpp_program_bias_and_scale) { dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + } } void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4c9bb913125d..83fe13f5a367 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1292,7 +1292,7 @@ struct dc_plane_state { struct dc_gamma gamma_correction; struct dc_transfer_func in_transfer_func; - struct dc_bias_and_scale *bias_and_scale; + struct dc_bias_and_scale bias_and_scale; struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; struct fixed31_32 hdr_mult; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index e16274fee31d..8473c694bfdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -59,6 +59,31 @@ void dpp35_dppclk_control( DISPCLK_R_GATE_DISABLE, 0); } +void dpp35_program_bias_and_scale_fcnv( + struct dpp *dpp_base, + struct dc_bias_and_scale *params) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + + if (!params->bias_and_scale_valid) { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, 0); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, 0); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, 0); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, 0x1F000); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, 0x1F000); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, 0x1F000); + } else { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, params->bias_red); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, params->bias_green); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, params->bias_blue); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, params->scale_red); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, params->scale_green); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, params->scale_blue); + } +} + static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_gamcor_lut = dpp3_program_gamcor_lut, .dpp_read_state = dpp30_read_state, @@ -81,7 +106,7 @@ static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp3_set_cursor_attributes, .set_cursor_position = dpp1_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h index 135872d88219..3ca339a16e5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h @@ -61,4 +61,7 @@ bool dpp35_construct(struct dcn3_dpp *dpp3, struct dc_context *ctx, void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable); +void dpp35_program_bias_and_scale_fcnv(struct dpp *dpp_base, + struct dc_bias_and_scale *bias_and_scale); + #endif // __DCN35_DPP_H diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c index 7cae18fd7be9..97bf26fa3573 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -30,6 +30,7 @@ #include "basics/conversion.h" #include "dcn30/dcn30_cm_common.h" #include "dcn32/dcn32_dpp.h" +#include "dcn35/dcn35_dpp.h" #define REG(reg)\ dpp->tf_regs->reg @@ -240,7 +241,7 @@ static struct dpp_funcs dcn401_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp401_set_cursor_attributes, .set_cursor_position = dpp401_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index ea9bedf65d84..9a00479f0417 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1698,7 +1698,7 @@ static void dcn20_update_dchubp_dpp( plane_state->update_flags.bits.input_csc_change || plane_state->update_flags.bits.color_space_change || plane_state->update_flags.bits.coeff_reduction_change) { - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; // program the input csc dpp->funcs->dpp_setup(dpp, @@ -1715,7 +1715,6 @@ static void dcn20_update_dchubp_dpp( } if (dpp->funcs->dpp_program_bias_and_scale) { //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 27bba47186e9..41c76ba9ba56 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -217,12 +217,13 @@ enum optc_dsc_mode { }; struct dc_bias_and_scale { - uint16_t scale_red; - uint16_t bias_red; - uint16_t scale_green; - uint16_t bias_green; - uint16_t scale_blue; - uint16_t bias_blue; + uint32_t scale_red; + uint32_t bias_red; + uint32_t scale_green; + uint32_t bias_green; + uint32_t scale_blue; + uint32_t bias_blue; + bool bias_and_scale_valid; }; enum test_pattern_dyn_range { From 218784049f4b90834dd7b854857df0b7e0f76567 Mon Sep 17 00:00:00 2001 From: Jingwen Zhu Date: Tue, 25 Jun 2024 16:20:58 +0800 Subject: [PATCH 0059/1523] drm/amd/display: avoid disable otg when dig was disabled [Why] This is a workaround for an dcn3.1 hang that happens if otg dispclk is ramped while otg is on and stream enc is off. But this w/a should not trigger when we have a dig active. [How] Avoid disable otg when dig FE/BE FIFO was not switched. Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Jingwen Zhu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 18 +++++++++++++++--- .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 9 +++++++++ .../drm/amd/display/dc/inc/hw/stream_encoder.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 70ee0089a20d..248d22b23a6d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -120,7 +120,6 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } - static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower, bool disable) { @@ -128,14 +127,27 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *pipe = safe_to_lower ? &context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; - + bool stream_changed_otg_dig_on = false; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; + stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream && + old_pipe->stream != new_pipe->stream && + old_pipe->stream_res.tg == new_pipe->stream_res.tg && + new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && + new_pipe->stream->link->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link->link_enc) && + new_pipe->stream_res.stream_enc && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc)) { + !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + /* This w/a should not trigger when we have a dig active */ if (disable) { if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6a179e5ab417..fcc88ef83e6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -392,6 +392,14 @@ static void enc35_reset_fifo(struct stream_encoder *enc, bool reset) udelay(10); } +static bool enc35_is_fifo_enabled(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t reset_val; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); + return (reset_val == 0) ? false : true; +} void enc35_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); @@ -465,6 +473,7 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .set_input_mode = enc314_set_dig_input_mode, .enable_fifo = enc35_enable_fifo, .disable_fifo = enc35_disable_fifo, + .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index e5e11c84e9e2..6fe42120738d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -271,6 +271,7 @@ struct stream_encoder_funcs { struct stream_encoder *enc, unsigned int pix_per_container); void (*enable_fifo)(struct stream_encoder *enc); void (*disable_fifo)(struct stream_encoder *enc); + bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); }; From f5c783868855bbecfc4311050dbaca4cb28ada5e Mon Sep 17 00:00:00 2001 From: Sridevi Arvindekar Date: Thu, 27 Jun 2024 09:39:33 -0400 Subject: [PATCH 0060/1523] drm/amd/display: Add option to allow transition when odm is forced Added option to allow transition for forced odm. Add the variation to the nightly run. Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Sridevi Arvindekar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 387b392f4c0d..c35029c65223 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4338,7 +4338,8 @@ static void backup_and_set_minimal_pipe_split_policy(struct dc *dc, dc->debug.force_disable_subvp = true; for (i = 0; i < context->stream_count; i++) { policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments; - context->streams[i]->debug.force_odm_combine_segments = 0; + if (context->streams[i]->debug.allow_transition_for_forced_odm) + context->streams[i]->debug.force_odm_combine_segments = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 8ebd7e9e776e..3d9ee4da7056 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -159,6 +159,12 @@ struct test_pattern { struct dc_stream_debug_options { char force_odm_combine_segments; + /* + * When force_odm_combine_segments is non zero, allow dc to + * temporarily transition to ODM bypass when minimal transition state + * is required to prevent visual glitches showing on the screen + */ + char allow_transition_for_forced_odm; }; #define LUMINANCE_DATA_TABLE_SIZE 10 From b3c9c9affd71f1f9b136534d6f56f392edb78620 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 27 Jun 2024 10:42:26 -0400 Subject: [PATCH 0061/1523] drm/amd/display: When resync fifo ensure to use correct pipe ctx We resync the FIFO after each pipe update in apply_ctx_to_hw. However, this means that some pipes (in hardware) are based on the new context and some are based on the current_state (since the pipes are updated on at a time). In this case we must ensure to use the pipe_ctx that's currently still configured in hardware when turning off / on OTG's and reconfiguring ODM during the resync. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 13 +++++++++--- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.h | 2 +- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 20 ++++++++++++++----- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h | 2 +- .../display/dc/hwss/hw_sequencer_private.h | 3 ++- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 51c5195f8325..982b2d5bfb5f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2443,7 +2443,7 @@ enum dc_status dce110_apply_ctx_to_hw( #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 388404cdeeaa..4e93eeedfc1b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -355,14 +355,18 @@ void dcn314_calculate_pix_rate_divider( } } -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + } if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -377,7 +381,10 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index fb4f90f61b22..2305ad282f21 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -41,7 +41,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig void dcn314_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream); -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 4534843ba66a..7f41eccefe02 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1201,20 +1201,27 @@ void dcn32_calculate_pix_rate_divider( } } -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; + struct dc_state *dc_state = NULL; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc_state = context; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + dc_state = dc->current_state; + } if (!resource_is_pipe_type(pipe, OTG_MASTER)) continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1224,7 +1231,10 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; @@ -1572,7 +1582,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index db562e45d6ff..b1563e2c0491 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -75,7 +75,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn32_subvp_pipe_control_lock(struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7ac3f2a09487..7a75ff320511 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -170,7 +170,8 @@ struct hwseq_private_funcs { unsigned int *k1_div, unsigned int *k2_div); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, - struct dc_state *context); + struct dc_state *context, + unsigned int current_pipe_idx); enum dc_status (*apply_single_controller_ctx_to_hw)( struct pipe_ctx *pipe_ctx, struct dc_state *context, From c18fa08e6fd8952e88fa9ad108371c2bd5b82564 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 27 Jun 2024 11:56:47 -0400 Subject: [PATCH 0062/1523] drm/amd/display: Disable subvp based on HW cursor requirement [Description] - There are situations where HW cursor is required - In these scenarios we should disable subvp based on the HW cursor requirement Reviewed-by: Dillon Varone Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 3 +++ .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c35029c65223..9897e322e2d5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2717,6 +2717,10 @@ static enum surface_update_type check_update_surfaces_for_stream( overall_type = UPDATE_TYPE_FULL; } + if (stream_update && stream_update->hw_cursor_req) { + overall_type = UPDATE_TYPE_FULL; + } + /* some stream updates require passive update */ if (stream_update) { union stream_update_flags *su_flags = &stream_update->stream->update_flags; @@ -3012,6 +3016,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->hw_cursor_req) + stream->hw_cursor_req = *update->hw_cursor_req; + if (update->allow_freesync) stream->allow_freesync = *update->allow_freesync; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 3d9ee4da7056..de9bd72ca514 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -266,6 +266,8 @@ struct dc_stream_state { struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; + bool hw_cursor_req; + uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode /* from stream struct */ @@ -350,6 +352,7 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; + bool *hw_cursor_req; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 7c73efe19525..405544920f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -516,7 +516,7 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) From d94df7cad26b4c5958fbfa550d65794c89aa90ba Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Fri, 28 Jun 2024 16:05:36 -0400 Subject: [PATCH 0063/1523] drm/amd/display: Fix DP-DVI dongle hotplug [why] Hotplugging with a DVI-DP dongle on pre-rdna embedded platform working about half the time. The regression was found to be the setting of link->type here. [what] Reverts fix besides the logging added. Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index bba644024780..391dbe81534d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -863,7 +863,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; - enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -965,7 +964,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { - link->type = pre_connection_type; if (prev_sink) dc_sink_release(prev_sink); @@ -1299,8 +1297,7 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) link->dpcd_caps.is_mst_capable) is_delegated_to_mst_top_mgr = discover_dp_mst_topology(link, reason); - if (is_local_sink_detect_success && - pre_link_type == dc_connection_mst_branch && + if (pre_link_type == dc_connection_mst_branch && link->type != dc_connection_mst_branch) is_delegated_to_mst_top_mgr = link_reset_cur_dp_mst_topology(link); From f60881ca126cf825b89b4118e93dbd82ea9bcf33 Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Wed, 26 Jun 2024 18:03:16 +0530 Subject: [PATCH 0064/1523] drm/amd/display: Refactoring OPP [Why] To refactor OPP files [How] Moved opp related files to specific opp folder and updated Makefiles. Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Revalla Hari Krishna Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/opp/Makefile | 16 ++++++++++++++++ .../amd/display/dc/{ => opp}/dcn10/dcn10_opp.c | 0 .../amd/display/dc/{ => opp}/dcn10/dcn10_opp.h | 0 .../amd/display/dc/{ => opp}/dcn20/dcn20_opp.c | 0 .../amd/display/dc/{ => opp}/dcn20/dcn20_opp.h | 0 7 files changed, 17 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn10/dcn10_opp.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn10/dcn10_opp.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn20/dcn20_opp.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => opp}/dcn20/dcn20_opp.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 9923d0d620d4..75e088b479ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_opp.o \ dcn10_mpc.o \ dcn10_cm_common.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index b3aeabc4d605..744a6c4ac816 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. -DCN20 = dcn20_mpc.o dcn20_opp.o dcn20_mmhubbub.o \ +DCN20 = dcn20_mpc.o dcn20_mmhubbub.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/opp/Makefile b/drivers/gpu/drm/amd/display/dc/opp/Makefile index fbfb3c3ad819..1be76754db30 100644 --- a/drivers/gpu/drm/amd/display/dc/opp/Makefile +++ b/drivers/gpu/drm/amd/display/dc/opp/Makefile @@ -25,6 +25,22 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN10 +############################################################################### +OPP_DCN10 = dcn10_opp.o + +AMD_DAL_OPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/opp/dcn10/,$(OPP_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN10) +############################################################################### +# DCN20 +############################################################################### +OPP_DCN20 = dcn20_opp.o + +AMD_DAL_OPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/opp/dcn20/,$(OPP_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN20) +############################################################################### # DCN35 ############################################################################### OPP_DCN35 = dcn35_opp.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h rename to drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h From b995c0a6de6c74656a0c39cd57a0626351b13e3c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 18 Jun 2024 14:05:08 -0600 Subject: [PATCH 0065/1523] drm/amd/display: Initialize denominators' default to 1 [WHAT & HOW] Variables used as denominators and maybe not assigned to other values, should not be 0. Change their default to 1 so they are never 0. This fixes 10 DIVIDE_BY_ZERO issues reported by Coverity. Reviewed-by: Harry Wentland Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c | 2 +- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 7c56ad0f8812..e7019c95ba79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index dae13f202220..d8bfc85e5dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -39,7 +39,7 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 81f0a6f19f87..679b20031903 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -9386,8 +9386,8 @@ static void CalculateVMGroupAndRequestTimes( double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { - unsigned int num_group_per_lower_vm_stage = 0; - unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage = 1; + unsigned int num_req_per_lower_vm_stage = 1; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); From 367cd9ceba1933b63bc1d87d967baf6d9fd241d2 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 17:34:18 -0600 Subject: [PATCH 0066/1523] drm/amd/display: Check null-initialized variables [WHAT & HOW] drr_timing and subvp_pipe are initialized to null and they are not always assigned new values. It is necessary to check for null before dereferencing. This fixes 2 FORWARD_NULL issues reported by Coverity. Reviewed-by: Nevenko Stupar Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9d399c4ce957..4cb0227bdd27 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -871,8 +871,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, * and the max of (VBLANK blanking time, MALL region)). */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + if (drr_timing && + stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) schedulable = true; return schedulable; @@ -937,7 +938,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } - if (found) { + if (found && subvp_pipe) { phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; phantom_timing = &phantom_stream->timing; From 3718a619a8c0a53152e76bb6769b6c414e1e83f4 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 20 Jun 2024 20:23:41 -0600 Subject: [PATCH 0067/1523] drm/amd/display: Check phantom_stream before it is used dcn32_enable_phantom_stream can return null, so returned value must be checked before used. This fixes 1 NULL_RETURNS issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 3ed6d1fa0c44..ee009716d39b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1717,6 +1717,9 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't // already have phantom pipe assigned, etc.) by previous checks. phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); + if (!phantom_stream) + return; + dcn32_enable_phantom_plane(dc, context, phantom_stream, index); for (i = 0; i < dc->res_pool->pipe_count; i++) { From 5559598742fb4538e4c51c48ef70563c49c2af23 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 11:51:27 -0600 Subject: [PATCH 0068/1523] drm/amd/display: Pass non-null to dcn20_validate_apply_pipe_split_flags [WHAT & HOW] "dcn20_validate_apply_pipe_split_flags" dereferences merge, and thus it cannot be a null pointer. Let's pass a valid pointer to avoid null dereference. This fixes 2 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 3 ++- drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 5e7cfa8e8ec9..eea2b3b307cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2040,6 +2040,7 @@ bool dcn20_fast_validate_bw( { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -2064,7 +2065,7 @@ bool dcn20_fast_validate_bw( if (vlevel > context->bw_ctx.dml.soc.num_states) goto validate_fail; - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); /*initialize pipe_just_split_from to invalid idx*/ for (i = 0; i < MAX_PIPES; i++) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 8663cbc3d1cf..347e6aaea582 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -774,6 +774,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -816,7 +817,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, goto validate_fail; } - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; From 1ff12bcd7deaeed25efb5120433c6a45dd5504a8 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 17:38:16 -0600 Subject: [PATCH 0069/1523] drm/amd/display: Check null pointers before using them [WHAT & HOW] These pointers are null checked previously in the same function, indicating they might be null as reported by Coverity. As a result, they need to be checked when used again. This fixes 3 FORWARD_NULL issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ea1e2d8dcd8c..92774a871e98 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7195,6 +7195,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; + if (!dm_state) + return NULL; + do { stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, @@ -9302,7 +9305,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (acrtc) old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - if (!acrtc->wb_enabled) + if (!acrtc || !acrtc->wb_enabled) continue; dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -9706,9 +9709,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); - hdcp_update_display( - adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, - new_con_state->hdcp_content_type, enable_encryption); + if (aconnector->dc_link) + hdcp_update_display( + adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, + new_con_state->hdcp_content_type, enable_encryption); } } From 35ff747c86767937ee1e0ca987545b7eed7a0810 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 20:05:14 -0600 Subject: [PATCH 0070/1523] drm/amd/display: Check stream before comparing them [WHAT & HOW] amdgpu_dm can pass a null stream to dc_is_stream_unchanged. It is necessary to check for null before dereferencing them. This fixes 1 FORWARD_NULL issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bcb5267b5a6b..83aa3d8a997a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3241,6 +3241,8 @@ static bool are_stream_backends_same( bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { + if (!old_stream || !stream) + return false; if (!are_stream_backends_same(old_stream, stream)) return false; From 892abca6877a96c9123bb1c010cafccdf8ca1b75 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 28 Jun 2024 15:09:06 -0400 Subject: [PATCH 0071/1523] drm/amd/display: Deallocate DML memory if allocation fails [Why] When DC state create DML memory allocation fails, memory is not deallocated subsequently, resulting in uninitialized structure that is not NULL. [How] Deallocate memory if DML memory allocation fails. Reviewed-by: Joshua Aberback Signed-off-by: Jerry Zuo Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e990346e51f6..665157f8d4cb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -211,10 +211,16 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) { dml2_opt->use_clock_dc_limits = false; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) { + dc_state_release(state); + return NULL; + } dml2_opt->use_clock_dc_limits = true; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source)) { + dc_state_release(state); + return NULL; + } } #endif From 906fd46a65383cd639e5eec72a047efc33045d86 Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Tue, 2 Jul 2024 17:17:40 +0530 Subject: [PATCH 0072/1523] drm/amd/display: Refactoring MMHUBBUB [Why] To refactor MMHUBBUB files [How] Moved mmhubbub files from dcn20 to /mmhubbub/ folder and update makefile to fix compilation. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Revalla Hari Krishna Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile | 9 +++++++++ .../amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.c | 0 .../amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.h | 0 4 files changed, 10 insertions(+), 1 deletion(-) rename drivers/gpu/drm/amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mmhubbub}/dcn20/dcn20_mmhubbub.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 744a6c4ac816..d92d2b4ee015 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. -DCN20 = dcn20_mpc.o dcn20_mmhubbub.o \ +DCN20 = dcn20_mpc.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile index 505bc0517e08..eab196c57c6c 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN20 +############################################################################### +MMHUBBUB_DCN20 = dcn20_mmhubbub.o + +AMD_DAL_MMHUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn20/,$(MMHUBBUB_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN20) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h rename to drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h From 5d2c102deff63ff8980dfa848ee41858d255c291 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Tue, 2 Jul 2024 13:30:36 -0400 Subject: [PATCH 0073/1523] drm/amd/display: Do 1-to-1 mapping between OPP and DSC in DML2 [why] To determine which block instance to power-gate, we look at the available pipe resource for both plane and stream. On MPO, DSC3 was falsely powered on even though only 1 stream path was enabled because the resource mapping was not done correctly. [how] Acquire the correct DSC instance to power on / off based on the instance of OPP which determines the backend pipe index. Reviewed-by: Swapnil Patel Signed-off-by: Jerry Zuo Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 83aa3d8a997a..4f5b23520365 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5166,7 +5166,7 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; if (sec_pipe->stream->timing.flags.DSC == 1) { #if defined(CONFIG_DRM_AMD_DC_FP) - dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst); #endif ASSERT(sec_pipe->stream_res.dsc); if (sec_pipe->stream_res.dsc == NULL) From 2563391e57b5a9c1d83fd36c05ac4cbafeb5efe6 Mon Sep 17 00:00:00 2001 From: Chaitanya Dhere Date: Tue, 2 Jul 2024 10:50:04 -0400 Subject: [PATCH 0074/1523] drm/amd/display: DML2.1 resynchronization July update for DML2.1 library from hardware team targeting DCN401 Reviewed-by: Aurabindo Pillai Signed-off-by: Jerry Zuo Signed-off-by: Chaitanya Dhere Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 3 - .../dml21/inc/bounding_boxes/dcn4_soc_bb.h | 13 +- .../amd/display/dc/dml2/dml21/inc/dml_top.h | 1 - .../dml2/dml21/inc/dml_top_dchub_registers.h | 1 - .../dml21/inc/dml_top_display_cfg_types.h | 2 +- .../dc/dml2/dml21/inc/dml_top_policy_types.h | 1 - .../dml21/inc/dml_top_soc_parameter_types.h | 6 +- .../display/dc/dml2/dml21/inc/dml_top_types.h | 2 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 45 +- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.h | 1 - .../src/dml2_core/dml2_core_dcn4_calcs.c | 180 ++- .../src/dml2_core/dml2_core_dcn4_calcs.h | 2 +- .../dml21/src/dml2_core/dml2_core_factory.c | 1 - .../dml21/src/dml2_core/dml2_core_factory.h | 1 - .../dml21/src/dml2_core/dml2_core_shared.c | 40 +- .../dml21/src/dml2_core/dml2_core_shared.h | 1 - .../src/dml2_core/dml2_core_shared_types.h | 6 +- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 3 +- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h | 1 - .../dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 1 - .../dml21/src/dml2_dpmm/dml2_dpmm_factory.h | 1 - .../dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c | 1 - .../dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h | 1 - .../dml21/src/dml2_mcg/dml2_mcg_factory.c | 1 - .../dml21/src/dml2_mcg/dml2_mcg_factory.h | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h | 1 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c | 1250 ----------------- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h | 25 - .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 545 ++++--- .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h | 1 - .../dml21/src/dml2_pmo/dml2_pmo_factory.c | 6 +- .../dml21/src/dml2_pmo/dml2_pmo_factory.h | 1 - .../lib_float_math.c | 1 - .../lib_float_math.h | 1 - .../src/dml2_top/dml2_top_optimization.c | 1 - .../src/dml2_top/dml2_top_optimization.h | 1 - .../dc/dml2/dml21/src/dml2_top/dml_top.c | 30 +- .../dml2/dml21/src/dml2_top/dml_top_mcache.c | 1 - .../dml2/dml21/src/dml2_top/dml_top_mcache.h | 1 - .../dc/dml2/dml21/src/inc/dml2_debug.c | 1 - .../dc/dml2/dml21/src/inc/dml2_debug.h | 1 - .../src/inc/dml2_internal_shared_types.h | 27 +- 43 files changed, 554 insertions(+), 1657 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index fea857214c0f..dcd01fe04296 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -87,7 +87,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_c CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) @@ -110,7 +109,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) @@ -132,7 +130,6 @@ DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o DML21 += src/dml2_mcg/dml2_mcg_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn3.o -DML21 += src/dml2_pmo/dml2_pmo_dcn4.o DML21 += src/dml2_pmo/dml2_pmo_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o DML21 += src/dml2_standalone_libraries/lib_float_math.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index fe07fcc3d0d5..5af94f06c667 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -344,6 +344,9 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .hostvm_mode = 0, .subvp_drr_scheduling_margin_us = 100, .subvp_prefetch_end_to_mall_start_us = 15, .subvp_fw_processing_delay = 15, @@ -351,14 +354,18 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 50, + .scheduling_delay_us = 125, .vertical_interrupt_ack_delay_us = 18, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, - .subvp_programming_delay_us = 18, + .subvp_programming_delay_us = 200, .subvp_prefetch_to_mall_delay_us = 18, - .drr_programming_delay_us = 18, + .drr_programming_delay_us = 35, + + .lock_timeout_us = 5000, + .recovery_timeout_us = 5000, + .flip_programming_delay_us = 300, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a25f4e5977cf..a64ec4dcf11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_H__ #define __DML_TOP_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 8247289ce7d3..83fc15bf13cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __dml2_TOP_DCHUB_REGISTERS_H__ #define __dml2_TOP_DCHUB_REGISTERS_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index daae77f2672b..fe153f4edaf5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ #define __DML_TOP_DISPLAY_CFG_TYPES_H__ @@ -478,6 +477,7 @@ struct dml2_display_cfg { bool max_outstanding_when_urgent_expected_disable; bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming unsigned int best_effort_min_active_latency_hiding_us; + bool all_streams_blanked; } overrides; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h index 2f444f448770..8f624a912e78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_POLICY_TYPES_H__ #define __DML_TOP_POLICY_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 065b2afab6fb..99d775adc3e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ #define __DML_TOP_SOC_PARAMETER_TYPES_H__ @@ -173,6 +172,7 @@ struct dml2_ip_capabilities { unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int hostvm_mode; unsigned int subvp_drr_scheduling_margin_us; unsigned int subvp_prefetch_end_to_mall_start_us; @@ -190,6 +190,10 @@ struct dml2_ip_capabilities { unsigned int subvp_programming_delay_us; unsigned int subvp_prefetch_to_mall_delay_us; unsigned int drr_programming_delay_us; + + unsigned int lock_timeout_us; + unsigned int recovery_timeout_us; + unsigned int flip_programming_delay_us; } fams2; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 8aa77bb190ea..c47a07f473e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -262,6 +262,7 @@ union dml2_global_sync_programming { unsigned int vupdate_offset_pixels; unsigned int vupdate_vupdate_width_pixels; unsigned int vready_offset_pixels; + unsigned int pstate_keepout_start_lines; } dcn4; }; @@ -411,6 +412,7 @@ struct dml2_display_cfg_programming { /* indicates this configuration requires FW to support */ bool fams2_required; + struct dmub_cmd_fams2_global_config fams2_global_config; struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 04edcde423a9..b7a6f7f4c342 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml2_core_shared_types.h" #include "dml2_core_dcn4.h" @@ -70,6 +69,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .max_num_dp2p0_streams = 4, .imall_supported = 1, .max_flip_time_us = 80, + .max_flip_time_lines = 32, .words_per_channel = 16, .subvp_fw_processing_delay_us = 15, @@ -169,6 +169,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; + ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; // FIXME_STAGE2: cleanup after adding all dv override to ip_caps @@ -192,6 +193,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; + ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; } @@ -222,6 +224,7 @@ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) } memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); return true; } @@ -246,10 +249,12 @@ static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *p phantom->stream_index = phantom_stream_index; phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane0.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); - phantom->composition.viewport.plane1.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane1.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); phantom->immediate_flip = false; phantom->dynamic_meta_data.enable = false; phantom->cursor.num_cursors = 0; @@ -344,6 +349,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Check if FAMS2 is required if (display_cfg->stage3.performed && display_cfg->stage3.success) { programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) @@ -641,20 +648,20 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else { - if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; - else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; - else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; - } + if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else { + if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + else + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h index 235280c6dcf5..e62b2d3eeee6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_H__ #define __DML2_CORE_DCN4_H__ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 6f4026e396e0..45e43a915fd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -10,10 +10,7 @@ #include "dml_top_types.h" #include "dml2_core_shared.h" -#define DML_VM_PTE_ADL_PATCH_EN //#define DML_TVM_UPDATE_EN -#define DML_TDLUT_ROW_BYTES_FIX_EN -#define DML_REG_LIMIT_CLAMP_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 @@ -235,6 +232,7 @@ dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStart dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); @@ -2343,16 +2341,16 @@ static void calculate_mcache_row_bytes( } if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { - meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; } *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float) blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -3841,7 +3839,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; if (*p->UnboundedRequestEnabled) { *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, - (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); @@ -3852,21 +3850,20 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; - if (!p->mrq_present) { - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(*p->UnboundedRequestEnabled) - && p->display_cfg->plane_descriptors[k].surface.dcc.enable - && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) - *p->hw_debug5 = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!p->UnboundedRequestEnabled) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif - } } } @@ -4616,7 +4613,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -4980,11 +4977,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -5258,10 +5258,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; -#endif s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; @@ -5274,16 +5272,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->setup_for_tdlut) vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); -#else - tdlut_row_bytes = p->tdlut_pte_bytes_per_frame; -#endif -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { @@ -5542,11 +5534,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_equ = s->prefetch_bw4; } -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); @@ -6169,6 +6159,7 @@ static void CalculateFlipSchedule( unsigned int dpte_row_height_chroma, bool use_one_row_for_frame_flip, unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, unsigned int per_pipe_flip_bytes, unsigned int meta_row_bytes, unsigned int meta_row_height, @@ -6189,6 +6180,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); @@ -6239,7 +6231,8 @@ static void CalculateFlipSchedule( if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time - l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), + math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); //The lower bound on flip bandwidth // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required @@ -6541,7 +6534,8 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { - if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + if (p->display_cfg->overrides.all_streams_blanked || + (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; @@ -6921,6 +6915,28 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static void calculate_pstate_keepout_dst_lines( + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_watermarks *watermarks, + unsigned int pstate_keepout_dst_lines[]) +{ + const struct dml2_stream_parameters *stream_descriptor; + unsigned int i; + + for (i = 0; i < display_cfg->num_planes; i++) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { + stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; + + pstate_keepout_dst_lines[i] = + (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); + + if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { + pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; + } + } + } +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -7126,7 +7142,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024.0 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7466,6 +7482,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.OutputRate[k], &mode_lib->ms.RequiredSlots[k]); + if (s->OutputBpp[k] == 0.0) { + s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + } + if (mode_lib->ms.RequiresDSC[k] == false) { mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; @@ -7819,7 +7839,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], mode_lib->ms.ODMMode[k], mode_lib->ip.maximum_dsc_bits_per_component, - mode_lib->ms.OutputBpp[k], + s->OutputBpp[k], display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8247,7 +8267,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8501,10 +8521,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -8845,6 +8870,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dpte_row_height_chroma[k], mode_lib->ms.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->ms.meta_row_bytes[k], s->meta_row_height_luma[k], @@ -8979,6 +9005,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } // End of Prefetch Check @@ -9373,11 +9401,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9406,11 +9432,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9535,17 +9559,16 @@ static void CalculateVMGroupAndRequestTimes( line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; -#ifdef DML_VM_PTE_ADL_PATCH_EN - if (num_group_per_lower_vm_stage_flip <= 2) { - num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; - } + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; - if (num_group_per_lower_vm_stage_pref <= 2) { - num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; - } -#endif - TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; - TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; if (num_req_per_lower_vm_stage_pref > 0) TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; else @@ -9599,10 +9622,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc bool FoundCriticalSurface = false; double LastZ8StutterPeriod = 0; - unsigned int SwathSizeCriticalSurface; - unsigned int LastChunkOfSwathSize; - unsigned int MissingPartOfLastSwathOfDETSize; - memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { @@ -9777,7 +9796,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) - / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); @@ -9871,19 +9890,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif - SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); - LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); - MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && - (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); - dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -10462,11 +10473,16 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10994,6 +11010,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.dpte_row_height_chroma[k], mode_lib->mp.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->mp.meta_row_bytes[k], mode_lib->mp.meta_row_height[k], @@ -11203,6 +11220,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); @@ -12190,6 +12209,7 @@ void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12197,6 +12217,18 @@ void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_disp dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); } +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; +} + void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, @@ -12209,6 +12241,11 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna unsigned int i; + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + /* from display configuration */ fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; @@ -12368,6 +12405,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp { double phantom_processing_delay_pix; unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; unsigned int phantom_v_active_lines; unsigned int phantom_v_startup_lines; unsigned int phantom_v_blank_lines; @@ -12384,7 +12422,9 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_lines++; phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); - phantom_v_active_lines = phantom_processing_delay_lines + dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) + mode_lib->ip.subvp_swath_height_margin_lines; + phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / + display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); + phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; @@ -12396,8 +12436,8 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp // phantom_vtotal = vactive + vblank out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; - out->phantom_min_v_active = dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index); - out->phantom_v_startup = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + out->phantom_min_v_active = phantom_min_v_active_lines; + out->phantom_v_startup = phantom_v_startup_lines; out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index b280ab573fbb..df2d1550a14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_CALCS_H__ #define __DML2_CORE_DCN4_CALCS_H__ @@ -30,6 +29,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index f56abe9ab919..640087e862f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_core_factory.h" #include "dml2_core_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h index 53636a8f52aa..411c514fe65c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_FACTORY_H__ #define __DML2_CORE_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 679b20031903..6d7701a97d3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -2242,11 +2242,15 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou } double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -5050,7 +5054,7 @@ static void calculate_mcache_row_bytes( unsigned int meta_per_mvmpg_per_channel_ub = 0; if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { @@ -5059,7 +5063,7 @@ static void calculate_mcache_row_bytes( *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float)blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -7165,7 +7169,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -7501,11 +7505,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -7739,7 +7746,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); @@ -9304,6 +9310,10 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9332,6 +9342,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9451,6 +9464,14 @@ static void CalculateVMGroupAndRequestTimes( double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + if (num_group_per_lower_vm_stage_flip <= 2) { + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; + } + + if (num_group_per_lower_vm_stage_pref <= 2) { + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; + } + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; @@ -10388,11 +10409,16 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h index d76bda907ec8..f3356b072b59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_SHARED_H__ #define __DML2_CORE_SHARED_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 1343b744eeb3..02498c0e3282 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_SHARED_TYPES_H__ #define __DML2_CORE_SHARED_TYPES_H__ @@ -70,6 +69,7 @@ struct dml2_core_ip_params { unsigned int words_per_channel; bool imall_supported; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int subvp_swath_height_margin_lines; unsigned int subvp_fw_processing_delay_us; unsigned int subvp_pstate_allow_width_us; @@ -782,6 +782,7 @@ struct dml2_core_internal_mode_program { unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int pstate_keepout_dst_lines[DML2_MAX_PLANES]; // Latency and Support double MaxActiveFCLKChangeLatencySupported; @@ -975,7 +976,7 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int DSCFormatFactor; struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES]; - unsigned int ReorderBytes; + unsigned int ReorderingBytes; double HostVMInefficiencyFactor; double HostVMInefficiencyFactorPrefetch; unsigned int TotalDCCActiveDPP; @@ -2010,6 +2011,7 @@ struct dml2_core_internal_scratch { struct dml2_core_internal_display_mode_lib { struct dml2_core_ip_params ip; struct dml2_soc_bb soc; + struct dml2_ip_capabilities ip_caps; //@brief Mode Support and Mode programming struct // Used to hold input; intermediate and output of the calculations diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index c94c4f32c957..0d847bccd5d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_dcn4.h" #include "dml2_internal_shared_types.h" #include "dml_top_types.h" @@ -181,7 +180,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma clock_khz *= 1.0 + margin; - divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* we want to floor here to get higher clock than required rather than lower */ if (divider < DFS_DIVIDER_RANGE_2_START) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h index 3afb69dfd040..b165c58dfd11 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_DCN4_H__ #define __DML2_DPMM_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 2c983daf2dad..dfd01440737d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_factory.h" #include "dml2_dpmm_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h index 80b44b4c2e68..20ba2e446f1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_FACTORY_H__ #define __DML2_DPMM_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index 5d8887ac766d..f4b1a7d02d42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_dcn4.h" #include "dml_top_soc_parameter_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h index 19d178651435..02da6f45cbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_DCN4_H__ #define __DML2_MCG_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index 55085b85f8ed..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_factory.h" #include "dml2_mcg_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h index 5dfdfed04e22..ad307deca3b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_FACTORY_H__ #define __DML2_MCG_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 671f9ac2627c..717536d7bb30 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn3.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h index cc350f88d4d2..f00bd9e72a86 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_DCN3_H__ #define __DML2_PMO_DCN3_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c deleted file mode 100644 index 8952dd7e36cb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c +++ /dev/null @@ -1,1250 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" - -static const int MIN_VACTIVE_MARGIN_US = 100; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const int SUBVP_DRR_MARGIN_US = 100; - -static const enum dml2_pmo_pstate_strategy full_strategy_list_1_display[][4] = { - // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_1_display_size = sizeof(full_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_2_display[][4] = { - // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_2_display_size = sizeof(full_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_3_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR -}; - -static const int full_strategy_list_3_display_size = sizeof(full_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_4_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR -}; - -static const int full_strategy_list_4_display_size = sizeof(full_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i; - int count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add. - result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. - result = false; - break; - } - } - } - - return result; -} - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - //Count number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1) { - // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. - - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - free_pipes -= planes_on_stream; - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->fams_params.v1.subvp.fw_processing_delay_us = 10; - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us = 50; - pmo->fams_params.v1.subvp.refresh_rate_limit_max = 175; - pmo->fams_params.v1.subvp.refresh_rate_limit_min = 0; - - pmo->options = in_out->options; - - return true; -} - -static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end all must be divisible - * in order for the horizontal timing params to be considered divisible - * by 2. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However switching from ODM combine to MPC combine - * is not always seamless. When there not enough free pipes, we - * will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so do not optimize the stream. - */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * Our hardware support seamless ODM transitions for DP encoders - * only. - */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - } - - return true; -} - -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. - */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -static bool are_timings_trivially_synchronizable(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - bool identical = true; - bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, - &display_config->display_config.stream_descriptors[remap_array[i]].timing, - sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - for (i = 0; i < remap_array_size; i++) { - if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; - break; - } - } - - return !contains_drr && identical; -} - -static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) -{ - *bit_field = *bit_field | (0x1 << bit_offset); -} - -static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) -{ - if (bit_field & (0x1 << bit_offset)) - return true; - - return false; -} - -static bool are_all_timings_drr_enabled(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - if (!display_config->display_config.stream_descriptors[i].timing.drr_config.enabled) - return false; - } - } - - return true; -} - -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) -{ - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - - scratch->pmo_dcn4.num_pstate_candidates++; -} - -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) -{ - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - - if (strategy == dml2_pmo_pstate_strategy_vactive) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) - matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) - return false; - } - } - - return true; -} - -static bool subvp_subvp_schedulable(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - unsigned char *svp_stream_indicies, char svp_stream_count) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - int i; - int microschedule_lines, time_us, refresh_hz; - int max_microschedule_us = 0; - int vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - const struct dml2_timing_cfg *svp_timing1 = 0; - const struct dml2_implicit_svp_meta *svp_meta1 = 0; - - const struct dml2_timing_cfg *svp_timing2 = 0; - - if (svp_stream_count <= 1) - return true; - else if (svp_stream_count > 2) - return false; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. - */ - for (i = 0; i < svp_stream_count; i++) { - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[i]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[i]]; - - microschedule_lines = svp_meta1->v_active; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (int)((microschedule_lines * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us + pmo->fams_params.v1.subvp.fw_processing_delay_us + 1); - - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - refresh_hz = (int)((double)(svp_timing1->pixel_clock_khz * 1000) / (svp_timing1->v_total * svp_timing1->h_total)); - - if (refresh_hz < pmo->fams_params.v1.subvp.refresh_rate_limit_min || - refresh_hz > pmo->fams_params.v1.subvp.refresh_rate_limit_max) { - return false; - } - } - - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[0]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[0]]; - - vactive1_us = (int)((svp_timing1->v_active * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - vblank1_us = (int)(((svp_timing1->v_total - svp_timing1->v_active) * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - svp_timing2 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[1]].timing; - - vactive2_us = (int)((svp_timing2->v_active * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - vblank2_us = (int)(((svp_timing2->v_total - svp_timing2->v_active) * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -static bool validate_svp_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask) -{ - bool result = false; - unsigned char stream_index; - - unsigned char svp_stream_indicies[2] = { 0 }; - unsigned char svp_stream_count = 0; - - // Find the SVP streams, store only the first 2, but count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - if (svp_stream_count < 2) - svp_stream_indicies[svp_stream_count] = stream_index; - - svp_stream_count++; - } - } - - if (svp_stream_count == 1) { - result = true; // 1 SVP is always co_functional - } else if (svp_stream_count == 2) { - result = subvp_subvp_schedulable(pmo, display_cfg, svp_stream_indicies, svp_stream_count); - } - - return result; -} - -static bool validate_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_stream_count++; - } - } - - return drr_stream_count <= 4; -} - -static bool validate_svp_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - int svp_stream_count = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int drr_frame_us = 0; // nominal frame time - int subvp_active_us = 0; - int stretched_drr_us = 0; - int drr_stretched_vblank_us = 0; - int max_vblank_mallregion = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *drr_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - drr_stream_count++; - } - } - - if (svp_stream_count == 1 && drr_stream_count == 1 && svp_timing != drr_timing) { - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) - * svp_timing->h_total / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - drr_frame_us = (int)(drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - - drr_stretched_vblank_us = (int)((drr_timing->v_total - drr_timing->v_active) * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000 + (stretched_drr_us - drr_frame_us)); - - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->drr_config.min_refresh_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - } - - return schedulable; -} - -static bool validate_svp_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int vblank_stream_mask) -{ - unsigned char stream_index; - int vblank_stream_count = 0; - int svp_stream_count = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *vblank_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int vblank_frame_us = 0; - int subvp_active_us = 0; - int vblank_blank_us = 0; - int max_vblank_mallregion = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(vblank_stream_mask, stream_index)) { - vblank_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - vblank_stream_count++; - } - } - - if (svp_stream_count == 1 && vblank_stream_count > 0) { - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) * svp_timing->h_total - / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_frame_us = (int)(vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_blank_us = (int)((vblank_timing->v_total - vblank_timing->v_active) * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -static bool validate_drr_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask, int vblank_stream_mask) -{ - return false; -} - -static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[4]) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - - unsigned char stream_index = 0; - - unsigned int svp_count = 0; - unsigned int svp_stream_mask = 0; - unsigned int drr_count = 0; - unsigned int drr_stream_mask = 0; - unsigned int vactive_count = 0; - unsigned int vactive_stream_mask = 0; - unsigned int vblank_count = 0; - unsigned int vblank_stream_mask = 0; - - bool strategy_matches_forced_requirements = true; - - bool admissible = false; - - // Tabulate everything - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { - strategy_matches_forced_requirements = false; - break; - } - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - svp_count++; - set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - drr_count++; - set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive) { - vactive_count++; - set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) { - vblank_count++; - set_bit_in_bitfield(&vblank_stream_mask, stream_index); - } - } - - if (!strategy_matches_forced_requirements) - return false; - - // Check for trivial synchronization for vblank - if (vblank_count > 0 && (pmo->options->disable_vblank || !are_timings_trivially_synchronizable(display_cfg, vblank_stream_mask))) - return false; - - if (svp_count > 0 && pmo->options->disable_svp) - return false; - - if (drr_count > 0 && (pmo->options->disable_drr_var || !are_all_timings_drr_enabled(display_cfg, drr_stream_mask))) - return false; - - // Validate for FAMS admissibiliy - if (svp_count == 0 && drr_count == 0) { - // No FAMS - admissible = true; - } else { - admissible = false; - if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count == 0) { - // All SVP - admissible = validate_svp_cofunctionality(pmo, display_cfg, svp_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // All DRR - admissible = validate_drr_cofunctionality(pmo, display_cfg, drr_stream_mask); - } else if (svp_count > 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // SVP + DRR - admissible = validate_svp_drr_cofunctionality(pmo, display_cfg, svp_stream_mask, drr_stream_mask); - } else if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count > 0) { - // SVP + VBlank - admissible = validate_svp_vblank_cofunctionality(pmo, display_cfg, svp_stream_mask, vblank_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count > 0) { - // DRR + VBlank - admissible = validate_drr_vblank_cofunctionality(pmo, display_cfg, drr_stream_mask, vblank_stream_mask); - } - } - - return admissible; -} - -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned char i; - int min_vactive_margin_us = 0xFFFFFFF; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) - min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; - } - } - - return min_vactive_margin_us; -} - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - struct dml2_pmo_scratch *s = &pmo->scratch; - - struct display_configuation_with_meta *display_config; - const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy (*strategy_list)[4] = 0; - unsigned int strategy_list_size = 0; - unsigned int plane_index, stream_index, i; - - state->performed = true; - - display_config = in_out->base_display_config; - display_config->display_config.overrides.enable_subvp_implicit_pmo = true; - - memset(s, 0, sizeof(struct dml2_pmo_scratch)); - - pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size - 1; - pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; - - set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - - // Figure out which streams can do vactive, and also build up implicit SVP meta - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= - MIN_VACTIVE_MARGIN_US) - set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); - - s->pmo_dcn4.stream_svp_meta[stream_index].valid = true; - s->pmo_dcn4.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - s->pmo_dcn4.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - s->pmo_dcn4.stream_svp_meta[stream_index].v_front_porch = 1; - } - - switch (display_config->display_config.num_streams) { - case 1: - strategy_list = full_strategy_list_1_display; - strategy_list_size = full_strategy_list_1_display_size; - break; - case 2: - strategy_list = full_strategy_list_2_display; - strategy_list_size = full_strategy_list_2_display_size; - break; - case 3: - strategy_list = full_strategy_list_3_display; - strategy_list_size = full_strategy_list_3_display_size; - break; - case 4: - strategy_list = full_strategy_list_4_display; - strategy_list_size = full_strategy_list_4_display_size; - break; - default: - strategy_list_size = 0; - break; - } - - if (strategy_list_size == 0) - return false; - - s->pmo_dcn4.num_pstate_candidates = 0; - - for (i = 0; i < strategy_list_size && i < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); - } - } - - if (s->pmo_dcn4.num_pstate_candidates > 0) { - // There's this funny case... - // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 - // Otherwise the current index should be -1 because we run the optimization at least once - s->pmo_dcn4.cur_pstate_candidate = 0; - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { - s->pmo_dcn4.cur_pstate_candidate = -1; - break; - } - } - return true; - } else { - return false; - } -} - -static void reset_display_configuration(struct display_configuation_with_meta *display_config) -{ - unsigned int plane_index; - unsigned int stream_index; - struct dml2_plane_parameters *plane; - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - display_config->stage3.stream_svp_meta[stream_index].valid = false; - } - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Unset SubVP - plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; - - // Remove reserve time - plane->overrides.reserved_vblank_time_ns = 0; - - // Reset strategy to auto - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; - } -} - -static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup DRR - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; - } - } -} - -static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; - } - } - - if (stream_index >= 0) { - display_config->stage3.stream_svp_meta[stream_index].valid = true; - display_config->stage3.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - display_config->stage3.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - display_config->stage3.stream_svp_meta[stream_index].v_front_porch = 1; - } -} - -static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup reserve time - plane->overrides.reserved_vblank_time_ns = 400 * 1000; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; - } - } -} - -static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - } -} - -static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_scratch *scratch, int strategy_index) -{ - bool success = true; - unsigned char stream_index; - - reset_display_configuration(display_config); - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { - success = false; - break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { - setup_planes_for_vblank_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - setup_planes_for_svp_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - setup_planes_for_drr_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { - setup_planes_for_vactive_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } - } - - return success; -} - -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) -{ - int min_time_us = 0xFFFFFF; - unsigned char plane_index = 0; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) - min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; - } - } - return min_time_us; -} - -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - bool p_state_supported = true; - unsigned int stream_index; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - if (s->pmo_dcn4.cur_pstate_candidate < 0) - return false; - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_US) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr)) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { - p_state_supported = false; - break; - } - } - - return p_state_supported; -} - -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { - s->pmo_dcn4.cur_latency_index++; - - success = true; - } - } - - if (!success) { - s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; - s->pmo_dcn4.cur_pstate_candidate++; - - if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { - success = true; - } - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; - setup_display_config(in_out->optimized_display_config, &in_out->instance->scratch, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h deleted file mode 100644 index 09cacc933d21..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef __DML2_PMO_DCN4_H__ -#define __DML2_PMO_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -bool pmo_dcn4_unit_test(void); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 60a9faf81d3d..fa445067782e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1,122 +1,181 @@ -/* -* Copyright 2022 Advanced Micro Devices, Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -* -* Authors: AMD -* -*/ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. #include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" #include "dml2_debug.h" #include "lib_float_math.h" #include "dml2_pmo_dcn4_fams2.h" static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const enum dml2_pmo_pstate_strategy base_strategy_list_1_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_2_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + DRR - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then DRR + VActive - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Then DRR + VBlank - //{ dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_3_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank + // All VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 2 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_4_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, // VActive + 1 VBlank + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 2 VBlank + // All Vblank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .allow_state_increase = true, + }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 2 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 3 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) @@ -296,9 +355,9 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_strategy base_strategy) +static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) { - enum dml2_pmo_pstate_strategy variant_strategy = 0; + enum dml2_pmo_pstate_method variant_strategy = 0; switch (base_strategy) { case dml2_pmo_pstate_strategy_vactive: @@ -327,11 +386,9 @@ static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum return variant_strategy; } -static enum dml2_pmo_pstate_strategy(*get_expanded_strategy_list( - struct dml2_pmo_init_data *init_data, - int stream_count))[PMO_DCN4_MAX_DISPLAYS] +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; switch (stream_count) { case 1: @@ -361,23 +418,23 @@ static unsigned int get_num_expanded_strategies( } static void insert_strategy_into_expanded_list( - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_init_data *init_data) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++], - per_stream_pstate_strategy, - sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; } } static void expand_base_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategy, unsigned int stream_count) { bool skip_to_next_stream; @@ -386,19 +443,21 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, unsigned int i, j; unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; /* determine number of displays per method */ for (i = 0; i < stream_count; i++) { /* increment the count of the earliest index with the same method */ for (j = 0; j < stream_count; j++) { - if (base_strategy_list[i] == base_strategy_list[j]) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { num_streams_per_method[j] = num_streams_per_method[j] + 1; break; } } } + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + i = 0; /* uses a while loop instead of recursion to build permutations of base strategy */ while (stream_iteration_indices[0] < stream_count) { @@ -409,12 +468,12 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, /* determine what to do for this iteration */ if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { /* decrement count and assign method */ - cur_strategy_list[i] = base_strategy_list[stream_iteration_indices[i]]; + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; num_streams_per_method[stream_iteration_indices[i]] -= 1; if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -450,55 +509,122 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], unsigned int stream_count) { + bool valid = true; unsigned int i; - bool variant_found = false; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - - /* setup variant list as base to start */ - memcpy(cur_strategy_list, base_strategy_list, sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); - + /* check all restrictions are met */ for (i = 0; i < stream_count; i++) { - cur_strategy_list[i] = convert_strategy_to_drr_variant(base_strategy_list[i]); + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } - if (cur_strategy_list[i] != base_strategy_list[i]) { - variant_found = true; + return valid; +} + +static void expand_variant_strategy(struct dml2_pmo_instance *pmo, + const struct dml2_pmo_pstate_strategy *base_strategy, + unsigned int stream_count) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } } - if (i == stream_count - 1 && variant_found) { - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + expand_base_strategy(pmo, &variant_strategy, stream_count); + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } + } + + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; } } } static void expand_base_strategies( struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy(*base_strategies_list)[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategies_list, const unsigned int num_base_strategies, unsigned int stream_count) { unsigned int i; - unsigned int num_pre_variant_strategies; - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS]; /* expand every explicit base strategy (except all DRR) */ - for (i = 0; i < num_base_strategies - 1; i++) { - expand_base_strategy(pmo, base_strategies_list[i], stream_count); + for (i = 0; i < num_base_strategies; i++) { + expand_base_strategy(pmo, &base_strategies_list[i], stream_count); + expand_variant_strategy(pmo, &base_strategies_list[i], stream_count); } - - /* expand base strategies to DRR variants */ - num_pre_variant_strategies = get_num_expanded_strategies(&pmo->init_data, stream_count); - expanded_strategy_list = get_expanded_strategy_list(&pmo->init_data, stream_count); - for (i = 0; i < num_pre_variant_strategies; i++) { - expand_variant_strategy(pmo, expanded_strategy_list[i], stream_count); - } - - /* add back all DRR */ - insert_strategy_into_expanded_list(base_strategies_list[num_base_strategies - 1], stream_count, &pmo->init_data); } bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) @@ -591,8 +717,6 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; - struct dml2_optimization_stage4_state *state = - &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -613,30 +737,28 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - state->unoptimizable_streams[i] = true; + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - state->unoptimizable_streams[i] = true; + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - state->unoptimizable_streams[i] = true; + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - state->unoptimizable_streams[i] = true; + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; } - state->performed = true; - return true; } @@ -787,6 +909,7 @@ static void build_synchronized_timing_groups( /* clear all group masks */ memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); s->pmo_dcn4.num_timing_groups = 0; @@ -808,6 +931,8 @@ static void build_synchronized_timing_groups( /* if drr is in use, timing is not sychnronizable */ if (master_timing->drr_config.enabled) { s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && + (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); continue; } @@ -933,8 +1058,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, /* check recout height covers entire otg vactive, and single plane */ if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || - !plane_descriptor->composition.rect_out_height_spans_vactive || - plane_descriptor->composition.rotation_angle != dml2_rotation_0) { + !plane_descriptor->composition.rect_out_height_spans_vactive) { return false; } } @@ -970,35 +1094,24 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, return true; } -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) { - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - + scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; scratch->pmo_dcn4.num_pstate_candidates++; } -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method) { unsigned char i; enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - if (strategy == dml2_pmo_pstate_strategy_vactive || strategy == dml2_pmo_pstate_strategy_fw_vactive_drr) + if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank || strategy == dml2_pmo_pstate_strategy_fw_vblank_drr) + else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + else if (method == dml2_pmo_pstate_strategy_fw_svp) matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + else if (method == dml2_pmo_pstate_strategy_fw_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; for (i = 0; i < DML2_MAX_PLANES; i++) { @@ -1030,12 +1143,12 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_strategy stream_pstate_strategy, + enum dml2_pmo_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; - switch (stream_pstate_strategy) { + switch (stream_pstate_method) { case dml2_pmo_pstate_strategy_vactive: case dml2_pmo_pstate_strategy_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; @@ -1066,7 +1179,7 @@ static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( static bool is_timing_group_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *pstate_strategy, const unsigned int timing_group_idx, struct dml2_fams2_per_method_common_meta *group_fams2_meta) { @@ -1085,18 +1198,13 @@ static bool is_timing_group_schedulable( } /* init allow start and end lines for timing group */ - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[base_stream_idx], base_stream_idx); - if (!stream_method_fams2_meta) - return false; - + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[i], i); - if (!stream_method_fams2_meta) - continue; + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1126,7 +1234,7 @@ static bool is_timing_group_schedulable( static bool is_config_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { unsigned int i, j; bool schedulable; @@ -1149,7 +1257,7 @@ static bool is_config_schedulable( for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, per_stream_pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { /* synchronized timing group was not schedulable */ schedulable = false; break; @@ -1251,7 +1359,7 @@ static bool is_config_schedulable( unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || - s->pmo_dcn4.group_is_drr_enabled[sorted_ip1]) { + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { schedulable = false; break; } @@ -1263,8 +1371,8 @@ static bool is_config_schedulable( /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ if (s->pmo_dcn4.num_timing_groups == 2 && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[0]) && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[1])) { + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { double period_ratio; double max_shift_us; double shift_per_period; @@ -1293,44 +1401,45 @@ static bool is_config_schedulable( } static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy stream_pstate_strategy, - unsigned int stream_index) + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_method stream_pstate_method, + unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; bool strategy_matches_drr_requirements = true; /* check if strategy is compatible with stream drr capability and strategy */ - if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && display_cfg->display_config.num_streams > 1 && stream_descriptor->timing.drr_config.enabled && (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { /* DRR is active, so config may become unschedulable */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && - is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR is variable, fw exclusive methods require DRR to be clamped */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_drr_var_when_var_active && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR variable is active, but policy blocks DRR for p-state when this happens */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_var || !stream_descriptor->timing.drr_config.enabled || stream_descriptor->timing.drr_config.disallowed)) { /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled)) { + !stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_fams2) { /* FW modes require FAMS2 */ strategy_matches_drr_requirements = false; @@ -1341,7 +1450,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { struct dml2_pmo_scratch *s = &pmo->scratch; @@ -1362,28 +1471,28 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins // Tabulate everything for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { strategy_matches_forced_requirements = false; break; } strategy_matches_drr_requirements &= - stream_matches_drr_policy(pmo, display_cfg, per_stream_pstate_strategy[stream_index], stream_index); + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1392,7 +1501,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) return false; - if (vactive_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask))) + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) return false; if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) @@ -1404,7 +1513,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) return false; - return is_config_schedulable(pmo, display_cfg, per_stream_pstate_strategy); + return is_config_schedulable(pmo, display_cfg, pstate_strategy); } static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) @@ -1460,6 +1569,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, (stream_fams2_meta->nom_vtotal * timing->h_total); stream_fams2_meta->nom_frame_time_us = (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; + stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active; if (stream_descriptor->timing.drr_config.enabled == true) { if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { @@ -1513,7 +1623,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_vactive.common.allow_start_otg_vline = timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; stream_fams2_meta->method_vactive.common.allow_end_otg_vline = - timing->v_blank_end + timing->v_active - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0; @@ -1523,8 +1633,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); /* vblank */ - stream_fams2_meta->method_vblank.common.allow_start_otg_vline = - timing->v_blank_end + timing->v_active; + stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start; stream_fams2_meta->method_vblank.common.allow_end_otg_vline = stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; @@ -1558,8 +1667,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_subvp.common.allow_end_otg_vline = - stream_fams2_meta->nom_vtotal - - timing->v_front_porch - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); @@ -1568,20 +1676,21 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_drr.programming_delay_otg_vlines = (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); stream_fams2_meta->method_drr.common.allow_start_otg_vline = - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->vblank_start + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; if (display_config->display_config.num_streams <= 1) { /* only need to stretch vblank for blackout time */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { /* multi display needs to always be schedulable */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->nom_vtotal * 2 + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } @@ -1614,7 +1723,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy(*strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + const struct dml2_pmo_pstate_strategy *strategy_list = NULL; unsigned int strategy_list_size = 0; unsigned char plane_index, stream_index, i; @@ -1626,6 +1735,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp memset(s, 0, sizeof(struct dml2_pmo_scratch)); + if (display_config->display_config.overrides.all_streams_blanked) { + return true; + } + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1663,8 +1776,8 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp s->pmo_dcn4.num_pstate_candidates = 0; for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); } } @@ -1860,26 +1973,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { success = false; break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -1920,6 +2033,10 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp int MIN_VACTIVE_MARGIN_DRR = 0; int REQUIRED_RESERVED_TIME = 0; + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; MIN_VACTIVE_MARGIN_DRR = INT_MIN; REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; @@ -1930,34 +2047,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { p_state_supported = false; break; } @@ -1974,8 +2091,8 @@ bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pst memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { s->pmo_dcn4.cur_latency_index++; success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 75175d93add4..0c25bd3e9ac0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FAMS2_DCN4_H__ #define __DML2_PMO_FAMS2_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index e0b9ece7901d..95f716e2641f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -2,10 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn4_fams2.h" -#include "dml2_pmo_dcn4.h" #include "dml2_pmo_dcn3.h" #include "dml2_external_lib_deps.h" @@ -35,8 +33,8 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * switch (project_id) { case dml2_project_dcn4x_stage1: - out->initialize = pmo_dcn4_initialize; - out->optimize_dcc_mcache = pmo_dcn4_optimize_dcc_mcache; + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; result = true; break; case dml2_project_dcn4x_stage2: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h index 9d3dc5e94be1..7218de1824cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FACTORY_H__ #define __DML2_PMO_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index defe13436a2c..4822dbcc86bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "lib_float_math.h" #define ASSERT(condition) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h index 537cf6fd4c15..e13b0c5939b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __LIB_FLOAT_MATH_H__ #define __LIB_FLOAT_MATH_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index 1b6dbfaa7ae8..dc8af4dd0410 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_top_optimization.h" #include "dml2_internal_shared_types.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h index 1536afcbf73a..9f22ab33eab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_TOP_OPTIMIZATION_H__ #define __DML2_TOP_OPTIMIZATION_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index b25e9230adea..30d07cd1065f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml_top.h" #include "dml2_mcg_factory.h" @@ -28,6 +27,7 @@ bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) bool result = false; memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); @@ -96,14 +96,12 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) { struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; - /* Borrow the build_mode_programming_locals programming struct for DPMM call. */ - struct dml2_display_cfg_programming *dpmm_programming = dml->scratch.build_mode_programming_locals.mode_programming_params.programming; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; bool result = false; bool mcache_success = false; - if (dpmm_programming) - memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); @@ -130,7 +128,7 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) /* * Call DPMM to map all requirements to minimum clock state */ - if (result && dpmm_programming) { + if (result) { l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; l->dppm_map_mode_params.programming = dpmm_programming; @@ -140,9 +138,8 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) } in_out->is_supported = mcache_success; - result = result && in_out->is_supported; - return result; + return true; } bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) @@ -268,24 +265,15 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (l->optimized_display_config_with_meta.stage4.performed) { - /* - * when performed is true, optimization has applied to - * optimized_display_config_with_meta and it has passed mode - * support. However it may or may not pass the test function to - * reach actual Vmin. As long as voltage is optimized even if it - * doesn't reach Vmin level, there is still power benefit so in - * this case we will still copy this optimization into base - * display config. - */ + if (vmin_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = vmin_success; + l->base_display_config_with_meta.stage4.success = true; } /* * Phase 5: Optimize for Stutter */ - memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); l->stutter_phase.dml = dml; l->stutter_phase.display_config = &l->base_display_config_with_meta; l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; @@ -298,7 +286,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o if (stutter_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage5.success = true; + l->base_display_config_with_meta.stage4.success = true; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index 7afd417071a5..e69f8ce97e24 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h index bb12e4c30690..7b1f6f7143d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_MCACHE_H__ #define __DML_TOP_MCACHE_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index de7d8a6a2d3d..e9b8e10695ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" int dml2_printf(const char *format, ...) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index 0403238df107..d51a1b6c62f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index 5632cdacb7f4..aeac9f159fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_INTERNAL_SHARED_TYPES_H__ #define __DML2_INTERNAL_SHARED_TYPES_H__ @@ -107,10 +106,16 @@ struct dml2_dpmm_map_watermarks_params_in_out { struct dml2_display_cfg_programming *programming; }; +struct dml2_dpmm_scratch { + struct dml2_display_cfg_programming programming; +}; + struct dml2_dpmm_instance { bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); bool (*unit_test)(void); + + struct dml2_dpmm_scratch dpmm_scratch; }; /* @@ -266,6 +271,7 @@ struct dml2_fams2_meta { unsigned int contention_delay_otg_vlines; unsigned int min_allow_width_otg_vlines; unsigned int nom_vtotal; + unsigned int vblank_start; double nom_refresh_rate_hz; double nom_frame_time_us; unsigned int max_vtotal; @@ -594,7 +600,7 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_strategy { +enum dml2_pmo_pstate_method { dml2_pmo_pstate_strategy_na = 0, /* hw exclusive modes */ dml2_pmo_pstate_strategy_vactive = 1, @@ -612,6 +618,11 @@ enum dml2_pmo_pstate_strategy { dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, }; +struct dml2_pmo_pstate_strategy { + enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; + #define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) #define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) #define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) @@ -634,8 +645,7 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { - enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[DML2_MAX_PLANES][DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; - bool allow_state_increase_for_strategy[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -661,6 +671,7 @@ struct dml2_pmo_scratch { unsigned int num_timing_groups; unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; double group_line_time_us[DML2_MAX_PLANES]; /* scheduling check locals */ @@ -676,10 +687,10 @@ struct dml2_pmo_init_data { union { struct { /* populated once during initialization */ - enum dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 24 * 2][PMO_DCN4_MAX_DISPLAYS]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; } pmo_dcn4; }; From 5ed9481db74740fd33ec4079b29db88e8c9f4a1c Mon Sep 17 00:00:00 2001 From: Mounika Adhuri Date: Wed, 26 Jun 2024 18:51:04 +0530 Subject: [PATCH 0075/1523] drm/amd/display: Refactoring MPC [Why] To refactor MPC files [How] Moved MPC files to respective folders and updated makefiles appropriately. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Mounika Adhuri Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 3 +-- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/mpc/Makefile | 27 +++++++++++++++++++ .../display/dc/{ => mpc}/dcn10/dcn10_mpc.c | 0 .../display/dc/{ => mpc}/dcn10/dcn10_mpc.h | 0 .../display/dc/{ => mpc}/dcn20/dcn20_mpc.c | 0 .../display/dc/{ => mpc}/dcn20/dcn20_mpc.h | 0 .../display/dc/{ => mpc}/dcn30/dcn30_mpc.c | 2 +- .../display/dc/{ => mpc}/dcn30/dcn30_mpc.h | 0 10 files changed, 30 insertions(+), 5 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn10/dcn10_mpc.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn10/dcn10_mpc.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn20/dcn20_mpc.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn20/dcn20_mpc.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn30/dcn30_mpc.c (99%) rename drivers/gpu/drm/amd/display/dc/{ => mpc}/dcn30/dcn30_mpc.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 75e088b479ea..e1f6623d4936 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_mpc.o \ dcn10_cm_common.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index d92d2b4ee015..25ba0d310d46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. -DCN20 = dcn20_mpc.o \ - dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o +DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 4c43af867d86..804851247acc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,7 +23,7 @@ # # -DCN30 := dcn30_mpc.o dcn30_vpg.o \ +DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ dcn30_dwb.o \ dcn30_dwb_cm.o \ diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile index 7f7458c07e2a..5402c3529f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile @@ -24,6 +24,33 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN10 +############################################################################### +MPC_DCN10 = dcn10_mpc.o + +AMD_DAL_MPC_DCN10 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn10/,$(MPC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN10) + +############################################################################### +# DCN20 +############################################################################### +MPC_DCN20 = dcn20_mpc.o + +AMD_DAL_MPC_DCN20 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn20/,$(MPC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN20) + +############################################################################### +# DCN30 +############################################################################### +MPC_DCN30 = dcn30_mpc.o + +AMD_DAL_MPC_DCN30 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn30/,$(MPC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN30) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index 3aeb85ec40b0..fe26fde12eeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -25,7 +25,7 @@ #include "reg_helper.h" #include "dcn30_mpc.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "basics/conversion.h" #include "dcn10/dcn10_cm_common.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h rename to drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h From f91a9af09dea850d83d4b217b8acbafd97b5c61f Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 3 Jul 2024 16:47:57 +0800 Subject: [PATCH 0076/1523] drm/amd/display: Fix VRR cannot enable [Why] Sometimes the VRR cannot enable after login to the desktop. User space may call the DRM_IOCTL_MODE_GETCONNECTOR right after the DRM_IOCTL_MODE_RMFB. After calling DRM_IOCTL_MODE_RMFB to remove all the frame buffer and it will cause the driver to disable the crtc and disable the link while calling the link_set_dpms_off(). It will cause the dpcd read failed in amdgpu_dm_update_freesync_caps() while try to get the DP_MSA_TIMING_PAR_IGNORED capability and think the sink side does not support VRR. [How] Use the dpcd_caps.allow_invalid_MSA_timing_param flag instead of reading from dpcd directly. dpcd_caps.allow_invalid_MSA_timing_param flag is updated during HPD. It is safe to replace the original method. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 92774a871e98..a3b0f2748af0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11721,25 +11721,6 @@ fail: return ret; } -static bool is_dp_capable_without_timing_msa(struct dc *dc, - struct amdgpu_dm_connector *amdgpu_dm_connector) -{ - u8 dpcd_data; - bool capable = false; - - if (amdgpu_dm_connector->dc_link && - dm_helpers_dp_read_dpcd( - NULL, - amdgpu_dm_connector->dc_link, - DP_DOWN_STREAM_PORT_COUNT, - &dpcd_data, - sizeof(dpcd_data))) { - capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false; - } - - return capable; -} - static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, unsigned int offset, unsigned int total_length, @@ -12042,8 +12023,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; - if (is_dp_capable_without_timing_msa(adev->dm.dc, - amdgpu_dm_connector)) { + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; From 39d496d4721ba509647a70041f38d82b03c74680 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 10:41:26 -0400 Subject: [PATCH 0077/1523] drm/amd/display: Add blanked streams override to DML2.1 [WHY] DML2.1 currently has no concept of a "blanked" stream. For cases like DPMS off, things like UCLK p-state is always allowed, so PMO is not required to optimize for it. [HOW] Add flag to DML2.1 display configuration to indicate all streams are blanked, so certain operations and optimizations can be skipped for optimal programming when displays are blanked. Reviewed-by: Chaitanya Dhere Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 405544920f3b..e9647f068ee4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -957,6 +957,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s int stream_index, plane_index; int disp_cfg_stream_location, disp_cfg_plane_location; struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); @@ -1010,10 +1011,16 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); } + + plane_count++; } } } + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + return true; } From 4e8eac98046446d99cbbed740f0767204b839f3f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 3 Jul 2024 11:36:34 -0400 Subject: [PATCH 0078/1523] drm/amd/display: Calculate ODM width using odm slice rect, not recout [Description] There are scenarios where ODM4:1 is used but the surface is entirely outside of the first and last ODM slice. In this case the recout.width for the first and last slice is 0 because there's no overlap with the surface and that ODM slice, but this causes the x_pos for the cursor in this scenario to be calculated incorrectly. Instead we should use the ODM slice width instead of the recout width. Reviewed-by: Nevenko Stupar Signed-off-by: Jerry Zuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b9378f18c020..31e0e9210dd7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1115,10 +1115,10 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror, .stream = pipe_ctx->stream }; + struct rect odm_slice_src = { 0 }; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || (pipe_ctx->prev_odm_pipe != NULL); int prev_odm_width = 0; - int prev_odm_offset = 0; struct pipe_ctx *prev_odm_pipe = NULL; bool mpc_combine_on = false; int bottom_pipe_x_pos = 0; @@ -1183,12 +1183,12 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) prev_odm_pipe = pipe_ctx->prev_odm_pipe; while (prev_odm_pipe != NULL) { - prev_odm_width += prev_odm_pipe->plane_res.scl_data.recout.width; - prev_odm_offset += prev_odm_pipe->plane_res.scl_data.recout.x; + odm_slice_src = resource_get_odm_slice_src_rect(prev_odm_pipe); + prev_odm_width += odm_slice_src.width; prev_odm_pipe = prev_odm_pipe->prev_odm_pipe; } - x_pos -= (prev_odm_width + prev_odm_offset); + x_pos -= (prev_odm_width); } /* If the position is negative then we need to add to the hotspot From a41d58fb91248557438de4e8298d1d2ed5b39564 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Wed, 3 Jul 2024 13:29:55 -0400 Subject: [PATCH 0079/1523] drm/amd/display: Issue with 3 or more mcaches per surface [Why & How] Current logic in mcache admissibility check has flaw if calculated number of maches are 3 or more per surface, so sometimes the check may pass when it should fail, and sometimes may fail when it should pass, fix the issue and also adding additional check to make sure that required number of mcaches per surface cannot be higher than number of pipes + 1, used on that surface. Reviewed-by: Chaitanya Dhere Signed-off-by: Jerry Zuo Signed-off-by: Nevenko Stupar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_top/dml_top_mcache.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index e69f8ce97e24..a342ebfbe4e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -142,12 +142,12 @@ static unsigned int count_elements_in_span(int *array, unsigned int array_size, while (span_start_index < array_size) { for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value > span) { + if (array[i] - span_start_value <= span) { if (i - span_start_index + 1 > greatest_element_count) { greatest_element_count = i - span_start_index + 1; } + } else break; - } } span_start_index++; @@ -207,9 +207,9 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi int temp, p0shift, p1shift; unsigned int plane_index = 0; unsigned int i; - char odm_combine_factor = 1; - char mpc_combine_factor = 1; - char num_dpps; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; unsigned int num_boundaries; enum dml2_scaling_transform scaling_transform; const struct dml2_plane_parameters *plane; @@ -226,10 +226,10 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi plane = ¶ms->display_cfg->plane_descriptors[plane_index]; stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - odm_combine_factor = (char)params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; if (odm_combine_factor == 1) - mpc_combine_factor = (char)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; else mpc_combine_factor = 1; @@ -259,13 +259,13 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi // The last element in the unshifted boundary array will always be the first pixel outside the // plane, which means theres no mcache associated with it, so -1 num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { p0pass = true; } num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { p1pass = true; } From f82200703434522f1b35d38bdef02486d22b2f25 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Wed, 3 Jul 2024 12:23:02 -0400 Subject: [PATCH 0080/1523] drm/amd/display: remove dc dependencies from SPL library [Why] Make SPL library dc-independent so it can be reused by other components [How] Create separate set of fixed31_32 calls in SPL Make all inputs and outputs to SPL use primitive types For ratios and inits, return as uint32 from SPL. So add conversion from uint32 back to fixed point in SPL-to-dc translate function Reviewed-by: Relja Vojvodic Signed-off-by: Jerry Zuo Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/basics/fixpt31_32.c | 27 + .../gpu/drm/amd/display/dc/core/dc_resource.c | 2 - .../gpu/drm/amd/display/dc/dc_spl_translate.c | 43 +- drivers/gpu/drm/amd/display/dc/spl/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 564 +++++++++--------- .../display/dc/spl/dc_spl_isharp_filters.c | 27 +- .../display/dc/spl/dc_spl_isharp_filters.h | 2 +- .../display/dc/spl/dc_spl_scl_easf_filters.c | 81 +-- .../display/dc/spl/dc_spl_scl_easf_filters.h | 32 +- .../amd/display/dc/spl/dc_spl_scl_filters.c | 69 +-- .../amd/display/dc/spl/dc_spl_scl_filters.h | 18 +- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 39 +- .../gpu/drm/amd/display/dc/spl/spl_debug.h | 23 + .../drm/amd/display/dc/spl/spl_fixpt31_32.c | 518 ++++++++++++++++ .../drm/amd/display/dc/spl/spl_fixpt31_32.h | 546 +++++++++++++++++ .../gpu/drm/amd/display/dc/spl/spl_os_types.h | 77 +++ .../gpu/drm/amd/display/include/fixed31_32.h | 6 + 17 files changed, 1645 insertions(+), 431 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_debug.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 506f82cd5cc6..88d3f9d7dd55 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -486,3 +486,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg) else return ux_dy(arg.value, 4, 19); } + +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_zero; + struct fixed31_32 fixpt_int_value = dc_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4f5b23520365..5c9091f2a8b2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1511,8 +1511,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; - spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 49ff59258c8d..339d092e711c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -42,26 +42,26 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; - scaling_quality->h_taps = spl_scaling_quality->h_taps; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; - scaling_quality->v_taps = spl_scaling_quality->v_taps; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1; + scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; + scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct spl_ratios *spl_ratios) + const struct ratio *spl_ratios) { - ratios->horz = spl_ratios->horz; - ratios->vert = spl_ratios->vert; - ratios->horz_c = spl_ratios->horz_c; - ratios->vert_c = spl_ratios->vert_c; + ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); + ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); + ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); + ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct spl_inits *spl_inits) + const struct init *spl_inits) { - inits->h = spl_inits->h; - inits->v = spl_inits->v; - inits->h_c = spl_inits->h_c; - inits->v_c = spl_inits->v_c; + inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); + inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); + inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); + inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -170,6 +170,9 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; + + spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; /* Check if it is stream is in fullscreen and if its HDR. * Use this to determine sharpness levels */ @@ -184,15 +187,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); } diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index af7eaf839970..05764d4d4604 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index dad38960d34d..b8858ea7c776 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -4,10 +4,11 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" -#include "dc_spl_isharp_filters.h" #include "dc_spl_scl_easf_filters.h" +#include "dc_spl_isharp_filters.h" +#include "spl_debug.h" -#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) @@ -108,26 +109,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active( const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; struct spl_rect rec_out = {0}; - struct fixed31_32 temp; + struct spl_fixed31_32 temp; - temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, + temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, stream_src->width); - rec_out.x = stream_dst->x + dc_fixpt_round(temp); + rec_out.x = stream_dst->x + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->x + rec_in->width) * (long long)stream_dst->width, stream_src->width); - rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; + rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x; - temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, + temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, stream_src->height); - rec_out.y = stream_dst->y + dc_fixpt_round(temp); + rec_out.y = stream_dst->y + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->y + rec_in->height) * (long long)stream_dst->height, stream_src->height); - rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; + rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y; return rec_out; } @@ -145,7 +146,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; mpc_rec.height = plane_clip_rec->height; mpc_rec.y = plane_clip_rec->y; - ASSERT(mpc_slice_count == 1 || + SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -158,7 +159,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( } if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { - ASSERT(mpc_rec.height % 2 == 0); + SPL_ASSERT(mpc_rec.height % 2 == 0); mpc_rec.height /= 2; } return mpc_rec; @@ -198,7 +199,7 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i return spl_in->basic_out.odm_slice_rect; } -static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out) { /* * A plane clip represents the desired plane size and position in Stream @@ -341,21 +342,23 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) /* shift the overlapping area so it is with respect to current * ODM slice's position */ - spl_out->scl_data.recout = shift_rec( + spl_scratch->scl_data.recout = shift_rec( &overlapping_area, -odm_slice.x, -odm_slice.y); - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_base_offset; - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_dpp_offset; } else /* if there is no overlap, zero recout */ - memset(&spl_out->scl_data.recout, 0, + memset(&spl_scratch->scl_data.recout, 0, sizeof(struct spl_rect)); } /* Calculate scaling ratios */ -static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, + struct spl_scratch *spl_scratch, + struct spl_out *spl_out) { const int in_w = spl_in->basic_out.src_rect.width; const int in_h = spl_in->basic_out.src_rect.height; @@ -366,72 +369,72 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out * /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) - swap(surf_src.height, surf_src.width); + spl_swap(surf_src.height, surf_src.width); - spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction( surf_src.width, spl_in->basic_in.dst_rect.width); - spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction( surf_src.height, spl_in->basic_in.dst_rect.height); if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) - spl_out->scl_data.ratios.horz.value *= 2; + spl_scratch->scl_data.ratios.horz.value *= 2; else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) - spl_out->scl_data.ratios.vert.value *= 2; + spl_scratch->scl_data.ratios.vert.value *= 2; - spl_out->scl_data.ratios.vert.value = div64_s64( - spl_out->scl_data.ratios.vert.value * in_h, out_h); - spl_out->scl_data.ratios.horz.value = div64_s64( - spl_out->scl_data.ratios.horz.value * in_w, out_w); + spl_scratch->scl_data.ratios.vert.value = spl_div64_s64( + spl_scratch->scl_data.ratios.vert.value * in_h, out_h); + spl_scratch->scl_data.ratios.horz.value = spl_div64_s64( + spl_scratch->scl_data.ratios.horz.value * in_w, out_w); - spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; - spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; + spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; + spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { - spl_out->scl_data.ratios.horz_c.value /= 2; - spl_out->scl_data.ratios.vert_c.value /= 2; + spl_scratch->scl_data.ratios.horz_c.value /= 2; + spl_scratch->scl_data.ratios.vert_c.value /= 2; } - spl_out->scl_data.ratios.horz = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz, 19); - spl_out->scl_data.ratios.vert = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert, 19); - spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz_c, 19); - spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert_c, 19); + spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz, 19); + spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert, 19); + spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz_c, 19); + spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert_c, 19); /* * Coefficient table and some registers are different based on ratio * that is output/input. Currently we calculate input/output * Store 1/ratio in recip_ratio for those lookups */ - spl_out->scl_data.recip_ratios.horz = dc_fixpt_recip( - spl_out->scl_data.ratios.horz); - spl_out->scl_data.recip_ratios.vert = dc_fixpt_recip( - spl_out->scl_data.ratios.vert); - spl_out->scl_data.recip_ratios.horz_c = dc_fixpt_recip( - spl_out->scl_data.ratios.horz_c); - spl_out->scl_data.recip_ratios.vert_c = dc_fixpt_recip( - spl_out->scl_data.ratios.vert_c); + spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz); + spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert); + spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz_c); + spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert_c); } /* Calculate Viewport size */ -static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { - spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, - spl_out->scl_data.recout.height)); - spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, - spl_out->scl_data.recout.height)); + spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert, + spl_scratch->scl_data.recout.height)); + spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c, + spl_scratch->scl_data.recout.height)); if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } } @@ -468,13 +471,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, int recout_size, int src_size, int taps, - struct fixed31_32 ratio, - struct fixed31_32 init_adj, - struct fixed31_32 *init, + struct spl_fixed31_32 ratio, + struct spl_fixed31_32 init_adj, + struct spl_fixed31_32 *init, int *vp_offset, int *vp_size) { - struct fixed31_32 temp; + struct spl_fixed31_32 temp; int int_part; /* @@ -487,33 +490,33 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, * init_bot = init + scaling_ratio * to get pixel perfect combine add the fraction from calculating vp offset */ - temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); - *vp_offset = dc_fixpt_floor(temp); + temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = spl_fixpt_floor(temp); temp.value &= 0xffffffff; - *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); - *init = dc_fixpt_add(*init, init_adj); - *init = dc_fixpt_truncate(*init, 19); + *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = spl_fixpt_add(*init, init_adj); + *init = spl_fixpt_truncate(*init, 19); /* * If viewport has non 0 offset and there are more taps than covered by init then * we should decrease the offset and increase init so we are never sampling * outside of viewport. */ - int_part = dc_fixpt_floor(*init); + int_part = spl_fixpt_floor(*init); if (int_part < taps) { int_part = taps - int_part; if (int_part > *vp_offset) int_part = *vp_offset; *vp_offset -= int_part; - *init = dc_fixpt_add_int(*init, int_part); + *init = spl_fixpt_add_int(*init, int_part); } /* * If taps are sampling outside of viewport at end of recout and there are more pixels * available in the surface we should increase the viewport size, regardless set vp to * only what is used. */ - temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); - *vp_size = dc_fixpt_floor(temp); + temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = spl_fixpt_floor(temp); if (*vp_size + *vp_offset > src_size) *vp_size = src_size - *vp_offset; @@ -536,7 +539,8 @@ static bool spl_is_yuv420(enum spl_pixel_format format) } /*Calculate inits and viewport */ -static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) { struct spl_rect src = spl_in->basic_in.src_rect; struct spl_rect recout_dst_in_active_timing; @@ -547,11 +551,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - struct fixed31_32 init_adj_h = dc_fixpt_zero; - struct fixed31_32 init_adj_v = dc_fixpt_zero; + struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; + struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; recout_clip_in_active_timing = shift_rec( - &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); + &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y); recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( spl_in, &spl_in->basic_in.dst_rect); overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, @@ -574,8 +578,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ &flip_horz_scan_dir); if (orthogonal_rotation) { - swap(src.width, src.height); - swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); } if (spl_is_yuv420(spl_in->basic_in.format)) { @@ -587,17 +591,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ switch (spl_in->basic_in.cositing) { case CHROMA_COSITING_LEFT: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_NONE: - init_adj_h = dc_fixpt_from_fraction(sign, 4); - init_adj_v = dc_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_TOPLEFT: default: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_zero; + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_zero; break; } } @@ -605,58 +609,58 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width, - spl_out->scl_data.taps.h_taps, - spl_out->scl_data.ratios.horz, - dc_fixpt_zero, - &spl_out->scl_data.inits.h, - &spl_out->scl_data.viewport.x, - &spl_out->scl_data.viewport.width); + spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.ratios.horz, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.h, + &spl_scratch->scl_data.viewport.x, + &spl_scratch->scl_data.viewport.width); spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width / vpc_div, - spl_out->scl_data.taps.h_taps_c, - spl_out->scl_data.ratios.horz_c, + spl_scratch->scl_data.taps.h_taps_c, + spl_scratch->scl_data.ratios.horz_c, init_adj_h, - &spl_out->scl_data.inits.h_c, - &spl_out->scl_data.viewport_c.x, - &spl_out->scl_data.viewport_c.width); + &spl_scratch->scl_data.inits.h_c, + &spl_scratch->scl_data.viewport_c.x, + &spl_scratch->scl_data.viewport_c.width); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height, - spl_out->scl_data.taps.v_taps, - spl_out->scl_data.ratios.vert, - dc_fixpt_zero, - &spl_out->scl_data.inits.v, - &spl_out->scl_data.viewport.y, - &spl_out->scl_data.viewport.height); + spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.ratios.vert, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.v, + &spl_scratch->scl_data.viewport.y, + &spl_scratch->scl_data.viewport.height); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height / vpc_div, - spl_out->scl_data.taps.v_taps_c, - spl_out->scl_data.ratios.vert_c, + spl_scratch->scl_data.taps.v_taps_c, + spl_scratch->scl_data.ratios.vert_c, init_adj_v, - &spl_out->scl_data.inits.v_c, - &spl_out->scl_data.viewport_c.y, - &spl_out->scl_data.viewport_c.height); + &spl_scratch->scl_data.inits.v_c, + &spl_scratch->scl_data.viewport_c.y, + &spl_scratch->scl_data.viewport_c.height); if (orthogonal_rotation) { - swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } - spl_out->scl_data.viewport.x += src.x; - spl_out->scl_data.viewport.y += src.y; - ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); - spl_out->scl_data.viewport_c.x += src.x / vpc_div; - spl_out->scl_data.viewport_c.y += src.y / vpc_div; + spl_scratch->scl_data.viewport.x += src.x; + spl_scratch->scl_data.viewport.y += src.y; + SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_scratch->scl_data.viewport_c.x += src.x / vpc_div; + spl_scratch->scl_data.viewport_c.y += src.y / vpc_div; } static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) @@ -667,7 +671,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ if (spl_in->basic_in.mpc_combine_v) { - ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) @@ -708,7 +712,7 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, const struct spl_scaler_data *data, bool enable_isharp, bool enable_easf) { - const long long one = dc_fixpt_one.value; + const long long one = spl_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ @@ -762,7 +766,7 @@ static bool spl_choose_lls_policy(enum spl_pixel_format format, } /* Enable EASF ?*/ -static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) +static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { int vratio = 0; int hratio = 0; @@ -778,8 +782,8 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, &spl_in->lls_pref); - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); if (!lls_enable_easf || spl_in->disable_easf) skip_easf = true; @@ -799,7 +803,7 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_out *spl_out) } /* Check if video is in fullscreen mode */ -static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_out) +static bool spl_is_video_fullscreen(struct spl_in *spl_in) { if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) return true; @@ -807,16 +811,16 @@ static bool spl_is_video_fullscreen(struct spl_in *spl_in, struct spl_out *spl_o } static bool spl_get_isharp_en(struct spl_in *spl_in, - struct spl_out *spl_out) + struct spl_scratch *spl_scratch) { bool enable_isharp = false; int vratio = 0; int hratio = 0; - struct spl_taps taps = spl_out->scl_data.taps; - bool fullscreen = spl_is_video_fullscreen(spl_in, spl_out); + struct spl_taps taps = spl_scratch->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in); - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); /* Return if adaptive sharpness is disabled */ if (spl_in->adaptive_sharpness.enable == false) @@ -848,7 +852,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( - int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, + int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp) { @@ -858,13 +862,13 @@ static bool spl_get_optimal_number_of_taps( enum lb_memory_config lb_config; bool skip_easf = false; - if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && - spl_out->scl_data.viewport.width > max_downscale_src_width) + spl_scratch->scl_data.viewport.width > max_downscale_src_width) return false; /* Check if we are using EASF or not */ - skip_easf = enable_easf(spl_in, spl_out); + skip_easf = enable_easf(spl_in, spl_scratch); /* * Set default taps if none are provided @@ -873,57 +877,57 @@ static bool spl_get_optimal_number_of_taps( */ if (skip_easf) { if (in_taps->h_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil( - spl_out->scl_data.ratios.horz), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) + spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz), 8); else - spl_out->scl_data.taps.h_taps = 4; + spl_scratch->scl_data.taps.h_taps = 4; } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; + spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) + spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert, 2)), 8); else - spl_out->scl_data.taps.v_taps = 4; + spl_scratch->scl_data.taps.v_taps = 4; } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; + spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) + spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert_c, 2)), 8); else - spl_out->scl_data.taps.v_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil( - spl_out->scl_data.ratios.horz_c), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) + spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz_c), 8); else - spl_out->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.h_taps_c = 4; } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; } else { if (spl_is_yuv420(spl_in->basic_in.format)) { - spl_out->scl_data.taps.h_taps = 6; - spl_out->scl_data.taps.v_taps = 6; - spl_out->scl_data.taps.h_taps_c = 4; - spl_out->scl_data.taps.v_taps_c = 4; + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; } else { /* RGB */ - spl_out->scl_data.taps.h_taps = 6; - spl_out->scl_data.taps.v_taps = 6; - spl_out->scl_data.taps.h_taps_c = 6; - spl_out->scl_data.taps.v_taps_c = 6; + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 6; + spl_scratch->scl_data.taps.v_taps_c = 6; } } /*Ensure we can support the requested number of vtaps*/ - min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); + min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) @@ -932,16 +936,16 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) - max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2); else max_taps_y = num_part_y; - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) - max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2); else max_taps_c = num_part_c; @@ -950,11 +954,11 @@ static bool spl_get_optimal_number_of_taps( else if (max_taps_c < min_taps_c) return false; - if (spl_out->scl_data.taps.v_taps > max_taps_y) - spl_out->scl_data.taps.v_taps = max_taps_y; + if (spl_scratch->scl_data.taps.v_taps > max_taps_y) + spl_scratch->scl_data.taps.v_taps = max_taps_y; - if (spl_out->scl_data.taps.v_taps_c > max_taps_c) - spl_out->scl_data.taps.v_taps_c = max_taps_c; + if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c) + spl_scratch->scl_data.taps.v_taps_c = max_taps_c; if (!skip_easf) { /* @@ -971,45 +975,45 @@ static bool spl_get_optimal_number_of_taps( * If optimal no of taps is 7 or 8, then fine since max tap is 6 * */ - if (spl_out->scl_data.taps.v_taps == 5) - spl_out->scl_data.taps.v_taps = 4; + if (spl_scratch->scl_data.taps.v_taps == 5) + spl_scratch->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps_c == 5) - spl_out->scl_data.taps.v_taps_c = 4; + if (spl_scratch->scl_data.taps.v_taps_c == 5) + spl_scratch->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.h_taps == 5) - spl_out->scl_data.taps.h_taps = 4; + if (spl_scratch->scl_data.taps.h_taps == 5) + spl_scratch->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps_c == 5) - spl_out->scl_data.taps.h_taps_c = 4; + if (spl_scratch->scl_data.taps.h_taps_c == 5) + spl_scratch->scl_data.taps.h_taps_c = 4; if (spl_is_yuv420(spl_in->basic_in.format)) { - if ((spl_out->scl_data.taps.h_taps <= 4) || - (spl_out->scl_data.taps.h_taps_c <= 3)) { + if ((spl_scratch->scl_data.taps.h_taps <= 4) || + (spl_scratch->scl_data.taps.h_taps_c <= 3)) { *enable_easf_v = false; *enable_easf_h = false; - } else if ((spl_out->scl_data.taps.v_taps <= 3) || - (spl_out->scl_data.taps.v_taps_c <= 3)) { + } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || + (spl_scratch->scl_data.taps.v_taps_c <= 3)) { *enable_easf_v = false; *enable_easf_h = true; } else { *enable_easf_v = true; *enable_easf_h = true; } - ASSERT((spl_out->scl_data.taps.v_taps > 1) && - (spl_out->scl_data.taps.v_taps_c > 1)); + SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) && + (spl_scratch->scl_data.taps.v_taps_c > 1)); } else { /* RGB */ - if (spl_out->scl_data.taps.h_taps <= 3) { + if (spl_scratch->scl_data.taps.h_taps <= 3) { *enable_easf_v = false; *enable_easf_h = false; - } else if (spl_out->scl_data.taps.v_taps < 3) { + } else if (spl_scratch->scl_data.taps.v_taps < 3) { *enable_easf_v = false; *enable_easf_h = true; } else { *enable_easf_v = true; *enable_easf_h = true; } - ASSERT(spl_out->scl_data.taps.v_taps > 1); + SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1); } } else { *enable_easf_v = false; @@ -1024,29 +1028,29 @@ static bool spl_get_optimal_number_of_taps( * EASF is not enabled */ - *enable_isharp = spl_get_isharp_en(spl_in, spl_out); + *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); if (!*enable_isharp && !spl_in->basic_out.always_scale) { - if ((IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.vert))) { - spl_out->scl_data.taps.h_taps = 1; - spl_out->scl_data.taps.v_taps = 1; + if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { + spl_scratch->scl_data.taps.h_taps = 1; + spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + spl_scratch->scl_data.taps.v_taps_c = 1; *enable_easf_v = false; *enable_easf_h = false; } else { if ((!*enable_easf_h) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c))) - spl_out->scl_data.taps.h_taps_c = 1; + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) + spl_scratch->scl_data.taps.h_taps_c = 1; if ((!*enable_easf_v) && - (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c))) - spl_out->scl_data.taps.v_taps_c = 1; + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) + spl_scratch->scl_data.taps.v_taps_c = 1; } } return true; @@ -1069,38 +1073,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format, static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data) { - struct fixed31_32 bot; + struct spl_fixed31_32 bot; - dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; - dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; - dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; - dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5; /* * 0.24 format for fraction, first five bits zeroed */ dscl_prog_data->init.h_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.h) << 5; + spl_fixpt_u0d19(scl_data->inits.h) << 5; dscl_prog_data->init.h_filter_init_int = - dc_fixpt_floor(scl_data->inits.h); + spl_fixpt_floor(scl_data->inits.h); dscl_prog_data->init.h_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.h_c) << 5; + spl_fixpt_u0d19(scl_data->inits.h_c) << 5; dscl_prog_data->init.h_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.h_c); + spl_fixpt_floor(scl_data->inits.h_c); dscl_prog_data->init.v_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.v) << 5; + spl_fixpt_u0d19(scl_data->inits.v) << 5; dscl_prog_data->init.v_filter_init_int = - dc_fixpt_floor(scl_data->inits.v); + spl_fixpt_floor(scl_data->inits.v); dscl_prog_data->init.v_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.v_c) << 5; + spl_fixpt_u0d19(scl_data->inits.v_c) << 5; dscl_prog_data->init.v_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.v_c); + spl_fixpt_floor(scl_data->inits.v_c); - bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); - dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); - bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); - dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot); } static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, @@ -1113,22 +1117,22 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, } /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out, - bool enable_easf_v, bool enable_easf_h, bool enable_isharp) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch, + struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; - const struct spl_scaler_data *data = &spl_out->scl_data; + const struct spl_scaler_data *data = &spl_scratch->scl_data; struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; bool enable_easf = enable_easf_v || enable_easf_h; // Set values for recout - dscl_prog_data->recout = spl_out->scl_data.recout; + dscl_prog_data->recout = spl_scratch->scl_data.recout; // Set values for MPC Size - dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; - dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; + dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active; // SCL_MODE - Set SCL_MODE data dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, @@ -1143,15 +1147,15 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set HTaps/VTaps spl_set_taps_data(dscl_prog_data, data); // Set viewport - dscl_prog_data->viewport = spl_out->scl_data.viewport; + dscl_prog_data->viewport = spl_scratch->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; // Set filters data spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } /* Set EASF data */ -static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, +static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, enum spl_pixel_format format, enum system_setup setup) { @@ -1164,47 +1168,47 @@ static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode /* 2-bit, BF3 chroma mode correction calculation mode */ dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( - spl_out->scl_data.recip_ratios.vert); + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - spl_get_3tap_dntilt_uptilt_offset(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTiltMaxVal ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - spl_get_3tap_uptilt_maxval(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - spl_get_3tap_dntilt_slope(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - spl_get_3tap_uptilt1_slope(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - spl_get_3tap_uptilt2_slope(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - spl_get_3tap_uptilt2_offset(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - spl_get_gainRing4(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - spl_get_gainRing6(spl_out->scl_data.taps.v_taps, - spl_out->scl_data.recip_ratios.vert); + spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 @@ -1330,23 +1334,23 @@ static void spl_set_easf_data(struct spl_out *spl_out, bool enable_easf_v, 0xF; // 4-bit, BF2 calculation mode /* 2-bit, BF3 chroma mode correction calculation mode */ dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( - spl_out->scl_data.recip_ratios.horz); + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - spl_get_gainRing4(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - spl_get_gainRing6(spl_out->scl_data.taps.h_taps, - spl_out->scl_data.recip_ratios.horz); + spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 @@ -1524,7 +1528,7 @@ static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data, struct fixed31_32 ratio, + const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, enum system_setup setup) { /* Turn off sharpener if not required */ @@ -1638,40 +1642,44 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_easf_h = false; int vratio = 0; int hratio = 0; - const struct spl_scaler_data *data = &spl_out->scl_data; - struct fixed31_32 isharp_scale_ratio; + struct spl_scratch spl_scratch; + struct spl_fixed31_32 isharp_scale_ratio; enum system_setup setup; bool enable_isharp = false; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + memset(&spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch.scl_data.h_active = spl_in->h_active; + spl_scratch.scl_data.v_active = spl_in->v_active; // All SPL calls /* recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, spl_out); + spl_calculate_recout(spl_in, &spl_scratch, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, spl_out); + spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, spl_out); + spl_calculate_viewport_size(spl_in, &spl_scratch); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality, &enable_easf_v, + &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, spl_out); + spl_calculate_inits_and_viewports(spl_in, &spl_scratch); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); // Clamp - spl_clamp_viewport(&spl_out->scl_data.viewport); + spl_clamp_viewport(&spl_scratch.scl_data.viewport); if (!res) return res; // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out, enable_easf_v, enable_easf_h, enable_isharp); + spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); if (spl_in->lls_pref == LLS_PREF_YES) { if (spl_in->is_hdr_on) @@ -1685,15 +1693,15 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) setup = SDR_NL; } // Set EASF - spl_set_easf_data(spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, spl_in->basic_in.format, setup); // Set iSHARP - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); if (vratio <= hratio) - isharp_scale_ratio = spl_out->scl_data.recip_ratios.vert; + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; else - isharp_scale_ratio = spl_out->scl_data.recip_ratios.horz; + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index a5e544406e91..d483f259512e 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc_spl_types.h" +#include "spl_debug.h" #include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" @@ -631,10 +632,10 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) return filter_isharp_bs_3tap_64p_s1_12; } -void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup) +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup) { uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; - struct fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; + struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; int i, j; struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; int num_sharp_ramp_levels; @@ -680,12 +681,12 @@ void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum s * base scale ratio to sharpness curve */ j = 0; - sharp_level = dc_fixpt_zero; + sharp_level = spl_fixpt_zero; while (j < num_sharp_ramp_levels) { - ratio_level = dc_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, + ratio_level = spl_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, setup_lookup_ptr->ratio_denom); if (ratio.value >= ratio_level.value) { - sharp_level = dc_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, + sharp_level = spl_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, setup_lookup_ptr->sharpness_denom); break; } @@ -707,12 +708,12 @@ void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum s size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); memset(byte_ptr_1dlut_dst, 0, size_1dlut); for (j = 0; j < size_1dlut; j++) { - sharp_base = dc_fixpt_from_int((int)*byte_ptr_1dlut_src); - sharp_calc = dc_fixpt_mul(sharp_base, sharp_level); - sharp_calc = dc_fixpt_div(sharp_calc, dc_fixpt_from_int(3)); - sharp_calc = dc_fixpt_min(dc_fixpt_from_int(255), sharp_calc); - sharp_calc = dc_fixpt_add(sharp_calc, dc_fixpt_from_fraction(1, 2)); - sharp_calc_int = dc_fixpt_floor(sharp_calc); + sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); + sharp_calc = spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); + sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); + sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); + sharp_calc_int = spl_fixpt_floor(sharp_calc); if (sharp_calc_int > 255) sharp_calc_int = 255; *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; @@ -742,7 +743,6 @@ void spl_init_blur_scale_coeffs(void) filter_isharp_bs_4tap_in_6_64p_s1_12, 6); } -#ifdef CONFIG_DRM_AMD_DC_FP uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) { if (taps == 3) @@ -753,7 +753,7 @@ uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) return spl_get_filter_isharp_bs_4tap_in_6_64p(); else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } @@ -767,5 +767,4 @@ void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); } -#endif diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index c8b7cd6404dd..6cb000bf9d53 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -43,6 +43,6 @@ void spl_init_blur_scale_coeffs(void); void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data); -void spl_build_isharp_1dlut_from_reference_curve(struct fixed31_32 ratio, enum system_setup setup); +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup); uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c index 83dd3435ebcc..09bf82f7d468 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "spl_debug.h" #include "dc_spl_filters.h" #include "dc_spl_scl_filters.h" #include "dc_spl_scl_easf_filters.h" @@ -1406,67 +1407,67 @@ void spl_init_easf_filter_coeffs(void) easf_filter_6tap_64p_ratio_1_00_s1_12, 6); } -uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_3tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_3tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) return easf_filter_3tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_3tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_3tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_3tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_3tap_64p_ratio_0_90_s1_12; else return easf_filter_3tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_4tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_4tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) return easf_filter_4tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_4tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_4tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_4tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_4tap_64p_ratio_0_90_s1_12; else return easf_filter_4tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio) +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_from_fraction(3, 10).value) + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) return easf_filter_6tap_64p_ratio_0_30_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(4, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) return easf_filter_6tap_64p_ratio_0_40_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(5, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) return easf_filter_6tap_64p_ratio_0_50_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(6, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) return easf_filter_6tap_64p_ratio_0_60_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(7, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) return easf_filter_6tap_64p_ratio_0_70_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(8, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) return easf_filter_6tap_64p_ratio_0_80_s1_12; - else if (ratio.value < dc_fixpt_from_fraction(9, 10).value) + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) return easf_filter_6tap_64p_ratio_0_90_s1_12; else return easf_filter_6tap_64p_ratio_1_00_s1_12; } -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) { if (taps == 6) return spl_get_easf_filter_6tap_64p(ratio); @@ -1476,7 +1477,7 @@ uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio) return spl_get_easf_filter_3tap_64p(ratio); else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } @@ -1517,7 +1518,7 @@ void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, } } -static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, unsigned int num_entries) { @@ -1534,7 +1535,7 @@ static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, if (lookup_table_index_ptr->numer < 0) break; - if (ratio.value < dc_fixpt_from_fraction( + if (ratio.value < spl_fixpt_from_fraction( lookup_table_index_ptr->numer, lookup_table_index_ptr->denom).value) { value = lookup_table_index_ptr->reg_value; @@ -1545,7 +1546,7 @@ static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct fixed31_32 ratio, } return value; } -uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / @@ -1554,7 +1555,7 @@ uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio) easf_v_bf3_mode_lookup, num_entries); return value; } -uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / @@ -1563,7 +1564,7 @@ uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio) easf_h_bf3_mode_lookup, num_entries); return value; } -uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1582,7 +1583,7 @@ uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1601,7 +1602,7 @@ uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio) +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1620,7 +1621,7 @@ uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1639,7 +1640,7 @@ uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1653,7 +1654,7 @@ uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1667,7 +1668,7 @@ uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1681,7 +1682,7 @@ uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1695,7 +1696,7 @@ uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; @@ -1709,7 +1710,7 @@ uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio) value = 0; return value; } -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio) +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) { uint32_t value; unsigned int num_entries; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h index 542b5ce1a385..8bb2b8108e38 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -14,25 +14,25 @@ struct scale_ratio_to_reg_value_lookup { }; void spl_init_easf_filter_coeffs(void); -uint16_t *spl_get_easf_filter_3tap_64p(struct fixed31_32 ratio); -uint16_t *spl_get_easf_filter_4tap_64p(struct fixed31_32 ratio); -uint16_t *spl_get_easf_filter_6tap_64p(struct fixed31_32 ratio); -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct fixed31_32 ratio); +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data, bool enable_easf_v, bool enable_easf_h); -uint32_t spl_get_v_bf3_mode(struct fixed31_32 ratio); -uint32_t spl_get_h_bf3_mode(struct fixed31_32 ratio); -uint32_t spl_get_reducer_gain6(int taps, struct fixed31_32 ratio); -uint32_t spl_get_reducer_gain4(int taps, struct fixed31_32 ratio); -uint32_t spl_get_gainRing6(int taps, struct fixed31_32 ratio); -uint32_t spl_get_gainRing4(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct fixed31_32 ratio); +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); #endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index 156f8171e44f..b9a7b77a7167 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc_spl_types.h" +#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 @@ -1318,97 +1319,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1424,7 +1425,7 @@ const uint16_t *spl_get_filter_2tap_64p(void) return filter_2tap_64p; } -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) { if (taps == 8) return spl_get_filter_8tap_64p(ratio); @@ -1444,7 +1445,7 @@ const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio return NULL; else { /* should never happen, bug */ - BREAK_TO_DEBUGGER(); + SPL_BREAK_TO_DEBUGGER(); return NULL; } } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 27590846d92a..48202bc4f81e 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,16 +7,16 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index e54da5ea4ae8..1438a86826a4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,15 +2,15 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "os_types.h" -#include "dc_hw_types.h" -#ifndef ASSERT -#define ASSERT(_bool) (void *)0 -#endif -#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ +#include "spl_os_types.h" // swap +#ifndef SPL_ASSERT +#define SPL_ASSERT(_bool) ((void *)0) +#endif +#include "spl_fixpt31_32.h" // fixed31_32 and related functions + enum lb_memory_config { /* Enable all 3 pieces of memory */ LB_MEMORY_CONFIG_0 = 0, @@ -39,16 +39,16 @@ struct spl_rect { }; struct spl_ratios { - struct fixed31_32 horz; - struct fixed31_32 vert; - struct fixed31_32 horz_c; - struct fixed31_32 vert_c; + struct spl_fixed31_32 horz; + struct spl_fixed31_32 vert; + struct spl_fixed31_32 horz_c; + struct spl_fixed31_32 vert_c; }; struct spl_inits { - struct fixed31_32 h; - struct fixed31_32 h_c; - struct fixed31_32 v; - struct fixed31_32 v_c; + struct spl_fixed31_32 h; + struct spl_fixed31_32 h_c; + struct spl_fixed31_32 v; + struct spl_fixed31_32 v_c; }; struct spl_taps { @@ -409,10 +409,15 @@ struct dscl_prog_data { }; /* SPL input and output definitions */ -// SPL outputs struct -struct spl_out { +// SPL scratch struct +struct spl_scratch { // Pack all SPL outputs in scl_data struct spl_scaler_data scl_data; +}; + +/* SPL input and output definitions */ +// SPL outputs struct +struct spl_out { // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -497,6 +502,8 @@ struct spl_in { struct spl_debug debug; bool is_fullscreen; bool is_hdr_on; + int h_active; + int v_active; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h new file mode 100644 index 000000000000..a36239ab8d1c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h @@ -0,0 +1,23 @@ +/* Copyright 1997-2004 Advanced Micro Devices, Inc. All rights reserved. */ + +#ifndef SPL_DEBUG_H +#define SPL_DEBUG_H + +#ifdef SPL_ASSERT +#undef SPL_ASSERT +#endif +#define SPL_ASSERT(b) + +#define SPL_ASSERT_CRITICAL(expr) do {if (expr)/* Do nothing */; } while (0) + +#ifdef SPL_DALMSG +#undef SPL_DALMSG +#endif +#define SPL_DALMSG(b) + +#ifdef SPL_DAL_ASSERT_MSG +#undef SPL_DAL_ASSERT_MSG +#endif +#define SPL_DAL_ASSERT_MSG(b, m) + +#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c new file mode 100644 index 000000000000..2bb1de88aef7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c @@ -0,0 +1,518 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "spl_fixpt31_32.h" + +static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; + +static inline unsigned long long abs_i64( + long long arg) +{ + if (arg > 0) + return (unsigned long long)arg; + else + return (unsigned long long)(-arg); +} + +/* + * @brief + * result = dividend / divisor + * *remainder = dividend % divisor + */ +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) +{ + unsigned long long result; + + ASSERT(divisor); + + result = spl_div64_u64_rem(dividend, divisor, remainder); + + return result; +} + + +#define FRACTIONAL_PART_MASK \ + ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) + +#define GET_INTEGER_PART(x) \ + ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + +#define GET_FRACTIONAL_PART(x) \ + (FRACTIONAL_PART_MASK & (x)) + +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? -denominator : denominator; + + unsigned long long remainder; + + /* determine integer part */ + + unsigned long long res_value = complete_integer_division_u64( + arg1_value, arg2_value, &remainder); + + ASSERT(res_value <= LONG_MAX); + + /* determine fractional part */ + { + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + unsigned long long summand = (remainder << 1) >= arg2_value; + + ASSERT(res_value <= LLONG_MAX - summand); + + res_value += summand; + } + + res.value = (long long)res_value; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = arg1.value < 0; + bool arg2_negative = arg2.value < 0; + + unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; + + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); + + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); + + unsigned long long tmp; + + res.value = arg1_int * arg2_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg1_int * arg2_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg2_int * arg1_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg1_fra * arg2_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + unsigned long long arg_value = abs_i64(arg.value); + + unsigned long long arg_int = GET_INTEGER_PART(arg_value); + + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); + + unsigned long long tmp; + + res.value = arg_int * arg_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg_int * arg_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg_fra * arg_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) +{ + /* + * @note + * Good idea to use Newton's method + */ + + ASSERT(arg.value); + + return spl_fixpt_from_fraction( + spl_fixpt_one.value, + arg.value); +} + +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 square; + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 27; + + struct spl_fixed31_32 arg_norm = arg; + + if (spl_fixpt_le( + spl_fixpt_two_pi, + spl_fixpt_abs(arg))) { + arg_norm = spl_fixpt_sub( + arg_norm, + spl_fixpt_mul_int( + spl_fixpt_two_pi, + (int)spl_div64_s64( + arg_norm.value, + spl_fixpt_two_pi.value))); + } + + square = spl_fixpt_sqr(arg_norm); + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n > 2); + + if (arg.value != arg_norm.value) + res = spl_fixpt_div( + spl_fixpt_mul(res, arg_norm), + arg); + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) +{ + return spl_fixpt_mul( + arg, + spl_fixpt_sinc(arg)); +} + +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) +{ + /* TODO implement argument normalization */ + + const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 26; + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n != 0); + + return res; +} + +/* + * @brief + * result = exp(arg), + * where abs(arg) < 1 + * + * Calculated as Taylor series. + */ +static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) +{ + unsigned int n = 9; + + struct spl_fixed31_32 res = spl_fixpt_from_fraction( + n + 2, + n + 1); + /* TODO find correct res */ + + ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); + + do + res = spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + arg, + res), + n)); + while (--n != 1); + + return spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_mul( + arg, + res)); +} + +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) +{ + /* + * @brief + * Main equation is: + * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), + * where m = round(x / ln(2)), r = x - m * ln(2) + */ + + if (spl_fixpt_le( + spl_fixpt_ln2_div_2, + spl_fixpt_abs(arg))) { + int m = spl_fixpt_round( + spl_fixpt_div( + arg, + spl_fixpt_ln2)); + + struct spl_fixed31_32 r = spl_fixpt_sub( + arg, + spl_fixpt_mul_int( + spl_fixpt_ln2, + m)); + + ASSERT(m != 0); + + ASSERT(spl_fixpt_lt( + spl_fixpt_abs(r), + spl_fixpt_one)); + + if (m > 0) + return spl_fixpt_shl( + fixed31_32_exp_from_taylor_series(r), + (unsigned char)m); + else + return spl_fixpt_div_int( + fixed31_32_exp_from_taylor_series(r), + 1LL << -m); + } else if (arg.value != 0) + return fixed31_32_exp_from_taylor_series(arg); + else + return spl_fixpt_one; +} + +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); + /* TODO improve 1st estimation */ + + struct spl_fixed31_32 error; + + ASSERT(arg.value > 0); + /* TODO if arg is negative, return NaN */ + /* TODO if arg is zero, return -INF */ + + do { + struct spl_fixed31_32 res1 = spl_fixpt_add( + spl_fixpt_sub( + res, + spl_fixpt_one), + spl_fixpt_div( + arg, + spl_fixpt_exp(res))); + + error = spl_fixpt_sub( + res, + res1); + + res = res1; + /* TODO determine max_allowed_error based on quality of exp() */ + } while (abs_i64(error.value) > 100ULL); + + return res; +} + + +/* this function is a generic helper to translate fixed point value to + * specified integer format that will consist of integer_bits integer part and + * fractional_bits fractional part. For example it is used in + * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional + * part in 32 bits. It is used in hw programming (scaler) + */ + +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + /* 1. create mask of integer part */ + unsigned int result = (1 << integer_bits) - 1; + /* 2. mask out fractional part */ + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; + /* 3. shrink fixed point integer part to be of integer_bits width*/ + result &= GET_INTEGER_PART(value); + /* 4. make space for fractional part to be filled in after integer */ + result <<= fractional_bits; + /* 5. shrink fixed point fractional part to of fractional_bits width*/ + fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; + /* 6. merge the result */ + return result | fractional_part; +} + +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) +{ + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); + + if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) + return (1 << (integer_bits + fractional_bits)) - 1; + else if (truncated_val > min_clamp) + return truncated_val; + else + return min_clamp; +} + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 4, 19); +} + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 3, 19); +} + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 2, 19); +} + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 0, 19); +} + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 14, 1); +} + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 10, 1); +} + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); + else + return ux_dy(arg.value, 4, 19); +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; + struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h new file mode 100644 index 000000000000..27ec6d416b7c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h @@ -0,0 +1,546 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __SPL_FIXED31_32_H__ +#define __SPL_FIXED31_32_H__ + +#include "os_types.h" +#include "spl_os_types.h" // swap +#ifndef ASSERT +#define ASSERT(_bool) ((void *)0) +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX 9223372036854775807ll +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1ll) +#endif + +#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 +#ifndef LLONG_MIN +#define LLONG_MIN (1LL<<63) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX (-1LL>>1) +#endif + +/* + * @brief + * Arithmetic operations on real numbers + * represented as fixed-point numbers. + * There are: 1 bit for sign, + * 31 bit for integer part, + * 32 bits for fractional part. + * + * @note + * Currently, overflows and underflows are asserted; + * no special result returned. + */ + +struct spl_fixed31_32 { + long long value; +}; + + +/* + * @brief + * Useful constants + */ + +static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; +static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; +static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; +static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; + +/* + * @brief + * Initialization routines + */ + +/* + * @brief + * result = numerator / denominator + */ +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); + +/* + * @brief + * result = arg + */ +static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) +{ + struct spl_fixed31_32 res; + + res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; + + return res; +} + +/* + * @brief + * Unary operators + */ + +/* + * @brief + * result = -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + res.value = -arg.value; + + return res; +} + +/* + * @brief + * result = abs(arg) := (arg >= 0) ? arg : -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return spl_fixpt_neg(arg); + else + return arg; +} + +/* + * @brief + * Binary relational operators + */ + +/* + * @brief + * result = arg1 < arg2 + */ +static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value < arg2.value; +} + +/* + * @brief + * result = arg1 <= arg2 + */ +static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value <= arg2.value; +} + +/* + * @brief + * result = arg1 == arg2 + */ +static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value == arg2.value; +} + +/* + * @brief + * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg1; + else + return arg2; +} + +/* + * @brief + * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1 + */ +static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg2; + else + return arg1; +} + +/* + * @brief + * | min_value, when arg <= min_value + * result = | arg, when min_value < arg < max_value + * | max_value, when arg >= max_value + */ +static inline struct spl_fixed31_32 spl_fixpt_clamp( + struct spl_fixed31_32 arg, + struct spl_fixed31_32 min_value, + struct spl_fixed31_32 max_value) +{ + if (spl_fixpt_le(arg, min_value)) + return min_value; + else if (spl_fixpt_le(max_value, arg)) + return max_value; + else + return arg; +} + +/* + * @brief + * Binary shift operators + */ + +/* + * @brief + * result = arg << shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) +{ + ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || + ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); + + arg.value = arg.value << shift; + + return arg; +} + +/* + * @brief + * result = arg >> shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) +{ + bool negative = arg.value < 0; + + if (negative) + arg.value = -arg.value; + arg.value = arg.value >> shift; + if (negative) + arg.value = -arg.value; + return arg; +} + +/* + * @brief + * Binary additive operators + */ + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || + ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); + + res.value = arg1.value + arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || + ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); + + res.value = arg1.value - arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); +} + + +/* + * @brief + * Binary multiplicative operators + */ + +/* + * @brief + * result = arg1 * arg2 + */ +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); + + +/* + * @brief + * result = arg1 * arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = square(arg) := arg * arg + */ +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) +{ + return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); +} + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return spl_fixpt_from_fraction(arg1.value, arg2.value); +} + +/* + * @brief + * Reciprocal function + */ + +/* + * @brief + * result = reciprocal(arg) := 1 / arg + * + * @note + * No special actions taken in case argument is zero. + */ +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); + +/* + * @brief + * Trigonometric functions + */ + +/* + * @brief + * result = sinc(arg) := sin(arg) / arg + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); + +/* + * @brief + * result = sin(arg) + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); + +/* + * @brief + * result = cos(arg) + * + * @note + * Argument specified in radians + * and should be in [-2pi...2pi] range - + * passing arguments outside that range + * will cause incorrect result! + */ +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); + +/* + * @brief + * Transcendent functions + */ + +/* + * @brief + * result = exp(arg) + * + * @note + * Currently, function is verified for abs(arg) <= 1. + */ +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); + +/* + * @brief + * result = log(arg) + * + * @note + * Currently, abs(arg) should be less than 1. + * No normalization is done. + * Currently, no special actions taken + * in case of invalid argument(s). Take care! + */ +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); + +/* + * @brief + * Power function + */ + +/* + * @brief + * result = pow(arg1, arg2) + * + * @note + * Currently, abs(arg1) should be less than 1. Take care! + */ +static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value == 0) + return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; + + return spl_fixpt_exp( + spl_fixpt_mul( + spl_fixpt_log(arg1), + arg2)); +} + +/* + * @brief + * Rounding functions + */ + +/* + * @brief + * result = floor(arg) := greatest integer lower than or equal to arg + */ +static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = round(arg) := integer nearest to arg + */ +static inline int spl_fixpt_round(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_half.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = ceil(arg) := lowest integer greater than or equal to arg + */ +static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_one.value - + spl_fixpt_epsilon.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* the following two function are used in scaler hw programming to convert fixed + * point value to format 2 bits from integer part and 19 bits from fractional + * part. The same applies for u0d19, 0 bits from integer part and 19 bits from + * fractional + */ + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg); + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg); + +static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits) +{ + bool negative = arg.value < 0; + + if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { + ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); + return arg; + } + + if (negative) + arg.value = -arg.value; + arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); + if (negative) + arg.value = -arg.value; + return arg; +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h new file mode 100644 index 000000000000..7ebea91c84f6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h @@ -0,0 +1,77 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _SPL_OS_TYPES_H_ +#define _SPL_OS_TYPES_H_ + +#include +#include +#include +#include +#include +#include + +/* + * + * general debug capabilities + * + */ +// TODO: need backport +#define SPL_BREAK_TO_DEBUGGER() ASSERT(0) + +static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) +{ + return div_u64_rem(dividend, divisor, remainder); +} + +static inline uint64_t spl_div_u64(uint64_t dividend, uint32_t divisor) +{ + return div_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64(uint64_t dividend, uint64_t divisor) +{ + return div64_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64_rem(uint64_t dividend, uint64_t divisor, uint64_t *remainder) +{ + return div64_u64_rem(dividend, divisor, remainder); +} + +static inline int64_t spl_div64_s64(int64_t dividend, int64_t divisor) +{ + return div64_s64(dividend, divisor); +} + +#define spl_swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#ifndef spl_min +#define spl_min(a, b) (((a) < (b)) ? (a):(b)) +#endif + +#endif /* _SPL_OS_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1d87..990fa1f19c22 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -531,4 +531,10 @@ static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigne return arg; } +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + #endif From 9932ca4c03c8181601f611dd366fceb765329800 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 12:55:26 -0400 Subject: [PATCH 0081/1523] drm/amd/display: Add P-State Keepout to dcn401 Global Sync [WHY&HOW] OTG has new functionality to allow P-State relative to VStartup. Keepout region for this should be configured based on DML outputs same as other global sync params. Reviewed-by: Alvin Lee Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dce110/dce110_timing_generator.c | 1 + .../dc/dce110/dce110_timing_generator.h | 1 + .../dc/dce110/dce110_timing_generator_v.c | 1 + .../dc/dce120/dce120_timing_generator.c | 1 + .../display/dc/dce60/dce60_timing_generator.c | 3 +- .../display/dc/dce80/dce80_timing_generator.c | 3 +- .../amd/display/dc/dml/display_mode_structs.h | 1 + .../dc/dml2/dml21/dml21_translation_helper.c | 1 + .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 1 + .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 4 ++- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 7 ++-- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/optc.h | 5 ++- .../amd/display/dc/inc/hw/timing_generator.h | 4 ++- .../amd/display/dc/optc/dcn10/dcn10_optc.c | 9 +++-- .../amd/display/dc/optc/dcn10/dcn10_optc.h | 7 +++- .../amd/display/dc/optc/dcn401/dcn401_optc.c | 36 ++++++++++++++++++- .../amd/display/dc/optc/dcn401/dcn401_optc.h | 6 +++- .../dc/resource/dce110/dce110_resource.c | 1 + .../dc/resource/dcn401/dcn401_resource.h | 3 +- 20 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 49bcfe6ec999..fa422a8cbced 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 28c58f1dff2d..ee4de740aceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index bf35dc65ca29..9837dec837ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index eb3557965781..fcf59348eb62 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index c1a85ee374d9..e5fb0e8333e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce60_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 2df4654858be..003a9330c286 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 410e4b671228..641a8cd019cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -523,6 +523,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_offset; unsigned int vupdate_width; unsigned int vready_offset; + unsigned int pstate_keepout; unsigned char interlaced; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index e9647f068ee4..1fce61323201 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1129,6 +1129,7 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4.pstate_keepout_start_lines; pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 982b2d5bfb5f..849b41f886d3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1549,6 +1549,7 @@ static enum dc_status dce110_enable_stream_timing( 0, 0, 0, + 0, pipe_ctx->stream->signal, true); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..4846601c612d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1005,6 +1005,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -2995,7 +2996,8 @@ void dcn10_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 9a00479f0417..dd652436a539 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -909,6 +909,7 @@ enum dc_status dcn20_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1885,7 +1886,8 @@ static void dcn20_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); @@ -2458,7 +2460,8 @@ bool dcn20_update_bandwidth( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 31e0e9210dd7..d0b4308dca96 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -871,6 +871,7 @@ enum dc_status dcn401_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 287bf8a90ff6..03cbcbb36f1c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -65,6 +65,7 @@ struct optc { int vupdate_offset; int vupdate_width; int vready_offset; + int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; }; @@ -110,6 +111,7 @@ void optc1_program_timing(struct timing_generator *optc, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); @@ -127,7 +129,8 @@ void optc1_program_global_sync(struct timing_generator *optc, int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); bool optc1_disable_crtc(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 0f453452234c..3d4c8bd42b49 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -172,6 +172,7 @@ struct timing_generator_funcs { int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios ); @@ -256,7 +257,8 @@ struct timing_generator_funcs { int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 94427875bcdd..f00d27b7c6fe 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -65,7 +65,8 @@ void optc1_program_global_sync( int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width) + int vupdate_width, + int pstate_keepout) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -73,6 +74,7 @@ void optc1_program_global_sync( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); @@ -157,6 +159,7 @@ void optc1_program_timing( int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { @@ -177,6 +180,7 @@ void optc1_program_timing( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; patched_crtc_timing = *dc_crtc_timing; apply_front_porch_workaround(&patched_crtc_timing); optc1->orginal_patched_timing = patched_crtc_timing; @@ -282,7 +286,8 @@ void optc1_program_timing( vready_offset, vstartup_start, vupdate_offset, - vupdate_width); + vupdate_width, + pstate_keepout); optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 369a13244e5e..b7a57f98553d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -201,6 +201,7 @@ struct dcn_optc_registers { uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; uint32_t OPTC_CLOCK_CONTROL; uint32_t OPTC_WIDTH_CONTROL2; + uint32_t OTG_PSTATE_REGISTER; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -590,7 +591,11 @@ struct dcn_optc_registers { type OTG_V_COUNT_STOP_TIMER; #define TG_REG_FIELD_LIST_DCN401(type) \ - type OPTC_SEGMENT_WIDTH_LAST; + type OPTC_SEGMENT_WIDTH_LAST;\ + type OTG_PSTATE_KEEPOUT_START;\ + type OTG_PSTATE_EXTEND;\ + type OTG_UNBLANK;\ + type OTG_PSTATE_ALLOW_WIDTH_MIN; struct dcn_optc_shift { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 9f5c2efa7560..a5d6a7dca554 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -396,13 +396,47 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i } } +static void optc401_program_global_sync( + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + int pstate_keepout) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; + + if (optc1->vstartup_start == 0) { + BREAK_TO_DEBUGGER(); + return; + } + + REG_SET(OTG_VSTARTUP_PARAM, 0, + VSTARTUP_START, optc1->vstartup_start); + + REG_SET_2(OTG_VUPDATE_PARAM, 0, + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); + + REG_SET(OTG_VREADY_PARAM, 0, + VREADY_OFFSET, optc1->vready_offset); + + REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout); +} + static struct timing_generator_funcs dcn401_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, + .program_global_sync = optc401_program_global_sync, .enable_crtc = optc401_enable_crtc, .disable_crtc = optc401_disable_crtc, .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h index 3114ecef332a..bb13a645802d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -155,7 +155,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh) void dcn401_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index fe518fd27b08..91da5cf85b69 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -1163,6 +1163,7 @@ static struct pipe_ctx *dce110_acquire_underlay( 0, 0, 0, + 0, pipe_ctx->stream->signal, false); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..106008593464 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -534,7 +534,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN4_01_RI(id) \ From 0beca868cde8742240cd0038141c30482d2b7eb8 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 16:45:39 -0600 Subject: [PATCH 0082/1523] drm/amd/display: Check link_res->hpo_dp_link_enc before using it [WHAT & HOW] Functions dp_enable_link_phy and dp_disable_link_phy can pass link_res without initializing hpo_dp_link_enc and it is necessary to check for null before dereferencing. This fixes 2 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index e1257404357b..d0148f10dfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -28,6 +28,8 @@ #include "dccg.h" #include "clk_mgr.h" +#define DC_LOGGER link->ctx->logger + void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -124,6 +126,11 @@ void disable_hpo_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc); link_res->hpo_dp_link_enc->funcs->disable_link_phy( link_res->hpo_dp_link_enc, signal); From be1fb44389ca3038ad2430dac4234669bc177ee3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 10:35:52 -0600 Subject: [PATCH 0083/1523] drm/amd/display: Check null pointers before used [WHAT & HOW] Poniters, such as dc->clk_mgr, are null checked previously in the same function, so Coverity warns "implies that "dc->clk_mgr" might be null". As a result, these pointers need to be checked when used again. This fixes 10 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c | 3 ++- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 +++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 4 ++-- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 4 ++-- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 ++++---- .../amd/display/dc/link/protocols/link_dp_capability.c | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 78df96882d6e..f8409453434c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -195,7 +195,7 @@ void dce11_pplib_apply_display_requirements( * , then change minimum memory clock based on real-time bandwidth * limitation. */ - if ((dc->ctx->asic_id.chip_family == FAMILY_AI) && + if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) && ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, (uint32_t) div64_s64( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index bf399819ca80..22ac2b7e49ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -749,7 +749,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 6bba020ad6fb..0637e4c552d8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -927,7 +927,8 @@ bool hubp2_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 849b41f886d3..4593fb2a0536 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -949,7 +949,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t pwrseq_instance; + uint8_t pwrseq_instance = 0; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1002,7 +1002,8 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - pwrseq_instance = link->panel_cntl->pwrseq_inst; + if (link->panel_cntl) + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 4846601c612d..212576dbc336 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1553,7 +1553,7 @@ void dcn10_init_hw(struct dc *dc) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); /* Align bw context with hw config when system resume. */ - if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { + if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz; dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz; } @@ -1673,7 +1673,7 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 746c522adf84..3d4b31bd9946 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -256,10 +256,10 @@ void dcn31_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..ddf0807db627 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -235,7 +235,7 @@ void dcn35_init_hw(struct dc *dc) if (hws->funcs.enable_power_gating_plane) hws->funcs.enable_power_gating_plane(dc->hwseq, true); */ - if (res_pool->hubbub->funcs->dchubbub_init) + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -328,10 +328,10 @@ void dcn35_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); @@ -1039,7 +1039,7 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->plane_res.hubp->inst] = false; - if (pipe_ctx->plane_res.dpp) + if (pipe_ctx->plane_res.dpp && pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->plane_res.hubp->inst] = false; if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index c257e733044a..f3b6d8936f91 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -2258,7 +2258,7 @@ bool dp_verify_link_cap_with_retries( memset(&link->verified_link_cap, 0, sizeof(struct dc_link_settings)); - if (!link_detect_connection_type(link, &type) || type == dc_connection_none) { + if (link->link_enc && (!link_detect_connection_type(link, &type) || type == dc_connection_none)) { link->verified_link_cap = fail_safe_link_settings; break; } else if (dp_verify_link_cap(link, known_limit_link_setting, &fail_count)) { From fdd5ecbbff751c3b9061d8ebb08e5c96119915b4 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 25 Jun 2024 10:37:35 -0600 Subject: [PATCH 0084/1523] drm/amd/display: Check null pointers before multiple uses [WHAT & HOW] Poniters, such as stream_enc and dc->bw_vbios, are null checked previously in the same function, so Coverity warns "implies that stream_enc and dc->bw_vbios might be null". They are used multiple times in the subsequent code and need to be checked. This fixes 10 FORWARD_NULL issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 96 ++++++++++--------- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 +- .../display/dc/link/accessories/link_dp_cts.c | 5 +- .../amd/display/dc/link/hwss/link_hwss_dio.c | 5 +- .../dc/resource/dce112/dce112_resource.c | 5 +- .../dc/resource/dcn32/dcn32_resource.c | 3 + .../resource/dcn32/dcn32_resource_helpers.c | 10 +- 7 files changed, 76 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 9e42a0128baa..5f9b6e8ef428 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -636,57 +636,59 @@ void hwss_build_fast_sequence(struct dc *dc, while (current_pipe) { current_mpc_pipe = current_pipe; while (current_mpc_pipe) { - if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; - block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; - (*num_steps)++; - } - if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; - block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; - block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; - (*num_steps)++; - } - if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { - if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - stream_status->mall_stream_config.type == SUBVP_MAIN) { - block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; - block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; - block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; - block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + if (current_mpc_pipe->plane_state) { + if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; + block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; + (*num_steps)++; + } + if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; + block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; + block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; + (*num_steps)++; + } + if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { + if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && + stream_status->mall_stream_config.type == SUBVP_MAIN) { + block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; + block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; + block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; + block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + (*num_steps)++; + } + + block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; + block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; (*num_steps)++; } - block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; - block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; - (*num_steps)++; - } + if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { + block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; + block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; + block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; + (*num_steps)++; + } - if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { - block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; - block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; - block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; - (*num_steps)++; - } - - if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { - block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { - block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_SETUP_DPP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { - block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; - (*num_steps)++; + if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { + block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { + block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_SETUP_DPP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { + block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; + (*num_steps)++; + } } if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) { block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index dd652436a539..bd7b186fb2e4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2284,6 +2284,9 @@ void dcn20_post_unlock_program_front_end( } } + if (!hwseq) + return; + /* P-State support transitions: * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) @@ -2291,7 +2294,7 @@ void dcn20_post_unlock_program_front_end( * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ - if (hwseq && hwseq->funcs.update_force_pstate) + if (hwseq->funcs.update_force_pstate) dc->hwseq->funcs.update_force_pstate(dc, context); /* Only program the MALL registers after all the main and phantom pipes @@ -2531,6 +2534,9 @@ bool dcn20_wait_for_blank_complete( { int counter; + if (!opp) + return false; + for (counter = 0; counter < 1000; counter++) { if (!opp->funcs->dpg_is_pending(opp)) break; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 555c1c484cfd..df3781081da7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -804,8 +804,11 @@ bool dp_set_test_pattern( break; } + if (!pipe_ctx->stream) + return false; + if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b76737b7b9e4..3e47a6735912 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -74,7 +74,10 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; - if (stream_enc && stream_enc->funcs->disable_fifo) + if (!stream_enc) + return; + + if (stream_enc->funcs->disable_fifo) stream_enc->funcs->disable_fifo(stream_enc); if (stream_enc->funcs->set_input_mode) stream_enc->funcs->set_input_mode(stream_enc, 0); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 88afb2a30eef..162856c523e4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -1067,7 +1067,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels clks = {0}; int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + if (!dc->bw_vbios) + return; + + if (dc->bw_vbios->memory_type == bw_def_hbm) memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO PPLIB: after PPLIB implement, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index ee009716d39b..6eaf3cfebcb7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1651,6 +1651,9 @@ static void dcn32_enable_phantom_plane(struct dc *dc, else phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + if (!phantom_plane) + continue; + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, sizeof(phantom_plane->scaling_quality)); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index d184105ce2b3..47c8a9fbe754 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -218,12 +218,12 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe) pipe->stream->timing.v_addressable != pipe->stream->src.height) { is_center_timing = true; } - } - if (pipe->plane_state) { - if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && - pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { - is_center_timing = true; + if (pipe->plane_state) { + if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && + pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { + is_center_timing = true; + } } } From 6d64d39486197083497a01b39e23f2f8474b35d3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 3 Jul 2024 10:50:35 -0600 Subject: [PATCH 0085/1523] drm/amd/display: Increase array size of dummy_boolean [WHY] dml2_core_shared_mode_support and dml_core_mode_support access the third element of dummy_boolean, i.e. hw_debug5 = &s->dummy_boolean[2], when dummy_boolean has size of 2. Any assignment to hw_debug5 causes an OVERRUN. [HOW] Increase dummy_boolean's array size to 3. This fixes 2 OVERRUN issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 02498c0e3282..317008eff61b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -866,7 +866,7 @@ struct dml2_core_calcs_mode_support_locals { unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; - bool dummy_boolean[2]; + bool dummy_boolean[3]; unsigned int dummy_integer[3]; unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES]; From 323b19f424486e61784e8797a8bfb2e24585e162 Mon Sep 17 00:00:00 2001 From: Fudongwang Date: Mon, 1 Jul 2024 16:47:28 +0800 Subject: [PATCH 0086/1523] drm/amd/display: add dmcub support check [Why & How] For DCN harvest case, if there is no dmcub support, we should return false to avoid bugcheck later. Reviewed-by: Aric Cyr Signed-off-by: Jerry Zuo Signed-off-by: Fudongwang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9897e322e2d5..f07b13ad4ead 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5574,6 +5574,9 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { + if (!dc->caps.dmcub_support) + return false; + switch (dc->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: From b8d3782da396215615c3d125f0829d1d06ad2c97 Mon Sep 17 00:00:00 2001 From: Mudimela Date: Tue, 2 Jul 2024 11:55:56 +0530 Subject: [PATCH 0087/1523] drm/amd/display: Refactoring DWB related files from dcn30 Files [Why] To refactor DWB related files from dcn30 Files [How] Moved DWB related files from dcn30 to specific DWB folder and updated Makefiles to fix Compilation. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Mudimela Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 -- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 +- drivers/gpu/drm/amd/display/dc/dwb/Makefile | 9 +++++++++ .../drm/amd/display/dc/{ => dwb}/dcn30/dcn30_cm_common.h | 0 .../gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.c | 0 .../gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.h | 0 .../drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb_cm.c | 0 drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c | 1 - 8 files changed, 10 insertions(+), 4 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_cm_common.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dwb}/dcn30/dcn30_dwb_cm.c (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 804851247acc..ccb4b21338b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -25,8 +25,6 @@ DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ - dcn30_dwb.o \ - dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index b8327237ed44..685702321d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -28,7 +28,7 @@ #include "reg_helper.h" #include "dcn30/dcn30_dpp.h" #include "basics/conversion.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "custom_float.h" #define REG(reg) reg diff --git a/drivers/gpu/drm/amd/display/dc/dwb/Makefile b/drivers/gpu/drm/amd/display/dc/dwb/Makefile index 16f7a454fed9..3952ba4cd508 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dwb/Makefile @@ -24,6 +24,15 @@ # ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN30 +############################################################################### +DWB_DCN30 = dcn30_dwb.o dcn30_dwb_cm.o + +AMD_DAL_DWB_DCN30 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn30/,$(DWB_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN30) + ############################################################################### # DCN35 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c rename to drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c index b23a809999ed..d5e8294f5a16 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c @@ -21,7 +21,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #include "reg_helper.h" #include "dcn35_dwb.h" From 08cbe68d4aafcf651a86730a485df53e6ee9f594 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 14:00:14 -0400 Subject: [PATCH 0088/1523] drm/amd/display: Export additional FAMS2 global configuration options from DML [WHY&HOW] Some global configuration options were previously hardcoded in DC, now they are exported by DML and sent to FW. Reviewed-by: Martin Leung Signed-off-by: Jerry Zuo Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_state.c | 4 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 36 ++--- .../amd/display/dc/dml2/dml21/dml21_utils.c | 145 ++++++++++-------- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 1 + .../src/dml2_core/dml2_core_dcn4_calcs.c | 17 +- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 4 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 2 +- 7 files changed, 112 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 665157f8d4cb..2597e3fd562b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -967,10 +967,10 @@ bool dc_state_is_fams2_in_use( bool is_fams2_in_use = false; if (state) - is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; if (dc->current_state) - is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; return is_fams2_in_use; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ded13026c8ff..fb3391854eed 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1672,22 +1672,17 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - /* send global configuration parameters */ - global_cmd->config.global.max_allow_delay_us = 100 * 1000; //100ms - global_cmd->config.global.lock_wait_time_us = 5000; //5ms - global_cmd->config.global.recovery_timeout_us = 5000; //5ms - global_cmd->config.global.hwfq_flip_programming_delay_us = 100; //100us - - /* copy static feature configuration */ - global_cmd->config.global.features.all = dc->debug.fams2_config.all; - - /* apply feature configuration based on current driver state */ - global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; - global_cmd->config.global.features.bits.enable = enable; - - /* construct per-stream configs */ if (enable) { - for (i = 0; i < context->bw_ctx.bw.dcn.fams2_stream_count; i++) { + /* send global configuration parameters */ + memcpy(&global_cmd->config.global, &context->bw_ctx.bw.dcn.fams2_global_config, sizeof(struct dmub_cmd_fams2_global_config)); + + /* copy static feature configuration overrides */ + global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery; + global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug; + global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip; + + /* construct per-stream configs */ + for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; /* configure command header */ @@ -1702,12 +1697,15 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, } } - if (enable && context->bw_ctx.bw.dcn.fams2_stream_count) { + /* apply feature configuration based on current driver state */ + global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; + global_cmd->config.global.features.bits.enable = enable; + + if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ - global_cmd->config.global.num_streams = context->bw_ctx.bw.dcn.fams2_stream_count; global_cmd->header.multi_cmd_pending = 1; - cmd[context->bw_ctx.bw.dcn.fams2_stream_count].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_stream_count; + cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 622c98f4b7fb..e11246e525ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -461,94 +461,103 @@ void dml21_build_fams2_programming(const struct dc *dc, struct dml2_context *dml_ctx) { int i, j, k; + unsigned int num_fams2_streams = 0; /* reset fams2 data */ - context->bw_ctx.bw.dcn.fams2_stream_count = 0; memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); - if (!dml_ctx->v21.mode_programming.programming->fams2_required) - return; + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; - for (i = 0; i < context->stream_count; i++) { - int dml_stream_idx; - struct dc_stream_state *phantom_stream; - struct dc_stream_status *phantom_status; + struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; - struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[context->bw_ctx.bw.dcn.fams2_stream_count]; + struct dc_stream_state *stream = context->streams[i]; - struct dc_stream_state *stream = context->streams[i]; - - if (context->stream_status[i].plane_count == 0 || - dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { - /* can ignore blanked or phantom streams */ - continue; - } - - dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); - if (dml_stream_idx < 0) { - ASSERT(dml_stream_idx >= 0); - continue; - } - - /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); - } + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; } - } - /* get per method programming */ - switch (static_state->type) { - case FAMS2_STREAM_TYPE_VBLANK: - case FAMS2_STREAM_TYPE_VACTIVE: - case FAMS2_STREAM_TYPE_DRR: - break; - case FAMS2_STREAM_TYPE_SUBVP: - phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); - if (!phantom_stream) - break; + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } - phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + /* copy static state from PMO */ + memcpy(static_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, + sizeof(struct dmub_fams2_stream_static_state)); - /* phantom status should always be present */ - ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + /* get information from context */ + static_state->num_planes = context->stream_status[i].plane_count; + static_state->otg_inst = context->stream_status[i].primary_otg_inst; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { for (k = 0; k < dc->res_pool->pipe_count; k++) { if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_state->pipe_mask |= (1 << k); + static_state->plane_pipe_masks[j] |= (1 << k); } } } - break; - default: - ASSERT(false); - break; - } - context->bw_ctx.bw.dcn.fams2_stream_count++; + /* get per method programming */ + switch (static_state->type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + break; + default: + ASSERT(false); + break; + } + + num_fams2_streams++; + } } - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + if (num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; } bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index b7a6f7f4c342..8c803b12404b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -351,6 +351,7 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in programming->fams2_required = display_cfg->stage3.fams2_required; dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); + programming->fams2_global_config.features.bits.enable = display_cfg->stage3.fams2_required; } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 45e43a915fd6..3b1e5c548435 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12221,12 +12221,19 @@ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_interna const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config) { - fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; - fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; - fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; - fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; - fams2_global_config->num_streams = display_cfg->display_config.num_streams; + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } } void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index d0b4308dca96..f4c1547a368f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1531,7 +1531,7 @@ void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool en if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) return; - fams2_required = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + fams2_required = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; dc_dmub_srv_fams2_update_config(dc, context, enable && fams2_required); } @@ -1656,7 +1656,7 @@ void dcn401_hardware_release(struct dc *dc) */ if (dc->current_state) { if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0) && + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, true, true); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4c8e6436c7e1..bfb8b8502d20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -534,8 +534,8 @@ struct dcn_bw_output { unsigned int legacy_svp_drr_stream_index; bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct dmub_cmd_fams2_global_config fams2_global_config; struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; - unsigned fams2_stream_count; struct dml2_display_arb_regs arb_regs; }; From be7a6a5171649c39f79a6259518218351082bd99 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Wed, 3 Jul 2024 16:15:59 -0400 Subject: [PATCH 0089/1523] drm/amd/display: Check stream pointer is initialized before accessing [why & how] We calculate static screen wait frames based on the current timing info in the active stream. If stream is not initialized, then we should skip the calculation and go with the default values. Reviewed-by: Gabe Teeger Signed-off-by: Jerry Zuo Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ddf0807db627..ac1e3331a77c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1461,10 +1461,9 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, for (i = 0; i < num_pipes; i++) { if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; - struct dc *dc = pipe_ctx[i]->stream->ctx->dc; - - if (dc->debug.static_screen_wait_frames) { + if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); if (frame_rate >= 120 && dc->caps.ips_support && From acce1eb8eb5de360fd82fd900454ff180912d8b1 Mon Sep 17 00:00:00 2001 From: Daniel Sa Date: Thu, 4 Jul 2024 13:41:15 -0400 Subject: [PATCH 0090/1523] drm/amd/display: Set Cursor Matrix to bypass instead of Input Plane why: When the cursor disappears/reappears on fullscreen video, there is a short transitional period where the cursor's color matrix is using the same format as the video plane. This sets the cursor to the wrong color momentarily before the UI plane appears, correcting the color. how: Instead of defaulting to using the color space from the input plane, default to bypass mode. Reviewed-by: Nevenko Stupar Signed-off-by: Jerry Zuo Signed-off-by: Daniel Sa Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index d0f8c9ff5232..92b34fe47f74 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -246,16 +246,6 @@ void dpp401_set_cursor_matrix( enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix) { - struct dpp_input_csc_matrix cursor_tbl_entry; - unsigned int i; - - if (cursor_csc_color_matrix.enable_adjustment == true) { - for (i = 0; i < 12; i++) - cursor_tbl_entry.regval[i] = cursor_csc_color_matrix.matrix[i]; - - cursor_tbl_entry.color_space = color_space; - dpp401_program_cursor_csc(dpp_base, color_space, &cursor_tbl_entry); - } else { - dpp401_program_cursor_csc(dpp_base, color_space, NULL); - } + //Since we don't have cursor matrix information, force bypass mode by passing in unknown color space + dpp401_program_cursor_csc(dpp_base, COLOR_SPACE_UNKNOWN, NULL); } From 748b3c4ca0bf43cec186ace2ecb33457d7a4653d Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Thu, 27 Jun 2024 12:11:58 -0400 Subject: [PATCH 0091/1523] drm/amd/display: Add visual confirm for Idle State [Why] Visual Confirm would tell us if it ever entered idle state. [How] Add debug option for IPS visual confirm Reviewed-by: Ovidiu Bunea Signed-off-by: Jerry Zuo Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 2 ++ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++++ 4 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 83fe13f5a367..9d4b821ab219 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1056,6 +1056,7 @@ struct dc_debug_options { unsigned int force_sharpness; unsigned int force_lls; bool notify_dpia_hr_bw; + bool enable_ips_visual_confirm; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ccf153b7a467..0f3d15126a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -363,6 +363,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR; copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->debug.bitfields.force_full_frame_update = 0; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; if (psr_context->su_granularity_required == 0) copy_settings_data->su_y_granularity = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..44df9e2351c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -167,6 +167,8 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; + copy_settings_data->flags.u32All = 0; copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled); copy_settings_data->flags.bitfields.dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5ff0a865705f..7c3838362c49 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -336,6 +336,10 @@ union dmub_psr_debug_flags { */ uint32_t back_to_back_flip : 1; + /** + * Enable visual confirm for IPS + */ + uint32_t enable_ips_visual_confirm : 1; } bitfields; /** From f59549c7e705be0087d08bc116ccc767b86d8362 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 3 Jul 2024 16:41:52 -0400 Subject: [PATCH 0092/1523] drm/amd/display: free bo used for dmub bounding box fix a memleak introduced by not removing the buffer object for use with early dmub bounding box value storage Fixes: 234e94555800 ("drm/amd/display: Enable copying of bounding box data from VBIOS DMUB") Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Jerry Zuo Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a3b0f2748af0..fa8d455de7f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1740,7 +1740,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * /* Send the chunk */ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000); if (ret != DMUB_STATUS_OK) - /* No need to free bb here since it shall be done unconditionally */ + /* No need to free bb here since it shall be done in dm_sw_fini() */ return NULL; } @@ -2465,8 +2465,17 @@ static int dm_sw_init(void *handle) static int dm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct dal_allocation *da; + + list_for_each_entry(da, &adev->dm.da_list, list) { + if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) { + amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); + list_del(&da->list); + kfree(da); + break; + } + } - kfree(adev->dm.bb_from_dmub); adev->dm.bb_from_dmub = NULL; kfree(adev->dm.dmub_fb_info); From 48eba83018c14ba6c102f5801d31c398807535d9 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 4 Jul 2024 15:22:13 -0400 Subject: [PATCH 0093/1523] drm/amd/display: Remove unnecessary DSC power gating for DCN401 [Why] In some cases during topology changes, a pipe that was used to drive a stream being removed can be re-assigned to drive a different stream. In these cases, DSC power gating is not handled properly, leading to situations where DSC is being setup while power gated. [How] - remove enable_stream_gating and disable_stream_gating for DCN401 Reviewed-by: Wenjing Liu Signed-off-by: Jerry Zuo Signed-off-by: Joshua Aberback Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 6a768702c7bd..1439f07f0b64 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -115,8 +115,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, .did_underflow_occur = dcn10_did_underflow_occur, .init_blank = dcn32_init_blank, From 6b0a9bf72c04cd8d2864fccce42817c24b060aec Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 9 Jul 2024 14:34:25 -0600 Subject: [PATCH 0094/1523] drm/amd/display: Remove unused dml2_core_ip_params struct Acked-by: Jerry Zuo Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 78 ------------------- 1 file changed, 78 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 8c803b12404b..f5c6cd5cf5e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -77,84 +77,6 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .subvp_swath_height_margin_lines = 16, }; -struct dml2_core_ip_params core_dcn4sw_ip_caps_base = { - .vblank_nom_default_us = 668, - .remote_iommu_outstanding_translations = 256, - .rob_buffer_size_kbytes = 192, - .config_return_buffer_size_in_kbytes = 1280, - .config_return_buffer_segment_size_in_kbytes = 64, - .compressed_buffer_segment_size_in_kbytes = 64, - .dpte_buffer_size_in_pte_reqs_luma = 68, - .dpte_buffer_size_in_pte_reqs_chroma = 36, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, - .min_pixel_chunk_size_bytes = 1024, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 1171920, - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - - //Number of pipes after DCN Pipe harvesting - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .cursor_buffer_size = 24, - .cursor_chunk_size = 2, - .dispclk_delay_subtotal = 125, - .max_inter_dcn_tile_repeaters = 8, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_buffer_size = 0, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 5760, - .dsc422_native_support = true, - .dcc_supported = true, - .ptoi_supported = false, - - .cursor_64bpp_support = true, - .dynamic_metadata_vm_enabled = false, - - .max_num_hdmi_frl_outputs = 1, - .max_num_dp2p0_outputs = 4, - .max_num_dp2p0_streams = 4, - .imall_supported = 1, - .max_flip_time_us = 80, - .words_per_channel = 16, - - .subvp_fw_processing_delay_us = 15, - .subvp_pstate_allow_width_us = 20, - .subvp_swath_height_margin_lines = 16, - - .dcn_mrq_present = 1, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_zs = 64, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - - .dchub_arb_to_ret_delay = 102, - .hostvm_mode = 1, -}; - static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) { ip_caps->pipe_count = ip_params->max_num_dpp; From 2d67c4b54909982d462bfe227279d1499b329545 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 8 Jul 2024 10:03:28 -0400 Subject: [PATCH 0095/1523] drm/amd/display: 3.2.292 * FW Release 0.0.225.0 * DML2 fixes * Allow display DCC for DCN401 * Refactor DWB, OPP, MPC, MMHUBBUB * Fix dscclk Programming issue on DCN401 Acked-by: Rodrigo Siqueira Signed-off-by: Jerry Zuo Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9d4b821ab219..036b23a6e324 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.291" +#define DC_VER "3.2.292" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 60a9472cf0a8568f32be6cbbd32ee7759bffe215 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:17 -0400 Subject: [PATCH 0096/1523] drm/radeon: change variable name "dev" to "ddev" for consistency In the probe function of amdgpu, it uses "ddev" as the name of "struct drm_device *", so I suggest renaming it to be consistent. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7bf08164140e..739bb1da9dcc 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -259,7 +259,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; - struct drm_device *dev; + struct drm_device *ddev; int ret; if (!ent) @@ -300,28 +300,28 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - dev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); + ddev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); ret = pci_enable_device(pdev); if (ret) goto err_free; - pci_set_drvdata(pdev, dev); + pci_set_drvdata(pdev, ddev); - ret = drm_dev_register(dev, ent->driver_data); + ret = drm_dev_register(ddev, ent->driver_data); if (ret) goto err_agp; - radeon_fbdev_setup(dev->dev_private); + radeon_fbdev_setup(ddev->dev_private); return 0; err_agp: pci_disable_device(pdev); err_free: - drm_dev_put(dev); + drm_dev_put(ddev); return ret; } From 90985660ba488cd3428706e7d53d6c9cdbbf3101 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:18 -0400 Subject: [PATCH 0097/1523] drm/radeon: remove load callback from kms_driver The ".load" callback in "struct drm_driver" is deprecated. In order to remove the callback, we have to manually call "radeon_driver_load_kms" instead. Acked-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 739bb1da9dcc..88d3de2a79f8 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -310,6 +310,10 @@ static int radeon_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + ret = radeon_driver_load_kms(ddev, flags); + if (ret) + goto err_agp; + ret = drm_dev_register(ddev, ent->driver_data); if (ret) goto err_agp; @@ -569,7 +573,6 @@ static const struct drm_ioctl_desc radeon_ioctls_kms[] = { static const struct drm_driver kms_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET, - .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, .unload = radeon_driver_unload_kms, From 78dd6a8d33a3363fb4196e173be0eb4002962798 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:19 -0400 Subject: [PATCH 0098/1523] drm/radeon: use variable flags as parameter To be consistent with amdgpu driver, use "flags" as the parameter because it is already assigned as "ent->driver_data". Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88d3de2a79f8..7b8aa8406751 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -314,7 +314,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) goto err_agp; - ret = drm_dev_register(ddev, ent->driver_data); + ret = drm_dev_register(ddev, flags); if (ret) goto err_agp; From a6e23bec8ed184ed2a11080b28cdbd7a3024f0c0 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:20 -0400 Subject: [PATCH 0099/1523] drm/radeon: add helper rdev_to_drm(rdev) Add helper rdev_to_drm(rdev), similar to amdgpu, most function should access the "drm_device" with "rdev_to_drm(rdev)" instead, where amdgpu has "adev_to_drm(adev)". It also makes changing from "*drm_device" to "drm_device" in "radeon_devicce" later on easier. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0999c8eaae94..ae35c102a487 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2476,6 +2476,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index); void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); +static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) +{ + return rdev->ddev; +} + /* * Cast helper */ From fb1b5e1dd53fc834e12f69749cbc8484382599c4 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:21 -0400 Subject: [PATCH 0100/1523] drm/radeon: change rdev->ddev to rdev_to_drm(rdev) This patch changes the way "drm_device" is accessed. It uses "rdev_to_drm(rdev)" instead of accessing the struct member directly. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_encoders.c | 2 +- drivers/gpu/drm/radeon/cik.c | 14 ++-- drivers/gpu/drm/radeon/dce6_afmt.c | 2 +- drivers/gpu/drm/radeon/evergreen.c | 12 ++-- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r100.c | 24 +++---- drivers/gpu/drm/radeon/r300.c | 6 +- drivers/gpu/drm/radeon/r420.c | 6 +- drivers/gpu/drm/radeon/r520.c | 2 +- drivers/gpu/drm/radeon/r600.c | 12 ++-- drivers/gpu/drm/radeon/r600_cs.c | 2 +- drivers/gpu/drm/radeon/r600_dpm.c | 4 +- drivers/gpu/drm/radeon/r600_hdmi.c | 2 +- drivers/gpu/drm/radeon/radeon_acpi.c | 10 +-- drivers/gpu/drm/radeon/radeon_agp.c | 2 +- drivers/gpu/drm/radeon/radeon_atombios.c | 2 +- drivers/gpu/drm/radeon/radeon_audio.c | 4 +- drivers/gpu/drm/radeon/radeon_combios.c | 12 ++-- drivers/gpu/drm/radeon/radeon_device.c | 10 +-- drivers/gpu/drm/radeon/radeon_display.c | 74 +++++++++++----------- drivers/gpu/drm/radeon/radeon_fbdev.c | 26 ++++---- drivers/gpu/drm/radeon/radeon_fence.c | 8 +-- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_i2c.c | 2 +- drivers/gpu/drm/radeon/radeon_ib.c | 2 +- drivers/gpu/drm/radeon/radeon_irq_kms.c | 12 ++-- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_pm.c | 20 +++--- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 6 +- drivers/gpu/drm/radeon/rs400.c | 6 +- drivers/gpu/drm/radeon/rs600.c | 14 ++-- drivers/gpu/drm/radeon/rs690.c | 2 +- drivers/gpu/drm/radeon/rv515.c | 4 +- drivers/gpu/drm/radeon/rv770.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 +- 36 files changed, 159 insertions(+), 159 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 03e6871b3065..c82e0fbc49b4 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2179,7 +2179,7 @@ assigned: void radeon_atom_encoder_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_encoder *encoder; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b5e96a8fc2c1..11a492f21157 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7585,7 +7585,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7615,7 +7615,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7645,7 +7645,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); + drm_handle_vblank(rdev_to_drm(rdev), 2); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7675,7 +7675,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); + drm_handle_vblank(rdev_to_drm(rdev), 3); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7705,7 +7705,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); + drm_handle_vblank(rdev_to_drm(rdev), 4); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7735,7 +7735,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); + drm_handle_vblank(rdev_to_drm(rdev), 5); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -8581,7 +8581,7 @@ int cik_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4c06f47453fd..d6ab93ed9ec4 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -91,7 +91,7 @@ struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) pin = &rdev->audio.pin[i]; pin_count = 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c634dc28e6c3..bc4ab71613a5 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1673,7 +1673,7 @@ void evergreen_pm_misc(struct radeon_device *rdev) */ void evergreen_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1698,7 +1698,7 @@ void evergreen_pm_prepare(struct radeon_device *rdev) */ void evergreen_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1763,7 +1763,7 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, */ void evergreen_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | @@ -1804,7 +1804,7 @@ void evergreen_hpd_init(struct radeon_device *rdev) */ void evergreen_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disabled = 0; @@ -4753,7 +4753,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -5211,7 +5211,7 @@ int evergreen_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 77aee99e473a..3890911fe693 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2360,7 +2360,7 @@ int cayman_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize memory controller */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0b1e19345f43..d7d7d23bf9a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -459,7 +459,7 @@ void r100_pm_misc(struct radeon_device *rdev) */ void r100_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -490,7 +490,7 @@ void r100_pm_prepare(struct radeon_device *rdev) */ void r100_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -603,7 +603,7 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, */ void r100_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -626,7 +626,7 @@ void r100_hpd_init(struct radeon_device *rdev) */ void r100_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -798,7 +798,7 @@ int r100_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (status & RADEON_CRTC_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -807,7 +807,7 @@ int r100_irq_process(struct radeon_device *rdev) } if (status & RADEON_CRTC2_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1471,7 +1471,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header = radeon_get_ib_value(p, h_idx); crtc_id = radeon_get_ib_value(p, h_idx + 5); reg = R100_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; @@ -3059,7 +3059,7 @@ DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info); void r100_debugfs_rbbm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_rbbm_info", 0444, root, rdev, &r100_debugfs_rbbm_info_fops); @@ -3069,7 +3069,7 @@ void r100_debugfs_rbbm_init(struct radeon_device *rdev) void r100_debugfs_cp_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_cp_ring_info", 0444, root, rdev, &r100_debugfs_cp_ring_info_fops); @@ -3081,7 +3081,7 @@ void r100_debugfs_cp_init(struct radeon_device *rdev) void r100_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_mc_info", 0444, root, rdev, &r100_debugfs_mc_info_fops); @@ -3947,7 +3947,7 @@ int r100_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r100_clock_startup(rdev); /* Initialize surface registers */ @@ -4056,7 +4056,7 @@ int r100_init(struct radeon_device *rdev) /* Set asic errata */ r100_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 1620f534f55f..05c13102a8cb 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -616,7 +616,7 @@ DEFINE_SHOW_ATTRIBUTE(rv370_debugfs_pcie_gart_info); static void rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv370_pcie_gart_info", 0444, root, rdev, &rv370_debugfs_pcie_gart_info_fops); @@ -1452,7 +1452,7 @@ int r300_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -1538,7 +1538,7 @@ int r300_init(struct radeon_device *rdev) /* Set asic errata */ r300_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index a979662eaa73..9a31cdec6415 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -322,7 +322,7 @@ int r420_resume(struct radeon_device *rdev) if (rdev->is_atom_bios) { atom_asic_init(rdev->mode_info.atom_context); } else { - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); } /* Resume clock after posting */ r420_clock_resume(rdev); @@ -414,7 +414,7 @@ int r420_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); @@ -493,7 +493,7 @@ DEFINE_SHOW_ATTRIBUTE(r420_debugfs_pipes_info); void r420_debugfs_pipes_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r420_pipes_info", 0444, root, rdev, &r420_debugfs_pipes_info_fops); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 6cbcaa845192..08e127b3249a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -287,7 +287,7 @@ int r520_init(struct radeon_device *rdev) atom_asic_init(rdev->mode_info.atom_context); } /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 087d41e370fd..8b62f7faa5b9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -950,7 +950,7 @@ void r600_hpd_set_polarity(struct radeon_device *rdev, void r600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -1017,7 +1017,7 @@ void r600_hpd_init(struct radeon_device *rdev) void r600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -3280,7 +3280,7 @@ int r600_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); if (rdev->flags & RADEON_IS_AGP) { @@ -4136,7 +4136,7 @@ restart_ih: DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4166,7 +4166,7 @@ restart_ih: DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4358,7 +4358,7 @@ DEFINE_SHOW_ATTRIBUTE(r600_debugfs_mc_info); static void r600_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r600_mc_info", 0444, root, rdev, &r600_debugfs_mc_info_fops); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 6cf54a747749..1b2d31c4d77c 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -884,7 +884,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1); reg = R600_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index 64980a61d38a..81d58ef667dd 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -153,7 +153,7 @@ void r600_dpm_print_ps_status(struct radeon_device *rdev, u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vblank_in_pixels; @@ -180,7 +180,7 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vrefresh = 0; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f3551ebaa2f0..661f374f5f27 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -116,7 +116,7 @@ void r600_audio_update_hdmi(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, audio_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 603a78e41ba5..22ce61bdfc06 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -405,11 +405,11 @@ static int radeon_atif_handler(struct radeon_device *rdev, if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if ((rdev->flags & RADEON_IS_PX) && radeon_atpx_dgpu_req_power_for_displays()) { - pm_runtime_get_sync(rdev->ddev->dev); + pm_runtime_get_sync(rdev_to_drm(rdev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ - drm_helper_hpd_irq_event(rdev->ddev); - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + drm_helper_hpd_irq_event(rdev_to_drm(rdev)); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); } } /* TODO: check other events */ @@ -736,7 +736,7 @@ int radeon_acpi_init(struct radeon_device *rdev) struct radeon_encoder *target = NULL; /* Find the encoder controlling the brightness */ - list_for_each_entry(tmp, &rdev->ddev->mode_config.encoder_list, + list_for_each_entry(tmp, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { struct radeon_encoder *enc = to_radeon_encoder(tmp); diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index a3d749e350f9..89d7b0e9e79f 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -161,7 +161,7 @@ struct radeon_agp_head *radeon_agp_head_init(struct drm_device *dev) static int radeon_agp_head_acquire(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); if (!rdev->agp) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 10793a433bf5..97c4e10d0550 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -187,7 +187,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) if (i2c.valid) { sprintf(stmp, "0x%x", i2c.i2c_id); - rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp); + rdev->i2c_bus[i] = radeon_i2c_create(rdev_to_drm(rdev), &i2c, stmp); } gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 0bcd767b9f47..47aa06a9a942 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -196,7 +196,7 @@ static void radeon_audio_enable(struct radeon_device *rdev, return; if (rdev->mode_info.mode_config_initialized) { - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; @@ -760,7 +760,7 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) return 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (!radeon_encoder_is_digital(encoder)) continue; radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 6952b1273b0f..41ddc576f8f8 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -372,7 +372,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) int edid_info, size; struct edid *edid; unsigned char *raw; - edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); + edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) return false; @@ -642,7 +642,7 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; u16 offset; u8 id, blocks, clk, data; @@ -670,7 +670,7 @@ static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct r void radeon_combios_i2c_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; /* actual hw pads @@ -812,7 +812,7 @@ bool radeon_combios_get_clock_info(struct drm_device *dev) bool radeon_combios_sideport_present(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 igp_info; /* sideport is AMD only */ @@ -915,7 +915,7 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); uint16_t tv_info; enum radeon_tv_std tv_std = TV_STD_NTSC; @@ -2637,7 +2637,7 @@ static const char *thermal_controller_names[] = { void radeon_combios_get_power_modes(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 offset, misc, misc2 = 0; u8 rev, tmp; int state_index = 0; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..32851632643d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -760,7 +760,7 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) if (rdev->is_atom_bios) atom_asic_init(rdev->mode_info.atom_context); else - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); return true; } else { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); @@ -980,7 +980,7 @@ int radeon_atombios_init(struct radeon_device *rdev) return -ENOMEM; rdev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = rdev->ddev; + atom_card_info->dev = rdev_to_drm(rdev); atom_card_info->reg_read = cail_reg_read; atom_card_info->reg_write = cail_reg_write; /* needed for iio ops */ @@ -1005,7 +1005,7 @@ int radeon_atombios_init(struct radeon_device *rdev) mutex_init(&rdev->mode_info.atom_context->mutex); mutex_init(&rdev->mode_info.atom_context->scratch_mutex); - radeon_atom_initialize_bios_scratch_regs(rdev->ddev); + radeon_atom_initialize_bios_scratch_regs(rdev_to_drm(rdev)); atom_allocate_fb_scratch(rdev->mode_info.atom_context); return 0; } @@ -1049,7 +1049,7 @@ void radeon_atombios_fini(struct radeon_device *rdev) */ int radeon_combios_init(struct radeon_device *rdev) { - radeon_combios_initialize_bios_scratch_regs(rdev->ddev); + radeon_combios_initialize_bios_scratch_regs(rdev_to_drm(rdev)); return 0; } @@ -1847,7 +1847,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) downgrade_write(&rdev->exclusive_lock); - drm_helper_resume_force_mode(rdev->ddev); + drm_helper_resume_force_mode(rdev_to_drm(rdev)); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 843383f7237f..10fd58f400bc 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -302,13 +302,13 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) if ((radeon_use_pflipirq == 2) && ASIC_IS_DCE4(rdev)) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -334,7 +334,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ if (update_pending && (DRM_SCANOUTPOS_VALID & - radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc_id, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && @@ -347,7 +347,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ update_pending = 0; } - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); if (!update_pending) radeon_crtc_handle_flip(rdev, crtc_id); } @@ -370,14 +370,14 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (radeon_crtc == NULL) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); work = radeon_crtc->flip_work; if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -389,7 +389,7 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (work->event) drm_crtc_send_vblank_event(&radeon_crtc->base, work->event); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); drm_crtc_vblank_put(&radeon_crtc->base); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); @@ -408,7 +408,7 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_flip_work *work = container_of(__work, struct radeon_flip_work, flip_work); struct radeon_device *rdev = work->rdev; - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; @@ -1401,7 +1401,7 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (rdev->is_atom_bios) { rdev->mode_info.coherent_mode_property = - drm_property_create_range(rdev->ddev, 0 , "coherent", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "coherent", 0, 1); if (!rdev->mode_info.coherent_mode_property) return -ENOMEM; } @@ -1409,57 +1409,57 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!ASIC_IS_AVIVO(rdev)) { sz = ARRAY_SIZE(radeon_tmds_pll_enum_list); rdev->mode_info.tmds_pll_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tmds_pll", radeon_tmds_pll_enum_list, sz); } rdev->mode_info.load_detect_property = - drm_property_create_range(rdev->ddev, 0, "load detection", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "load detection", 0, 1); if (!rdev->mode_info.load_detect_property) return -ENOMEM; - drm_mode_create_scaling_mode_property(rdev->ddev); + drm_mode_create_scaling_mode_property(rdev_to_drm(rdev)); sz = ARRAY_SIZE(radeon_tv_std_enum_list); rdev->mode_info.tv_std_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tv standard", radeon_tv_std_enum_list, sz); sz = ARRAY_SIZE(radeon_underscan_enum_list); rdev->mode_info.underscan_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "underscan", radeon_underscan_enum_list, sz); rdev->mode_info.underscan_hborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan hborder", 0, 128); if (!rdev->mode_info.underscan_hborder_property) return -ENOMEM; rdev->mode_info.underscan_vborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan vborder", 0, 128); if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; sz = ARRAY_SIZE(radeon_audio_enum_list); rdev->mode_info.audio_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "audio", radeon_audio_enum_list, sz); sz = ARRAY_SIZE(radeon_dither_enum_list); rdev->mode_info.dither_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "dither", radeon_dither_enum_list, sz); sz = ARRAY_SIZE(radeon_output_csc_enum_list); rdev->mode_info.output_csc_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "output_csc", radeon_output_csc_enum_list, sz); @@ -1578,29 +1578,29 @@ int radeon_modeset_init(struct radeon_device *rdev) int i; int ret; - drm_mode_config_init(rdev->ddev); + drm_mode_config_init(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = true; - rdev->ddev->mode_config.funcs = &radeon_mode_funcs; + rdev_to_drm(rdev)->mode_config.funcs = &radeon_mode_funcs; if (radeon_use_pflipirq == 2 && rdev->family >= CHIP_R600) - rdev->ddev->mode_config.async_page_flip = true; + rdev_to_drm(rdev)->mode_config.async_page_flip = true; if (ASIC_IS_DCE5(rdev)) { - rdev->ddev->mode_config.max_width = 16384; - rdev->ddev->mode_config.max_height = 16384; + rdev_to_drm(rdev)->mode_config.max_width = 16384; + rdev_to_drm(rdev)->mode_config.max_height = 16384; } else if (ASIC_IS_AVIVO(rdev)) { - rdev->ddev->mode_config.max_width = 8192; - rdev->ddev->mode_config.max_height = 8192; + rdev_to_drm(rdev)->mode_config.max_width = 8192; + rdev_to_drm(rdev)->mode_config.max_height = 8192; } else { - rdev->ddev->mode_config.max_width = 4096; - rdev->ddev->mode_config.max_height = 4096; + rdev_to_drm(rdev)->mode_config.max_width = 4096; + rdev_to_drm(rdev)->mode_config.max_height = 4096; } - rdev->ddev->mode_config.preferred_depth = 24; - rdev->ddev->mode_config.prefer_shadow = 1; + rdev_to_drm(rdev)->mode_config.preferred_depth = 24; + rdev_to_drm(rdev)->mode_config.prefer_shadow = 1; - rdev->ddev->mode_config.fb_modifiers_not_supported = true; + rdev_to_drm(rdev)->mode_config.fb_modifiers_not_supported = true; ret = radeon_modeset_create_props(rdev); if (ret) { @@ -1618,11 +1618,11 @@ int radeon_modeset_init(struct radeon_device *rdev) /* allocate crtcs */ for (i = 0; i < rdev->num_crtc; i++) { - radeon_crtc_init(rdev->ddev, i); + radeon_crtc_init(rdev_to_drm(rdev), i); } /* okay we should have all the bios connectors */ - ret = radeon_setup_enc_conn(rdev->ddev); + ret = radeon_setup_enc_conn(rdev_to_drm(rdev)); if (!ret) { return ret; } @@ -1639,7 +1639,7 @@ int radeon_modeset_init(struct radeon_device *rdev) /* setup afmt */ radeon_afmt_init(rdev); - drm_kms_helper_poll_init(rdev->ddev); + drm_kms_helper_poll_init(rdev_to_drm(rdev)); /* do pm late init */ ret = radeon_pm_late_init(rdev); @@ -1650,11 +1650,11 @@ int radeon_modeset_init(struct radeon_device *rdev) void radeon_modeset_fini(struct radeon_device *rdev) { if (rdev->mode_info.mode_config_initialized) { - drm_kms_helper_poll_fini(rdev->ddev); + drm_kms_helper_poll_fini(rdev_to_drm(rdev)); radeon_hpd_fini(rdev); - drm_helper_force_disable_all(rdev->ddev); + drm_helper_force_disable_all(rdev_to_drm(rdev)); radeon_afmt_fini(rdev); - drm_mode_config_cleanup(rdev->ddev); + drm_mode_config_cleanup(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = false; } diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index 02bf25759059..fb70de29545c 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -67,7 +67,7 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev->ddev, mode_cmd); + info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -148,15 +148,15 @@ static int radeon_fbdev_fb_open(struct fb_info *info, int user) struct radeon_device *rdev = fb_helper->dev->dev_private; int ret; - ret = pm_runtime_get_sync(rdev->ddev->dev); + ret = pm_runtime_get_sync(rdev_to_drm(rdev)->dev); if (ret < 0 && ret != -EACCES) goto err_pm_runtime_mark_last_busy; return 0; err_pm_runtime_mark_last_busy: - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return ret; } @@ -165,8 +165,8 @@ static int radeon_fbdev_fb_release(struct fb_info *info, int user) struct drm_fb_helper *fb_helper = info->par; struct radeon_device *rdev = fb_helper->dev->dev_private; - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return 0; } @@ -236,7 +236,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev->ddev, fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; @@ -374,12 +374,12 @@ void radeon_fbdev_setup(struct radeon_device *rdev) fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); if (!fb_helper) return; - drm_fb_helper_prepare(rdev->ddev, fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); + drm_fb_helper_prepare(rdev_to_drm(rdev), fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); - ret = drm_client_init(rdev->ddev, &fb_helper->client, "radeon-fbdev", + ret = drm_client_init(rdev_to_drm(rdev), &fb_helper->client, "radeon-fbdev", &radeon_fbdev_client_funcs); if (ret) { - drm_err(rdev->ddev, "Failed to register client: %d\n", ret); + drm_err(rdev_to_drm(rdev), "Failed to register client: %d\n", ret); goto err_drm_client_init; } @@ -394,13 +394,13 @@ err_drm_client_init: void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state) { - if (rdev->ddev->fb_helper) - drm_fb_helper_set_suspend(rdev->ddev->fb_helper, state); + if (rdev_to_drm(rdev)->fb_helper) + drm_fb_helper_set_suspend(rdev_to_drm(rdev)->fb_helper, state); } bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) { - struct drm_fb_helper *fb_helper = rdev->ddev->fb_helper; + struct drm_fb_helper *fb_helper = rdev_to_drm(rdev)->fb_helper; struct drm_gem_object *gobj; if (!fb_helper) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 4fb780d96f32..daff61586be5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -150,7 +150,7 @@ int radeon_fence_emit(struct radeon_device *rdev, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); - trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + trace_radeon_fence_emit(rdev_to_drm(rdev), ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } @@ -489,7 +489,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, if (!target_seq[i]) continue; - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_begin(rdev_to_drm(rdev), i, target_seq[i]); radeon_irq_kms_sw_irq_get(rdev, i); } @@ -511,7 +511,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, continue; radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_end(rdev_to_drm(rdev), i, target_seq[i]); } return r; @@ -995,7 +995,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops, void radeon_debugfs_fence_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gpu_reset", 0444, root, rdev, &radeon_debugfs_gpu_reset_fops); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e66a230331ee..210e8d43bb23 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -899,7 +899,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_gem_info); void radeon_gem_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gem_info", 0444, root, rdev, &radeon_debugfs_gem_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 3d174390a8af..1f16619ed06e 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -1011,7 +1011,7 @@ void radeon_i2c_add(struct radeon_device *rdev, struct radeon_i2c_bus_rec *rec, const char *name) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int i; for (i = 0; i < RADEON_MAX_I2C_BUS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 63d914f3414d..1aa41cc3f991 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -309,7 +309,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_sa_info); static void radeon_debugfs_sa_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_sa_info", 0444, root, rdev, &radeon_debugfs_sa_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index c4dda908666c..9961251b44ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -80,7 +80,7 @@ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, hotplug_work.work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -101,7 +101,7 @@ static void radeon_dp_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, dp_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -197,7 +197,7 @@ static void radeon_driver_irq_uninstall_kms(struct drm_device *dev) static int radeon_irq_install(struct radeon_device *rdev, int irq) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int ret; if (irq == IRQ_NOTCONNECTED) @@ -218,7 +218,7 @@ static int radeon_irq_install(struct radeon_device *rdev, int irq) static void radeon_irq_uninstall(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); radeon_driver_irq_uninstall_kms(dev); @@ -322,9 +322,9 @@ int radeon_irq_kms_init(struct radeon_device *rdev) spin_lock_init(&rdev->irq.lock); /* Disable vblank irqs aggressively for power-saving */ - rdev->ddev->vblank_disable_immediate = true; + rdev_to_drm(rdev)->vblank_disable_immediate = true; - r = drm_vblank_init(rdev->ddev, rdev->num_crtc); + r = drm_vblank_init(rdev_to_drm(rdev), rdev->num_crtc); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a955f8a2f7fe..450ff7daa46c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -150,7 +150,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size); + drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2d9d9f46f243..b4fb7e70320b 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -282,7 +282,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.active_crtcs & (1 << i)) { /* This can fail if a modeset is in progress */ if (drm_crtc_vblank_get(crtc) == 0) @@ -299,7 +299,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.req_vblank & (1 << i)) { rdev->pm.req_vblank &= ~(1 << i); drm_crtc_vblank_put(crtc); @@ -671,7 +671,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); int temp; /* Can't get temperature when the card is off */ @@ -715,7 +715,7 @@ static ssize_t radeon_hwmon_show_sclk(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u32 sclk = 0; /* Can't get clock frequency when the card is off */ @@ -740,7 +740,7 @@ static ssize_t radeon_hwmon_show_vddc(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u16 vddc = 0; /* Can't get vddc when the card is off */ @@ -1692,7 +1692,7 @@ void radeon_pm_fini(struct radeon_device *rdev) static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; @@ -1765,7 +1765,7 @@ static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; struct radeon_connector *radeon_connector; @@ -1826,7 +1826,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev) */ for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { if (rdev->pm.active_crtcs & (1 << crtc)) { - vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, + vbl_status = radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc, USE_REAL_VBLANKSTART, &vpos, &hpos, NULL, NULL, @@ -1918,7 +1918,7 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work) static int radeon_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct radeon_device *rdev = m->private; - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); if ((rdev->flags & RADEON_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { @@ -1955,7 +1955,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_pm_info); static void radeon_debugfs_pm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_pm_info", 0444, root, rdev, &radeon_debugfs_pm_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8d1d458286a8..581ae20c46e4 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -550,7 +550,7 @@ static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_r { #if defined(CONFIG_DEBUG_FS) const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx); - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; if (ring_name) debugfs_create_file(ring_name, 0444, root, ring, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5c65b6dfb99a..69d0c12fa419 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -682,8 +682,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev, - rdev->ddev->anon_inode->i_mapping, - rdev->ddev->vma_offset_manager, + rdev_to_drm(rdev)->anon_inode->i_mapping, + rdev_to_drm(rdev)->vma_offset_manager, rdev->need_swiotlb, dma_addressing_limited(&rdev->pdev->dev)); if (r) { @@ -890,7 +890,7 @@ static const struct file_operations radeon_ttm_gtt_fops = { static void radeon_ttm_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct drm_minor *minor = rdev->ddev->primary; + struct drm_minor *minor = rdev_to_drm(rdev)->primary; struct dentry *root = minor->debugfs_root; debugfs_create_file("radeon_vram", 0444, root, rdev, diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d4d1501e6576..d6c18fd740ec 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -379,7 +379,7 @@ DEFINE_SHOW_ATTRIBUTE(rs400_debugfs_gart_info); static void rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rs400_gart_info", 0444, root, rdev, &rs400_debugfs_gart_info_fops); @@ -474,7 +474,7 @@ int rs400_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -552,7 +552,7 @@ int rs400_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs400_mc_init(rdev); /* Fence driver */ diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 5c162778899b..88c8e91ea651 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -321,7 +321,7 @@ void rs600_pm_misc(struct radeon_device *rdev) void rs600_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -339,7 +339,7 @@ void rs600_pm_prepare(struct radeon_device *rdev) void rs600_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -408,7 +408,7 @@ void rs600_hpd_set_polarity(struct radeon_device *rdev, void rs600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -435,7 +435,7 @@ void rs600_hpd_init(struct radeon_device *rdev) void rs600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -797,7 +797,7 @@ int rs600_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -806,7 +806,7 @@ int rs600_irq_process(struct radeon_device *rdev) } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1133,7 +1133,7 @@ int rs600_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs600_mc_init(rdev); r100_debugfs_rbbm_init(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 14fb0819b8c1..016eb4992803 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -845,7 +845,7 @@ int rs690_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs690_mc_init(rdev); rv515_debugfs(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index bbc6ccabf788..1b4dfb645585 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -255,7 +255,7 @@ DEFINE_SHOW_ATTRIBUTE(rv515_debugfs_ga_info); void rv515_debugfs(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv515_pipes_info", 0444, root, rdev, &rv515_debugfs_pipes_info_fops); @@ -636,7 +636,7 @@ int rv515_init(struct radeon_device *rdev) if (radeon_boot_test_post_card(rdev) == false) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 9ce12fa3c356..7d4b0bf59109 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1935,7 +1935,7 @@ int rv770_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 15759c8ca5b7..6c95575ce109 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6277,7 +6277,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -6839,7 +6839,7 @@ int si_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); From a9ed2f052c5c14e4be58c5ec8794dffc87588123 Mon Sep 17 00:00:00 2001 From: Wu Hoi Pok Date: Sun, 30 Jun 2024 12:59:22 -0400 Subject: [PATCH 0101/1523] drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc "drm_dev_alloc" is deprecated, in order to use the newer "devm_drm_dev_alloc", the "drm_device" is stored inside "radeon_device", by changing "rdev_to_drm(rdev)" other functions still gain access to the member "drm_device". Also, "devm_drm_dev_alloc" is now allocating "radeon_device", allocation inside "radeon_driver_load_kms" has to be removed. In "radeon_device_init", it originally assigned "rdev->dev" etc. However it is already done right after "devm_drm_dev_alloc" as you can see down below. It is better remove them. Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Signed-off-by: Wu Hoi Pok Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 4 ++-- drivers/gpu/drm/radeon/radeon_device.c | 3 --- drivers/gpu/drm/radeon/radeon_drv.c | 12 +++++++++--- drivers/gpu/drm/radeon/radeon_kms.c | 8 +------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ae35c102a487..fd8a4513025f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2297,7 +2297,7 @@ typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); struct radeon_device { struct device *dev; - struct drm_device *ddev; + struct drm_device ddev; struct pci_dev *pdev; #ifdef __alpha__ struct pci_controller *hose; @@ -2478,7 +2478,7 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) { - return rdev->ddev; + return &rdev->ddev; } /* diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 32851632643d..554b236c2328 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1285,9 +1285,6 @@ int radeon_device_init(struct radeon_device *rdev, bool runtime = false; rdev->shutdown = false; - rdev->dev = &pdev->dev; - rdev->ddev = ddev; - rdev->pdev = pdev; rdev->flags = flags; rdev->family = flags & RADEON_FAMILY_MASK; rdev->is_atom_bios = false; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7b8aa8406751..f36aa71c57c7 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -260,6 +260,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, { unsigned long flags = 0; struct drm_device *ddev; + struct radeon_device *rdev; int ret; if (!ent) @@ -300,9 +301,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - ddev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); + rdev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*rdev), ddev); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + rdev->dev = &pdev->dev; + rdev->pdev = pdev; + ddev = rdev_to_drm(rdev); + ddev->dev_private = rdev; ret = pci_enable_device(pdev); if (ret) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index a16590c6247f..645e33bf7947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -104,15 +104,9 @@ done_free: int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct pci_dev *pdev = to_pci_dev(dev->dev); - struct radeon_device *rdev; + struct radeon_device *rdev = dev->dev_private; int r, acpi_status; - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); - if (rdev == NULL) { - return -ENOMEM; - } - dev->dev_private = (void *)rdev; - #ifdef __alpha__ rdev->hose = pdev->sysdata; #endif From 27cdf8c3cae2ad1dbfe1a32ff999c98349366862 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:06:25 +0800 Subject: [PATCH 0102/1523] drm/amdgpu: optimize umc v12 address conversion function Split into 3 parts: 1. Convert soc physical address via ras ta. 2. Expand bad pages from soc physical address. 3. Dump bad address info. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 120 ++++++++++++++++--------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 9dbb13adb661..54d9f0a44f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -225,26 +225,16 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, } } -static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, - struct ta_ras_query_address_input *addr_in, - uint64_t *pfns, int len) +static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, + struct ta_ras_query_address_output *addr_out, + uint64_t err_addr) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; - uint32_t pos = 0; + uint64_t soc_pa, retired_page, column; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return 0; - } - - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; + soc_pa = addr_out->pa.pa; + bank = addr_out->pa.bank; + channel_index = addr_out->pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; @@ -254,6 +244,37 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } +} + +static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + uint64_t soc_pa, retired_page, column; + uint32_t pos = 0; + + soc_pa = pa_addr; + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -263,13 +284,6 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -277,14 +291,40 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); } return pos; } +static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + uint64_t *addr, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + addr_in.addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return -EINVAL; + } + + if (dump_addr) + umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); + + *addr = addr_out.pa.pa; + + return 0; +} + static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) @@ -483,12 +523,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; - struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0; + uint64_t err_addr, hash_val = 0, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count; - int ret; + int count, ret; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -514,17 +552,17 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); + ret = umc_v12_0_convert_mca_to_addr(adev, + err_addr, MCA_IPID_2_UMC_CH(ipid), + MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + if (ret) + return ret; + memset(page_pfn, 0, sizeof(page_pfn)); - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); - addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); - addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); - addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); - - count = umc_v12_0_convert_err_addr(adev, - &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + pa_addr, + page_pfn, ARRAY_SIZE(page_pfn)); if (count <= 0) { dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); return 0; From 56631dee2932dbc203f0abd1011aa9d3d621e206 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:14:22 +0800 Subject: [PATCH 0103/1523] drm/amdgpu: optimize logging deferred error info 1. Use pa_pfn as the radix-tree key index to log deferred error info. 2. Use local array to store a row of bad pages. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 14 +--- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 87 ++++++++++++------------- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 5 ++ 4 files changed, 51 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index dcf1f3dbb5c4..f607ff620015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -476,10 +476,10 @@ struct ras_err_pages { }; struct ras_ecc_err { - u64 hash_index; uint64_t status; uint64_t ipid; uint64_t addr; + uint64_t pa_pfn; struct ras_err_pages err_pages; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2f84bdb8c594..096e867a6a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -519,18 +519,10 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, ecc_log = &con->umc_ecc_log; mutex_lock(&ecc_log->lock); - ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); - if (!ret) { - struct ras_err_pages *err_pages = &ecc_err->err_pages; - int i; - - /* Reserve memory */ - for (i = 0; i < err_pages->count; i++) - amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); - + ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err); + if (!ret) radix_tree_tag_set(ecc_tree, - ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); - } + ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); mutex_unlock(&ecc_log->lock); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 54d9f0a44f18..0e6c3ce3ea8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -524,9 +524,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0, pa_addr = 0; + uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count, ret; + int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -559,6 +559,32 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (ret) return ret; + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + + /* If converted pa_pfn is 0, use pa C4 pfn. */ + if (!ecc_err->pa_pfn) + ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_queried_count++; + else + dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); + + kfree(ecc_err); + return ret; + } + + con->umc_ecc_log.de_queried_count++; + memset(page_pfn, 0, sizeof(page_pfn)); count = umc_v12_0_lookup_bad_pages_in_a_row(adev, pa_addr, @@ -568,44 +594,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, return 0; } - ret = amdgpu_umc_build_pages_hash(adev, - page_pfn, count, &hash_val); - if (ret) { - dev_err(adev->dev, "Fail to build error pages hash\n"); - return ret; - } - - ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); - if (!ecc_err) - return -ENOMEM; - - ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); - if (!ecc_err->err_pages.pfn) { - kfree(ecc_err); - return -ENOMEM; - } - - memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); - ecc_err->err_pages.count = count; - - ecc_err->hash_index = hash_val; - ecc_err->status = status; - ecc_err->ipid = ipid; - ecc_err->addr = addr; - - ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); - if (ret) { - if (ret == -EEXIST) - con->umc_ecc_log.de_queried_count++; - else - dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); - - kfree(ecc_err->err_pages.pfn); - kfree(ecc_err); - return ret; - } - - con->umc_ecc_log.de_queried_count++; + /* Reserve memory */ + for (i = 0; i < count; i++) + amdgpu_ras_reserve_page(adev, page_pfn[i]); /* The problem case is as follows: * 1. GPU A triggers a gpu ras reset, and GPU A drives @@ -631,16 +622,21 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, struct ras_ecc_err *ecc_err, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t i = 0; - int ret = 0; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + int ret, i, count; if (!err_data || !ecc_err) return -EINVAL; - for (i = 0; i < ecc_err->err_pages.count; i++) { + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, + page_pfn, ARRAY_SIZE(page_pfn)); + + for (i = 0; i < count; i++) { ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, - ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, MCA_IPID_2_UMC_CH(ecc_err->ipid), MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) @@ -674,7 +670,8 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); break; } - radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + radix_tree_tag_clear(ecc_tree, + entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); } mutex_unlock(&con->umc_ecc_log.lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b4974793850b..be5598d76c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -81,6 +81,11 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) +#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ + ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ + (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ + (0x1ULL << UMC_V12_0_PA_R13_BIT))) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); From a7e8467fbeee654e390aad1736291d273b407a2c Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 11 Jul 2024 16:27:08 +0800 Subject: [PATCH 0104/1523] drm/amdgpu: Remove unused code Remove unused code. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 86 ------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 5 -- 4 files changed, 124 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d0307c55da50..0fb2d9285834 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2881,9 +2881,6 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { mutex_init(&ecc_log->lock); - /* Set any value as siphash key */ - memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); - INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; ecc_log->prev_de_queried_count = 0; @@ -4611,8 +4608,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; @@ -4622,18 +4617,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) -{ - /* This function will be retired. */ - return; -} - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) -{ - list_del(&mca_err_addr->node); - kfree(mca_err_addr); -} - int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count) @@ -4650,9 +4633,6 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->ue_count += count; err_data->ue_count += count; @@ -4697,9 +4677,6 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->de_count += count; err_data->de_count += count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f607ff620015..7ddd13d5c06b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,7 +28,6 @@ #include #include #include -#include #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -485,7 +484,6 @@ struct ras_ecc_err { struct ras_ecc_log_info { struct mutex lock; - siphash_key_t ecc_key; struct radix_tree_root de_page_tree; uint64_t de_queried_count; uint64_t prev_de_queried_count; @@ -573,7 +571,6 @@ struct ras_fs_data { }; struct ras_err_addr { - struct list_head node; uint64_t err_status; uint64_t err_ipid; uint64_t err_addr; @@ -584,7 +581,6 @@ struct ras_err_info { u64 ce_count; u64 ue_count; u64 de_count; - struct list_head err_addr_list; }; struct ras_err_node { @@ -957,12 +953,6 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, struct aca_handle *handle, char *buf, void *data); -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *err_addr); - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *mca_err_addr); - void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 096e867a6a6d..2ed55f3c5fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -204,55 +204,6 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms) -{ - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - uint32_t timeout = timeout_ms; - - memset(&err_data, 0, sizeof(err_data)); - amdgpu_ras_error_data_init(&err_data); - - do { - - amdgpu_umc_handle_bad_pages(adev, &err_data); - - if (timeout && !err_data.de_count) { - msleep(1); - timeout--; - } - - } while (timeout && !err_data.de_count); - - if (!timeout) - dev_warn(adev->dev, "Can't find bad pages\n"); - - if (err_data.de_count) - dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); - - if (obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - obj->err_data.de_count += err_data.de_count; - } - - amdgpu_ras_error_data_fini(&err_data); - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - - if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { - con->gpu_reset_flags |= reset; - amdgpu_ras_reset_gpu(adev); - } - - return 0; -} - int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) @@ -472,43 +423,6 @@ int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, return 0; } -static int amdgpu_umc_uint64_cmp(const void *a, const void *b) -{ - uint64_t *addr_a = (uint64_t *)a; - uint64_t *addr_b = (uint64_t *)b; - - if (*addr_a > *addr_b) - return 1; - else if (*addr_a < *addr_b) - return -1; - else - return 0; -} - -/* Use string hash to avoid logging the same bad pages repeatedly */ -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; - int offset = 0, i = 0; - uint64_t hash_val; - - if (!pfns || !len) - return -EINVAL; - - sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); - - for (i = 0; i < len; i++) - offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); - - hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); - - *val = hash_val; - - return 0; -} - int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 5f50c69c3cec..ce4179db2a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -127,13 +127,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms); - int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val); int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); From 7a38efeee6b59d0984ff0470d234a06fe6a7cf3c Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 21:13:29 +0800 Subject: [PATCH 0105/1523] drm/radeon: fix null pointer dereference in radeon_add_common_modes In radeon_add_common_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a possible NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid npd. Cc: stable@vger.kernel.org Fixes: d50ba256b5f1 ("drm/kms: start adding command line interface using fb.") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_connectors.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 69693ba5949e..880edabfc9e3 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -505,6 +505,9 @@ static void radeon_add_common_modes(struct drm_encoder *encoder, struct drm_conn continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); } } From ca82ee4e9fc1443f9ceec615918b299766432eec Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 18 Jul 2024 09:30:01 +0530 Subject: [PATCH 0106/1523] drm/amd/display: Add 'pstate_keepout' kdoc entry in 'optc1_program_timing' Fixes the below with gcc W=1: Function parameter or struct member 'pstate_keepout' not described in 'optc1_program_timing' Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index f00d27b7c6fe..097d06023e64 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -148,6 +148,7 @@ void optc1_setup_vertical_interrupt2( * @vstartup_start: Vstartup period. * @vupdate_offset: Vupdate starting position. * @vupdate_width: Vupdate duration. + * @pstate_keepout: determines low power mode timing during refresh * @signal: DC signal types. * @use_vbios: to program timings from BIOS command table. * From f2ac52634963fc38e4935e11077b6f7854e5d700 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Jul 2024 17:54:11 -0400 Subject: [PATCH 0107/1523] drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell We seem to have a case where SDMA will sometimes miss a doorbell if GFX is entering the powergating state when the doorbell comes in. To workaround this, we can update the wptr via MMIO, however, this is only safe because we disallow gfxoff in begin_ring() for SDMA 5.2 and then allow it again in end_ring(). Enable this workaround while we are root causing the issue with the HW team. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/3440 Tested-by: Friedrich Vock Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 66bb85955fa4..93890f83e270 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -225,6 +225,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -1707,6 +1715,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) * but it shouldn't hurt for other parts since * this GFXOFF will be disallowed anyway when SDMA is * active, this just makes it explicit. + * sdma_v5_2_ring_set_wptr() takes advantage of this + * to update the wptr because sometimes SDMA seems to miss + * doorbells when entering PG. If you remove this, update + * sdma_v5_2_ring_set_wptr() as well! */ amdgpu_gfx_off_ctrl(adev, false); } From 4df9e2200fb8ae7199e46eaffeb9430bc0ba3ea7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 12:01:36 +0530 Subject: [PATCH 0108/1523] drm/amdgpu: Add sdma_v7_0 ip dump for devcoredump Add ip dump for sdma_v7_0 for devcoredump for all instances of sdma. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 91 ++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..327b5387949a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -51,6 +51,64 @@ MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1217,6 +1275,8 @@ static int sdma_v7_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1247,6 +1307,13 @@ static int sdma_v7_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1263,6 +1330,8 @@ static int sdma_v7_0_sw_fini(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) sdma_v12_0_free_ucode_buffer(adev); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1466,6 +1535,27 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v7_0_get_reg_offset(adev, i, + sdma_reg_list_7_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .name = "sdma_v7_0", .early_init = sdma_v7_0_early_init, @@ -1483,6 +1573,7 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_clockgating_state = sdma_v7_0_set_clockgating_state, .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, + .dump_ip_state = sdma_v7_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { From 666f14cab21b17ccc1bdfe1e82458aa429b3b7e0 Mon Sep 17 00:00:00 2001 From: David Belanger Date: Mon, 10 Jun 2024 16:38:55 -0400 Subject: [PATCH 0109/1523] drm/amdgpu: Fix atomics on GFX12 If PCIe supports atomics, configure register to prevent DF from breaking atomics in separate load/store operations. Signed-off-by: David Belanger Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +++ drivers/gpu/drm/amd/amdgpu/df_v4_15.c | 45 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/df_v4_15.h | 30 +++++++++++++ drivers/gpu/drm/amd/amdgpu/soc24.c | 4 ++ .../amd/include/asic_reg/df/df_4_15_offset.h | 28 ++++++++++++ .../amd/include/asic_reg/df/df_4_15_sh_mask.h | 28 ++++++++++++ 8 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.c create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9dd8294032ef..38408e4e158e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -106,7 +106,8 @@ amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ df_v4_3.o \ - df_v4_6_2.o + df_v4_6_2.o \ + df_v4_15.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1..eb605e79ae0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b241f61fe9c9..ac108fca64fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = &df_v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = &df_v4_15_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index 000000000000..2a573e33908b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); + } +} + +const struct amdgpu_df_funcs df_v4_15_funcs = { + .hw_init = df_v4_15_hw_init +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h new file mode 100644 index 000000000000..dddf2422112a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_15_H__ +#define __DF_V4_15_H__ + +extern const struct amdgpu_df_funcs df_v4_15_funcs; + +#endif /* __DF_V4_15_H__ */ + diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index d27fb4ea6612..7d641d0dadba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -484,6 +484,10 @@ static int soc24_common_hw_init(void *handle) */ if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); + + if (adev->df.funcs->hw_init) + adev->df.funcs->hw_init(adev); + /* enable the doorbell aperture */ soc24_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h new file mode 100644 index 000000000000..c2b009752f60 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_OFFSET_HEADER +#define _df_4_15_OFFSET_HEADER + +#define regNCSConfigurationRegister1 0x0901 +#define regNCSConfigurationRegister1_BASE_IDX 4 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h new file mode 100644 index 000000000000..9868a9c32795 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_SH_MASK_HEADER +#define _df_4_15_SH_MASK_HEADER + +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT 0x3 +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing_MASK 0x0003FFF8L + +#endif From 93381e6b61804b777f60357d96d6254eb10b9b56 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 22:11:41 +0800 Subject: [PATCH 0110/1523] drm/amdgpu: fix a possible null pointer dereference In amdgpu_connector_add_common_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid npd. Cc: stable@vger.kernel.org Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cae7479c3ecf..bd0fbdc5f55d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -442,6 +442,9 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + return; + drm_mode_probed_add(connector, mode); } } From 6472de66c0aa18d50a4b5ca85f8272e88a737676 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 18 Jul 2024 22:17:35 +0800 Subject: [PATCH 0111/1523] drm/amd/amdgpu: Fix uninitialized variable warnings Return 0 to avoid returning an uninitialized variable r. Cc: stable@vger.kernel.org Fixes: 230dd6bb6117 ("drm/amd/amdgpu: implement mode2 reset on smu_v13_0_10") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 04c797d54511..0af648931df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int From abf839f5ebd98134f51764f2a2841faa6cbf268a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 12:03:33 +0530 Subject: [PATCH 0112/1523] drm/amdgpu: add print support for sdma_v_7_0 ip_dump Add print support for ip dump for sdma_v_7_0 in devcoredump. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 327b5387949a..62ef4a737a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1535,6 +1535,27 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v7_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1574,6 +1595,7 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, .dump_ip_state = sdma_v7_0_dump_ip_state, + .print_ip_state = sdma_v7_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { From 80237bfc031cd74cb8abf0d21094207284d56a48 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 17:10:33 +0530 Subject: [PATCH 0113/1523] drm/amdgpu: Add sdma_v4_0 ip dump for devcoredump Add ip dump for sdma_v4_0 for devcoredump for all instances of sdma. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 80 ++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 772604feb6ac..f39d3d94ba9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL) +}; + #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -1750,6 +1797,8 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1870,6 +1919,13 @@ static int sdma_v4_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1890,6 +1946,8 @@ static int sdma_v4_0_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -2292,6 +2350,27 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_0_get_reg_offset(adev, i, + sdma_reg_list_4_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, @@ -2308,6 +2387,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_clockgating_state = sdma_v4_0_set_clockgating_state, .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, + .dump_ip_state = sdma_v4_0_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { From fec5f8e8c6bcf83ed7a392801d7b44c5ecfc1e82 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 2 Jul 2024 11:54:30 +0200 Subject: [PATCH 0114/1523] drm/amdgpu: disallow multiple BO_HANDLES chunks in one submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this commit, only submits with both a BO_HANDLES chunk and a 'bo_list_handle' would be rejected (by amdgpu_cs_parser_bos). But if UMD sent multiple BO_HANDLES, what would happen is: * only the last one would be really used * all the others would leak memory as amdgpu_cs_p1_bo_handles would overwrite the previous p->bo_list value This commit rejects submissions with multiple BO_HANDLES chunks to match the implementation of the parser. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9..cde2f4548a62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -263,6 +263,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; + /* Only a single BO list is allowed to simplify handling. */ + if (p->bo_list) + ret = -EINVAL; + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; From 585e3fdb36f59c5cfed0ae06c852dc1df22b1d60 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 11 Dec 2023 10:45:38 +0530 Subject: [PATCH 0115/1523] drm/amdgpu: Add empty HDP flush function to JPEG v4.0.3 JPEG v4.0.3 doesn't support HDP flush when RRMT is enabled. Instead, mmsch fw will do the flush. This change is necessary for JPEG v4.0.3, no need for backward compatibility Signed-off-by: Lijo Lazar Signed-off-by: Jane Jian Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 04d8966423de..30a143ab592d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -621,6 +621,13 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } +static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through JPEG ring. + */ +} + /** * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer * @@ -1072,6 +1079,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_jpeg_dec_ring_test_ring, .test_ib = amdgpu_jpeg_dec_ring_test_ib, .insert_nop = jpeg_v4_0_3_dec_ring_nop, From 49cfaebe48e97500a68d5322a8194736b0a2c3cf Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 11 Dec 2023 11:18:42 +0530 Subject: [PATCH 0116/1523] drm/amdgpu: Add empty HDP flush function to VCN v4.0.3 VCN 4.0.3 does not HDP flush with RRMT enabled. Instead, mmsch will do the HDP flush. This change is necessary for VCN v4.0.3, no need for backward compatibility Signed-off-by: Lijo Lazar Signed-off-by: Jane Jian Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f53054e39ebb..101b120f6fbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1375,6 +1375,13 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through VCN ring. + */ +} + /** * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer * @@ -1415,6 +1422,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, From caaf576292f8ccef5cdc0ac16e77b87dbf6e17ab Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Mon, 15 Jul 2024 18:48:31 +0800 Subject: [PATCH 0117/1523] drm/amdgpu/vcn: Use offsets local to VCN/JPEG in VF For VCN/JPEG 4.0.3, use only the local addressing scheme. - Mask bit higher than AID0 range v2 remain the case for mmhub use master XCC Signed-off-by: Jane Jian Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 46 ++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 30a143ab592d..ad524ddc9760 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -32,6 +32,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 101b120f6fbd..9bae95538b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -1375,6 +1378,43 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { /* VCN engine access for HDP flush doesn't work when RRMT is enabled. @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; From f9e292cbba21e79abea7315b41a52c36ea2b6980 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 11:17:59 -0400 Subject: [PATCH 0118/1523] drm/amdkfd: kfd_bo_mapped_dev support partition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change amdgpu_amdkfd_bo_mapped_to_dev to use drm_priv as parameter instead of adev, to support spatial partition. This is only used by CRIU checkpoint restore now. No functional change. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e7bb1ca35801..66b1c72c81e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -345,7 +345,7 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad pasid_notify pasid_fn, void *data, uint32_t reset); bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 11672bfe4fad..199e387d35f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -3200,12 +3200,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem) { + struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv); struct kfd_mem_attachment *entry; list_for_each_entry(entry, &mem->attachments, list) { - if (entry->is_mapped && entry->adev == adev) + if (entry->is_mapped && entry->bo_va->base.vm == vm) return true; } return false; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 32e5db509560..1d9b21628be7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1963,7 +1963,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); for (i = 0; i < p->n_pdds; i++) { - if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem)) + if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; } From c86ad39140bbcb9dc75a10046c2221f657e8083b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Sun, 14 Jul 2024 11:11:05 -0400 Subject: [PATCH 0119/1523] drm/amdkfd: amdkfd_free_gtt_mem clear the correct pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass pointer reference to amdgpu_bo_unref to clear the correct pointer, otherwise amdgpu_bo_unref clear the local variable, the original pointer not set to NULL, this could cause use-after-free bug. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 03205e3c3746..c272461d70a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -364,15 +364,15 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { - struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(bo, true); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unpin(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&(bo)); + amdgpu_bo_reserve(*bo, true); + amdgpu_bo_kunmap(*bo); + amdgpu_bo_unpin(*bo); + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); } int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 66b1c72c81e5..6e591280774b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -235,7 +235,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1d9b21628be7..823f245dc7d0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -423,7 +423,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&wptr_bo); err_wptr_map_gart: err_bind_process: err_pdd: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f4d20adaa068..6619028dd58b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -907,7 +907,7 @@ node_alloc_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); alloc_gtt_mem_failure: dev_err(kfd_device, "device %x:%x NOT added due to errors\n", @@ -925,7 +925,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); } kfree(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4f48507418d2..420444eb8e98 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2621,7 +2621,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 50a81da43ce1..d9ae854b6908 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -225,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 17e42161b015..9e29b92eb523 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1048,7 +1048,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, - pdd->proc_ctx_bo); + &pdd->proc_ctx_bo); /* * before destroying pdd, make sure to report availability * for auto suspend diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 21f5a1fb3bf8..36f0460cbffe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -204,9 +204,9 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, } if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo); } } From a11b36ba9c1ac494c6a5cf7f1a5e68c1ce4dbe18 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 17:12:56 +0530 Subject: [PATCH 0120/1523] drm/amdgpu: add print support for sdma_v_4_0 ip_dump Add print support for ip dump for sdma_v_4_0 in devcoredump. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f39d3d94ba9b..23ef4eb36b40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2350,6 +2350,27 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v4_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2388,6 +2409,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, .dump_ip_state = sdma_v4_0_dump_ip_state, + .print_ip_state = sdma_v4_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { From db54a725d57985c869f6fe4153a36cd229ab0b73 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 18:40:47 +0530 Subject: [PATCH 0121/1523] drm/amdgpu: Add sdma_v4_4_2 ip dump for devcoredump Add ip dump for sdma_v4_4_2 for devcoredump for all instances of sdma. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 2c55bfd935bb..67e0e894579a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -46,6 +46,53 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL) +}; + #define mmSMNAID_AID0_MCA_SMU 0x03b30400 #define WREG32_SDMA(instance, offset, value) \ @@ -1291,6 +1338,8 @@ static int sdma_v4_4_2_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 aid_id; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { @@ -1386,6 +1435,13 @@ static int sdma_v4_4_2_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1406,6 +1462,8 @@ static int sdma_v4_4_2_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1799,6 +1857,27 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_4_2_get_reg_offset(adev, i, + sdma_reg_list_4_4_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .name = "sdma_v4_4_2", .early_init = sdma_v4_4_2_early_init, @@ -1815,6 +1894,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_clockgating_state = sdma_v4_4_2_set_clockgating_state, .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, + .dump_ip_state = sdma_v4_4_2_dump_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { From fb91065851cd5f2735348c5f3eddeeca3d7c2973 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 11:53:50 -0400 Subject: [PATCH 0122/1523] drm/amdkfd: Refactor queue wptr_bo GART mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helper function kfd_queue_acquire_buffers to get queue wptr_bo reference from queue write_ptr if it is mapped to the KFD node with expected size. Add wptr_bo to structure queue_properties because structure queue is allocated after queue buffers are validated, then we can remove wptr_bo parameter from pqm_create_queue. Rename structure queue wptr_bo_gart to hold wptr_bo reference for GART mapping and umapping. Move MES wptr_bo_gart mapping to init_user_queue, the same location with queue ctx_bo GART mapping. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 56 +++--------------- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 +++-- .../amd/amdkfd/kfd_process_queue_manager.c | 45 +++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 57 +++++++++++++++++++ 7 files changed, 116 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 6e591280774b..4ed49265c764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -322,7 +322,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 199e387d35f4..0ab37e7aec26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2226,11 +2226,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count * @bo: Buffer object to be mapped + * @bo_gart: Return bo reference * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart) { int ret; @@ -2257,7 +2258,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); + *bo_gart = amdgpu_bo_ref(bo); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 823f245dc7d0..202f24ee4bd7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -247,8 +247,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; - q_properties->read_ptr = (uint32_t *) args->read_pointer_address; - q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->read_ptr = (void __user *)args->read_pointer_address; + q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; q_properties->eop_ring_buffer_size = args->eop_buffer_size; q_properties->ctx_save_restore_area_address = @@ -306,7 +306,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; - struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -342,53 +341,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, } } - /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work - * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) - */ - if (dev->kfd->shared_resources.enable_mes && - ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) - >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { - struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; - - wptr_vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (err) - goto err_wptr_map_gart; - - wptr_mapping = amdgpu_vm_bo_lookup_mapping( - wptr_vm, args->write_pointer_address >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - pr_err("Failed to lookup wptr bo\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - wptr_bo = wptr_mapping->bo_va->base.bo; - if (wptr_bo->tbo.base.size > PAGE_SIZE) { - pr_err("Requested GART mapping for wptr bo larger than one page\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) { - pr_err("Queue memory allocated to wrong device\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo); - if (err) { - pr_err("Failed to map wptr bo to GART\n"); - goto err_wptr_map_gart; - } + err = kfd_queue_acquire_buffers(pdd, &q_properties); + if (err) { + pr_debug("failed to acquire user queue buffers\n"); + goto err_acquire_queue_buf; } pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -422,9 +385,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: - if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&wptr_bo); -err_wptr_map_gart: + kfd_queue_release_buffers(pdd, &q_properties); +err_acquire_queue_buf: err_bind_process: err_pdd: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 420444eb8e98..fdc76c24b2e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -208,10 +208,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.mqd_addr = q->gart_mqd_addr; queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; - if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; - } + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; queue_input.is_kfd_process = 1; queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8..aba9bcd91f65 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -494,8 +494,8 @@ struct queue_properties { uint64_t queue_size; uint32_t priority; uint32_t queue_percent; - uint32_t *read_ptr; - uint32_t *write_ptr; + void __user *read_ptr; + void __user *write_ptr; void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; @@ -522,6 +522,8 @@ struct queue_properties { uint64_t tba_addr; uint64_t tma_addr; uint64_t exception_status; + + struct amdgpu_bo *wptr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ @@ -604,7 +606,7 @@ struct queue { uint64_t gang_ctx_gpu_addr; void *gang_ctx_cpu_ptr; - struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *wptr_bo_gart; }; enum KFD_MQD_TYPE { @@ -1284,6 +1286,10 @@ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size); +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); @@ -1320,7 +1326,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 36f0460cbffe..4947f28b3afb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -205,18 +205,21 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, if (dev->kfd->shared_resources.enable_mes) { amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart); } } void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; + struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q) + if (pqn->q) { + pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); + } kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); @@ -231,8 +234,7 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, struct amdgpu_bo *wptr_bo, - unsigned int qid) + struct file *f, unsigned int qid) { int retval; @@ -263,12 +265,32 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - (*q)->wptr_bo = wptr_bo; + + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + retval = -EINVAL; + goto free_gang_ctx_bo; + } + + retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, + &(*q)->wptr_bo_gart); + if (retval) { + pr_err("Failed to map wptr bo to GART\n"); + goto free_gang_ctx_bo; + } + } } pr_debug("PQM After init queue"); return 0; +free_gang_ctx_bo: + amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -280,7 +302,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -351,7 +372,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * allocate_sdma_queue() in create_queue() has the * corresponding check logic. */ - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -372,7 +393,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -490,6 +511,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { + retval = kfd_queue_release_buffers(pdd, &pqn->q->properties); + if (retval) + goto err_destroy_queue; + kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); @@ -971,7 +996,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0f6992b1895c..b4529ec298a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -82,3 +82,60 @@ void uninit_queue(struct queue *q) { kfree(q); } + +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *mapping; + u64 user_addr; + u64 size; + + user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!mapping) + goto out_err; + + if (user_addr != mapping->start || user_addr + size - 1 != mapping->last) { + pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", + expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, + (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); + goto out_err; + } + + *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + return 0; + +out_err: + *pbo = NULL; + return -EINVAL; +} + +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); + if (err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + return err; +} + +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + amdgpu_bo_unref(&properties->wptr_bo); + return 0; +} From 72dc6bf159467f43667bead6016965821186490b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 3 Jul 2024 14:44:15 -0400 Subject: [PATCH 0123/1523] drm/amd/display: Remove hardmax usage for dcn401 [WHY&HOW] Hardmax message will be retired for dcn4, so this removes it. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 44 ++++++++++--------- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++--- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 -- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index c453c5f15ce7..cce425dd62d2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -931,12 +931,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned static unsigned int dcn401_build_update_bandwidth_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool enter_display_off = false; bool update_active_fclk = false; @@ -1218,13 +1218,13 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( static unsigned int dcn401_build_update_display_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool force_reset = false; bool update_dispclk = false; @@ -1375,6 +1375,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1383,6 +1384,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build display related clocks update sequence */ num_steps = dcn401_build_update_display_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1474,33 +1476,34 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + const struct dc *dc = clk_mgr->base.ctx->dc; + struct dc_state *context = dc->current_state; + struct dc_clocks new_clocks; + int num_steps; if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) return; + /* build clock update */ + memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks)); + if (current_mode) { - if (clk_mgr_base->clks.p_state_change_support) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - else - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz; + new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz; + new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; } else { - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000; + new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz; + new_clocks.p_state_change_support = true; } -} -/* Set max memclk to highest DPM value */ -static void dcn401_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, + context, + &new_clocks, + true); - if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - return; - - dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + /* execute sequence */ + dcn401_execute_block_sequence(clk_mgr_base, num_steps); } /* Get current memclk states, update bounding box */ @@ -1631,7 +1634,6 @@ static struct clk_mgr_funcs dcn401_funcs = { .init_clocks = dcn401_init_clocks, .notify_wm_ranges = dcn401_notify_wm_ranges, .set_hard_min_memclk = dcn401_set_hard_min_memclk, - .set_hard_max_memclk = dcn401_set_hard_max_memclk, .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu, .are_clock_states_equal = dcn401_are_clock_states_equal, .enable_pme_wa = dcn401_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f07b13ad4ead..b71c4d8e73dd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5462,9 +5462,10 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl hubp->funcs->set_blank_regs(hubp, true); } } - - dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); - dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_max_memclk) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_min_memclk) + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; @@ -5513,7 +5514,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); // else: No-Op } else { @@ -5523,7 +5524,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) } } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); // else: No-Op } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index f4c1547a368f..779960278a5c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -416,9 +416,6 @@ void dcn401_init_hw(struct dc *dc) if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) - dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); - if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, false, false); From 295d91cbc700651782a60572f83c24861607b648 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Mon, 8 Jul 2024 19:29:49 -0400 Subject: [PATCH 0124/1523] drm/amd/display: Check for NULL pointer [why & how] Need to make sure plane_state is initialized before accessing its members. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Xi (Alex) Liu Signed-off-by: Sung Joon Kim Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 067f6555cfdf..ccbb15f1638c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -143,7 +143,8 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - pipe_ctx->plane_state->status.is_flip_pending = false; + if (pipe_ctx->plane_state) + pipe_ctx->plane_state->status.is_flip_pending = false; break; } From 3f7477bfbb906ec1cd9ad681475a04a142345eae Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 9 Jul 2024 15:56:36 -0400 Subject: [PATCH 0125/1523] drm/amd/display: Add private data type for RCG [why & how] Add private data types for better RCG control Reviewed-by: Chris Park Reviewed-by: Yihan Zhu Signed-off-by: Hansen Dsouza Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 68cd3258f4a9..64b25e5d9d7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,6 +41,87 @@ #define DC_LOGGER \ dccg->ctx->logger +enum physymclk_fe_source { + PHYSYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + PHYSYMCLK_FE_SYMCLK_B, + PHYSYMCLK_FE_SYMCLK_C, + PHYSYMCLK_FE_SYMCLK_D, + PHYSYMCLK_FE_SYMCLK_E, + PHYSYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum physymclk_source { + PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD18CLK, // Select phyd18clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD32CLK, // Select phyd32clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dtbclk_source { + DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DPREFCLK_0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DTBCLK0, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_DTBCLK1, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dppclk_clock_source { + DPP_REFCLK = 0, // refclk is selected + DPP_DCCG_DTO, // Functional clock selected is DTO tuned DPPCLK +}; + +enum dp_stream_clk_source { + DP_STREAM_DTBCLK_P0 = 0, // Selects functional for DP_STREAM_CLK as DTBCLK_P# + DP_STREAM_DTBCLK_P1, + DP_STREAM_DTBCLK_P2, + DP_STREAM_DTBCLK_P3, + DP_STREAM_DTBCLK_P4, + DP_STREAM_DTBCLK_P5, + DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_char_clk { + HDMI_CHAR_PHYAD18CLK = 0, // Selects functional for hdmi_char_clk as UNIPHYA PHYD18CLK + HDMI_CHAR_PHYBD18CLK, + HDMI_CHAR_PHYCD18CLK, + HDMI_CHAR_PHYDD18CLK, + HDMI_CHAR_PHYED18CLK, + HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_stream_clk_source { + HDMI_STREAM_DTBCLK_P0 = 0, // Selects functional for HDMI_STREAM_CLK as DTBCLK_P# + HDMI_STREAM_DTBCLK_P1, + HDMI_STREAM_DTBCLK_P2, + HDMI_STREAM_DTBCLK_P3, + HDMI_STREAM_DTBCLK_P4, + HDMI_STREAM_DTBCLK_P5, + HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_se_clk_source { + SYMCLK32_SE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_SE_PHYBD32CLK, + SYMCLK32_SE_PHYCD32CLK, + SYMCLK32_SE_PHYDD32CLK, + SYMCLK32_SE_PHYED32CLK, + SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_le_clk_source { + SYMCLK32_LE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_LE_PHYBD32CLK, + SYMCLK32_LE_PHYCD32CLK, + SYMCLK32_LE_PHYDD32CLK, + SYMCLK32_LE_PHYED32CLK, + SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dsc_clk_source { + DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK + DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock +}; + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); From 6fa4bf3dce0668a96faca0024e382f4489a9cc9b Mon Sep 17 00:00:00 2001 From: Revalla Hari Krishna Date: Mon, 8 Jul 2024 15:35:08 +0530 Subject: [PATCH 0126/1523] drm/amd/display: Refactoring HPO [Why] To refactor HPO files [How] Moved hpo related files to specific hpo folder and update Makefiles. Reviewed-by: Martin Leung Signed-off-by: Revalla Hari Krishna Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 -- drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/hpo/Makefile | 15 +++++++++++++++ .../{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.c | 0 .../{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.h | 0 .../{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.c | 0 .../{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.h | 0 7 files changed, 16 insertions(+), 3 deletions(-) rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_link_encoder.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => hpo}/dcn31/dcn31_hpo_dp_stream_encoder.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index ccb4b21338b9..b17277de0340 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -28,8 +28,6 @@ DCN30 := dcn30_vpg.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ - - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index e2601d0aba41..d510e4652c18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -5,7 +5,7 @@ # Makefile for dcn31. DCN31 = dcn31_panel_cntl.o \ - dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ + dcn31_apg.o \ dcn31_afmt.o dcn31_vpg.o AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) diff --git a/drivers/gpu/drm/amd/display/dc/hpo/Makefile b/drivers/gpu/drm/amd/display/dc/hpo/Makefile index c248bd86b477..7f2c9ee0dff1 100644 --- a/drivers/gpu/drm/amd/display/dc/hpo/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hpo/Makefile @@ -25,6 +25,21 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN30 +############################################################################### + +AMD_DAL_HPO_DCN30 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn30/,$(HPO_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN30) +############################################################################### +# DCN31 +############################################################################### +HPO_DCN31 = dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o + +AMD_DAL_HPO_DCN31 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn31/,$(HPO_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN31) +############################################################################### # DCN32 ############################################################################### HPO_DCN32 = dcn32_hpo_dp_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h rename to drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h From 332315885d3ccc6d8fe99700f3c2e4c24aa65ab7 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 4 Jul 2024 11:54:34 -0600 Subject: [PATCH 0127/1523] drm/amd/display: Remove ASSERT if significance is zero in math_ceil2 In the DML math_ceil2 function, there is one ASSERT if the significance is equal to zero. However, significance might be equal to zero sometimes, and this is not an issue for a ceil function, but the current ASSERT will trigger warnings in those cases. This commit removes the ASSERT if the significance is equal to zero to avoid unnecessary noise. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Chaitanya Dhere Signed-off-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index 4822dbcc86bb..e17b5ceba447 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -63,8 +63,6 @@ double math_ceil(const double arg) double math_ceil2(const double arg, const double significance) { - ASSERT(significance != 0); - return ((int)(arg / significance + 0.99999)) * significance; } From 14d6ca0740e6237f4bca2dabee4e240b6f4be508 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 9 Jul 2024 16:50:05 -0400 Subject: [PATCH 0128/1523] drm/amd/display: Add RCG helper functions [why & how] Add standard RCG helpers based on DCCG spec Reviewed-by: Daniel Miess Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 307 ++++++++++++++++++ 1 file changed, 307 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 64b25e5d9d7a..76f069f703ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -122,6 +122,302 @@ enum dsc_clk_source { DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock }; + +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + /* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */ + /* SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only */ + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_le_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE1_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_fe_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dppclk_rcg(struct dccg *dccg, + int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dpstreamclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK1_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK2_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK3_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_smclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1123,6 +1419,17 @@ struct dccg *dccg35_create( return NULL; } + /* Temporary declaration to handle unused static functions */ + (void)&dccg35_set_dsc_clk_rcg; + (void)&dccg35_set_symclk32_se_rcg; + (void)&dccg35_set_symclk32_le_rcg; + (void)&dccg35_set_physymclk_rcg; + (void)&dccg35_set_physymclk_fe_rcg; + (void)&dccg35_set_dtbclk_p_rcg; + (void)&dccg35_set_dppclk_rcg; + (void)&dccg35_set_dpstreamclk_rcg; + (void)&dccg35_set_smclk32_se_rcg; + base = &dccg_dcn->base; base->ctx = ctx; base->funcs = &dccg35_funcs; From 0cf80506918da0d2cd9d31e3b07831f94a2c5cc6 Mon Sep 17 00:00:00 2001 From: Ryan Seto Date: Wed, 10 Jul 2024 17:03:32 -0400 Subject: [PATCH 0129/1523] drm/amd/display: Fix visual confirm bug for SubVP [Why] Visual confirm was incorrect on dual monitor SubVP setup [How] Adjusted p_state assignment for dual monitor SubVP setup Signed-off-by: Ryan Seto Reviewed-by: Chaitanya Dhere Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/dml21_translation_helper.c | 13 ++++++++--- .../dc/dml2/dml21/dml21_translation_helper.h | 2 +- .../amd/display/dc/dml2/dml21/dml21_utils.c | 22 +++++++++++++++++-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 1fce61323201..9fdb209bcab2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1183,7 +1183,8 @@ void dml21_get_pipe_mcache_config( void dml21_set_dc_p_state_type( struct pipe_ctx *pipe_ctx, - struct dml2_per_stream_programming *stream_programming) + struct dml2_per_stream_programming *stream_programming, + bool sub_vp_enabled) { switch (stream_programming->uclk_pstate_method) { case dml2_uclk_pstate_support_method_vactive: @@ -1192,14 +1193,20 @@ void dml21_set_dc_p_state_type( break; case dml2_uclk_pstate_support_method_vblank: case dml2_uclk_pstate_support_method_fw_vblank_drr: - pipe_ctx->p_state_type = P_STATE_V_BLANK; + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_V_BLANK; break; case dml2_uclk_pstate_support_method_fw_subvp_phantom: case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: pipe_ctx->p_state_type = P_STATE_SUB_VP; break; case dml2_uclk_pstate_support_method_fw_drr: - pipe_ctx->p_state_type = P_STATE_FPO; + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_FPO; break; default: pipe_ctx->p_state_type = P_STATE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 97a8f51b7780..476a7f6e4875 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -26,5 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); -void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index e11246e525ac..ec4195336444 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -11,7 +11,6 @@ #include "dml2_core_dcn4_calcs.h" - int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) { int i; @@ -280,6 +279,23 @@ bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) dc_is_dp_signal(pipe_ctx->stream->signal)); } + +static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + return true; + } + } + return false; +} + + void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_per_stream_programming *stream_prog) { @@ -317,7 +333,9 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); - dml21_set_dc_p_state_type(pipe_ctx, stream_prog); + bool sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); } static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, From 94beb4ac1b3bc5fbeef977960a90ee4f594b4465 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Wed, 10 Jul 2024 17:09:04 -0400 Subject: [PATCH 0130/1523] drm/amd/display: ensure EASF and ISHARP coefficients are programmed together [Why] EASF coefficients are programmed to RAM and then RAM selector is toggled. ISHARP coefficients are programmed after so they will not be in the same RAM block [How] Move ISHARP programming before EASF programming Add flag if ISHARP coefficients are updated. If so, then force EASF coefficients programming Reviewed-by: Alvin Lee Signed-off-by: Samson Tam Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 27cbda1cf8cf..703d7b51c6c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -280,7 +280,8 @@ static void dpp401_dscl_set_scaler_filter( static void dpp401_dscl_set_scl_filter( struct dcn401_dpp *dpp, const struct scaler_data *scl_data, - bool chroma_coef_mode) + bool chroma_coef_mode, + bool force_coeffs_update) { bool h_2tap_hardcode_coef_en = false; bool v_2tap_hardcode_coef_en = false; @@ -343,7 +344,7 @@ static void dpp401_dscl_set_scl_filter( || (filter_v_c && (filter_v_c != dpp->filter_v_c)); } - if (filter_updated) { + if ((filter_updated) || (force_coeffs_update)) { uint32_t scl_mode = REG_READ(SCL_MODE); if (!h_2tap_hardcode_coef_en && filter_h) { @@ -955,9 +956,11 @@ static void dpp401_dscl_set_isharp_filter( * */ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, - const struct scaler_data *scl_data) + const struct scaler_data *scl_data, + bool *bs_coeffs_updated) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + *bs_coeffs_updated = false; PERF_TRACE(); /* ISHARP_MODE */ @@ -1030,12 +1033,14 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, dpp, scl_data->taps.v_taps, SCL_COEF_VERTICAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_v); + *bs_coeffs_updated = true; } if (scl_data->dscl_prog_data.filter_blur_scale_h) { dpp401_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps, SCL_COEF_HORIZONTAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_h); + *bs_coeffs_updated = true; } } PERF_TRACE(); @@ -1066,6 +1071,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; + bool bs_coeffs_updated = false; if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) return; @@ -1125,7 +1131,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); - dpp401_dscl_program_isharp(dpp_base, scl_data); + dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated); return; } @@ -1152,12 +1158,18 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, SCL_V_NUM_TAPS_C, v_num_taps_c, SCL_H_NUM_TAPS_C, h_num_taps_c); - dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr); + /* ISharp configuration + * - B&S coeffs are written to same coeff RAM as WB scaler coeffs + * - coeff RAM toggle is in EASF programming + * - if we are only programming B&S coeffs, then need to reprogram + * WB scaler coeffs and toggle coeff RAM together + */ + //if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated); + + dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated); /* Edge adaptive scaler function configuration */ if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_program_easf(dpp_base, scl_data); - /* isharp configuration */ - //if (dpp->base.ctx->dc->config.prefer_easf) - dpp401_dscl_program_isharp(dpp_base, scl_data); PERF_TRACE(); } From aaa21e6a33bae017fc190bd75f76baa29d259346 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Wed, 10 Jul 2024 14:15:57 -0400 Subject: [PATCH 0131/1523] drm/amd/display: Check if Mode is Supported Before Returning Result [Why] Even if the mode is not supported dml2_check_mode_supported() would still return true. This causes an unsupported mode to be programmed. [How] Check if the mode is supported or not and return the proper result. Reviewed-by: Chaitanya Dhere Signed-off-by: Austin Zheng Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index 30d07cd1065f..e9b40a45ffdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -138,8 +138,9 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) } in_out->is_supported = mcache_success; + result = result && in_out->is_supported; - return true; + return result; } bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) From f52ea01925f4eeb967c09cb5dae61608a2330541 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 11 Jul 2024 10:58:51 -0400 Subject: [PATCH 0132/1523] drm/amd/display: Add source select helper functions [why & how] Add source select helpers based on DCCG spec Reviewed-by: Daniel Miess Signed-off-by: Hansen Dsouza Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 324 ++++++++++++++++++ 1 file changed, 324 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 76f069f703ef..b698b773338a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -418,6 +418,320 @@ static void dccg35_set_smclk32_se_rcg( } } +static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* DSCCLK#_EN=0 switches to refclock from functional clock */ + + switch (inst) { + case 0: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src); + break; + case 1: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src); + break; + case 2: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src); + break; + case 3: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_src_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src + ) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE0_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE0_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE1_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE1_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE2_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE2_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE3_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE3_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static int +dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst) +{ + uint32_t en; + uint32_t src_sel; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en); + + if (en == 1 && src_sel == symclk_32_le_inst) + return 1; + + return 0; +} + + +static void dccg35_set_symclk32_le_src_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE0_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE0_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE1_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE1_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dcn35_set_dppclk_src_new(struct dccg *dccg, + int inst, enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src); + break; + case 1: + REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src); + break; + case 2: + REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src); + break; + case 3: + REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dtbclk_p_src_new( + struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* If DTBCLK_P#_EN is 0 refclock is selected as functional clock + * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL + */ + + switch (inst) { + case 0: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P0_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P0_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P1_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P1_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P2_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P2_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P3_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P3_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dpstreamclk_src_new( + struct dccg *dccg, + enum dp_stream_clk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK0_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK1_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 2: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK2_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 3: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK3_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_src_new( + struct dccg *dccg, + enum physymclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYASYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYBSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYCSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYDSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYESYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, + int symclk_fe_inst, + int symclk_be_inst) +{ + + uint32_t en = 0; + uint32_t src_sel = 0; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (symclk_fe_inst) { + case 0: + REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, &src_sel, SYMCLKA_FE_EN, &en); + break; + case 1: + REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, &src_sel, SYMCLKB_FE_EN, &en); + break; + case 2: + REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, &src_sel, SYMCLKC_FE_EN, &en); + break; + case 3: + REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, &src_sel, SYMCLKD_FE_EN, &en); + break; + case 4: + REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, &src_sel, SYMCLKE_FE_EN, &en); + break; + } + + if (en == 1 && src_sel == symclk_be_inst) + return 1; + + return 0; +} + +static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_source src, int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKA_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKB_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKC_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKD_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKE_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + break; + } +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1429,6 +1743,16 @@ struct dccg *dccg35_create( (void)&dccg35_set_dppclk_rcg; (void)&dccg35_set_dpstreamclk_rcg; (void)&dccg35_set_smclk32_se_rcg; + (void)&dccg35_set_dsc_clk_src_new; + (void)&dccg35_set_symclk32_se_src_new; + (void)&dccg35_is_symclk32_se_src_functional_le_new; + (void)&dccg35_set_symclk32_le_src_new; + (void)&dcn35_set_dppclk_src_new; + (void)&dccg35_set_dtbclk_p_src_new; + (void)&dccg35_set_dpstreamclk_src_new; + (void)&dccg35_set_physymclk_src_new; + (void)&dccg35_is_symclk_fe_src_functional_be; + (void)&dccg35_set_symclk_fe_src_new; base = &dccg_dcn->base; base->ctx = ctx; From f7543209ce5dc09e3f5a27a7d4ee53e226283719 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 4 Jul 2024 18:33:02 +0000 Subject: [PATCH 0133/1523] drm/amd/display: rename dcn3/dcn4 to more sound terms Use more accurate names to refer to the asic architecture. dcn3 in DML actually refers to DCN32 and DCN321, so rename it to dcn32x dcn4 refers to any DCN4x soc., and hence rename dcn4 to dcn4x Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/dml21_translation_helper.c | 36 +- .../amd/display/dc/dml2/dml21/dml21_utils.c | 2 +- .../dml21/inc/bounding_boxes/dcn3_soc_bb.h | 8 +- .../dml21/inc/bounding_boxes/dcn4_soc_bb.h | 8 +- .../dml21/inc/dml_top_soc_parameter_types.h | 10 +- .../display/dc/dml2/dml21/inc/dml_top_types.h | 10 +- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 2 +- .../src/dml2_core/dml2_core_dcn4_calcs.c | 186 ++++----- .../dml21/src/dml2_core/dml2_core_shared.c | 190 ++++----- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 130 +++---- .../display/dc/hubbub/dcn401/dcn401_hubbub.c | 364 +++++++++--------- .../gpu/drm/amd/display/dc/inc/hw/mem_input.h | 2 +- 12 files changed, 474 insertions(+), 474 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 9fdb209bcab2..ec663c7442e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1027,17 +1027,17 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) { /* TODO these should be the max of active, svp prefetch and idle should be tracked seperately */ - context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dispclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.dcfclk_khz; - context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.uclk_khz; - context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.fclk_khz; - context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.uclk_khz; - context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.fclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz; + context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz; context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; - context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz > 0; - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; } void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) @@ -1068,16 +1068,16 @@ static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_wat switch (wm_index) { case DML2_DCHUB_WATERMARK_SET_A: - wm_regs = &watermarks->dcn4.a; + wm_regs = &watermarks->dcn4x.a; break; case DML2_DCHUB_WATERMARK_SET_B: - wm_regs = &watermarks->dcn4.b; + wm_regs = &watermarks->dcn4x.b; break; case DML2_DCHUB_WATERMARK_SET_C: - wm_regs = &watermarks->dcn4.c; + wm_regs = &watermarks->dcn4x.c; break; case DML2_DCHUB_WATERMARK_SET_D: - wm_regs = &watermarks->dcn4.d; + wm_regs = &watermarks->dcn4x.d; break; case DML2_DCHUB_WATERMARK_SET_NUM: default: @@ -1125,11 +1125,11 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ global_sync = &stream_programming->phantom_stream.global_sync; } - pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4.vstartup_lines; - pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; - pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; - pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; - pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4.pstate_keepout_start_lines; + pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines; + pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels; + pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels; + pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines; pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index ec4195336444..51d491bffa32 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -326,7 +326,7 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; } - pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4.dppclk_khz; + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h index 521f77b8ac44..d82c681a5402 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h @@ -72,7 +72,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -128,7 +128,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn31 = { @@ -228,7 +228,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -332,7 +332,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 5af94f06c667..0fe70bd0dbac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -52,7 +52,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -78,7 +78,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn401 = { @@ -178,7 +178,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -282,7 +282,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 99d775adc3e0..4a46b21c3e55 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -26,7 +26,7 @@ struct dml2_soc_derates { struct dml2_soc_derate_values system_idle_average; }; -struct dml2_dcn3_soc_qos_params { +struct dml2_dcn32x_soc_qos_params { struct { unsigned int base_latency_us; unsigned int base_latency_pixel_vm_us; @@ -52,7 +52,7 @@ struct dml2_dcn4_uclk_dpm_dependent_qos_params { unsigned int average_latency_when_non_urgent_uclk_cycles; }; -struct dml2_dcn4_soc_qos_params { +struct dml2_dcn4x_soc_qos_params { unsigned int df_qos_response_time_fclk_cycles; unsigned int max_round_trip_to_furthest_cs_fclk_cycles; unsigned int mall_overhead_fclk_cycles; @@ -68,7 +68,7 @@ struct dml2_dcn4_soc_qos_params { enum dml2_qos_param_type { dml2_qos_param_type_dcn3, - dml2_qos_param_type_dcn4 + dml2_qos_param_type_dcn4x }; struct dml2_soc_qos_parameters { @@ -80,8 +80,8 @@ struct dml2_soc_qos_parameters { } writeback; union { - struct dml2_dcn3_soc_qos_params dcn3; - struct dml2_dcn4_soc_qos_params dcn4; + struct dml2_dcn32x_soc_qos_params dcn32x; + struct dml2_dcn4x_soc_qos_params dcn4x; } qos_params; enum dml2_qos_param_type qos_type; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index c47a07f473e5..a824ce56c54e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -228,7 +228,7 @@ struct dml2_per_plane_programming { union { struct { unsigned long dppclk_khz; - } dcn4; + } dcn4x; } min_clocks; struct dml2_mcache_surface_allocation mcache_allocation; @@ -263,7 +263,7 @@ union dml2_global_sync_programming { unsigned int vupdate_vupdate_width_pixels; unsigned int vready_offset_pixels; unsigned int pstate_keepout_start_lines; - } dcn4; + } dcn4x; }; struct dml2_per_stream_programming { @@ -274,7 +274,7 @@ struct dml2_per_stream_programming { unsigned long dscclk_khz; unsigned long dtbclk_khz; unsigned long phyclk_khz; - } dcn4; + } dcn4x; } min_clocks; union dml2_global_sync_programming global_sync; @@ -375,7 +375,7 @@ struct dml2_display_cfg_programming { unsigned long dispclk_khz; unsigned long dcfclk_deepsleep_khz; unsigned long dpp_ref_khz; - } dcn3; + } dcn32x; struct { struct { unsigned long uclk_khz; @@ -404,7 +404,7 @@ struct dml2_display_cfg_programming { uint32_t dpprefclk_did; uint32_t dtbrefclk_did; } divider_ids; - } dcn4; + } dcn4x; } min_clocks; bool uclk_pstate_supported; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index f5c6cd5cf5e9..9375c6ae1147 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -551,7 +551,7 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table; l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info; l->mode_programming_ex_params.programming = in_out->programming; - l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4.active.uclk_khz, + l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz, &core->clean_me_up.mode_lib.soc); result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 3b1e5c548435..0b671c665373 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -3183,7 +3183,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -3194,7 +3194,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -3224,7 +3224,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -3233,7 +3233,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -3263,7 +3263,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -3271,7 +3271,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -4961,7 +4961,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -6979,7 +6979,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -8079,32 +8079,32 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.excess_vactive_fill_bw_c); mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); @@ -8274,20 +8274,20 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8307,7 +8307,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8522,14 +8522,14 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -9015,13 +9015,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out //Re-ordering Buffer Support Check mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; @@ -9029,7 +9029,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9939,14 +9939,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9981,18 +9981,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10474,14 +10474,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, @@ -10567,32 +10567,32 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.excess_vactive_fill_bw_c); mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10601,10 +10601,10 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; @@ -12205,11 +12205,11 @@ void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) { - out->dcn4.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); - out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); - out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); - out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); - out->dcn4.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); + out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12658,7 +12658,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); @@ -12771,13 +12771,13 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 6d7701a97d3f..c54c29711a65 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -779,7 +779,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -1776,32 +1776,32 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); @@ -1995,21 +1995,21 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); double outstanding_latency_us = 0; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2029,7 +2029,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2243,14 +2243,14 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -2717,13 +2717,13 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou //Re-ordering Buffer Support Check mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; @@ -2731,7 +2731,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -5885,7 +5885,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -5896,7 +5896,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -5926,7 +5926,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -5935,7 +5935,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -5965,7 +5965,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -5973,7 +5973,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -7489,7 +7489,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -9835,14 +9835,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9877,18 +9877,18 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10410,14 +10410,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e } if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_out_of_order_return_per_channel_vm_only_bytes)); + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, @@ -10491,32 +10491,32 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10525,10 +10525,10 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { calculate_cursor_req_attributes( @@ -11971,14 +11971,14 @@ void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) { - // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 - // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4x.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - out->global_sync.dcn4.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; } void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) @@ -12281,7 +12281,7 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported; @@ -12394,13 +12394,13 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 0d847bccd5d2..0021bbaa4b91 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -82,9 +82,9 @@ static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dp get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -122,9 +122,9 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -146,9 +146,9 @@ static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); - in_out->programming->min_clocks.dcn4.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); - in_out->programming->min_clocks.dcn4.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); } static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) @@ -232,25 +232,25 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr { bool result; - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); return result; } @@ -262,12 +262,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -276,12 +276,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro if (result) { result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -289,9 +289,9 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro } // SVP is not supported on any coarse grained SoCs - display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; return result; } @@ -324,30 +324,30 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dispclk_khz, &state_table->dispclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.deepsleep_dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.deepsleep_dcfclk_khz, &state_table->dcfclk); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (result) - result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4.dppclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); } for (i = 0; i < display_cfg->display_config.num_streams; i++) { if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dscclk_khz, &state_table->dscclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dtbclk_khz, &state_table->dtbclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.phyclk_khz, &state_table->phyclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); } if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dpprefclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dtbrefclk_khz, &state_table->dtbclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); return result; } @@ -515,15 +515,15 @@ static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_m static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; } static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; } static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -539,14 +539,14 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore // active minimums must be boosted to prefetch minimums - if (in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4.active.uclk_khz) - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4.active.fclk_khz) - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4.active.dcfclk_khz) - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; // need some massaging for the dispclk ramping cases: dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); @@ -556,33 +556,33 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dispclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dispclk_did); + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) - in_out->programming->min_clocks.dcn4.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dpprefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dpprefclk_did); + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4.dpprefclk_khz, 1.0)); + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); } // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) - in_out->programming->min_clocks.dcn4.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dtbrefclk_did); + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); - in_out->programming->min_clocks.dcn4.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; - in_out->programming->min_clocks.dcn4.socclk_khz = mode_support_result->global.socclk_khz; + in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 181041d6d177..d36f758971a8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -75,108 +75,108 @@ bool hubbub401_program_urgent_watermarks( /* Repeat for water mark set A and B */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.urgent > hubbub2->watermarks.dcn4.a.urgent) { - hubbub2->watermarks.dcn4.a.urgent = watermarks->dcn4.a.urgent; + if (safe_to_lower || watermarks->dcn4x.a.urgent > hubbub2->watermarks.dcn4x.a.urgent) { + hubbub2->watermarks.dcn4x.a.urgent = watermarks->dcn4x.a.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4.a.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4x.a.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.urgent, watermarks->dcn4.a.urgent); - } else if (watermarks->dcn4.a.urgent < hubbub2->watermarks.dcn4.a.urgent) + watermarks->dcn4x.a.urgent, watermarks->dcn4x.a.urgent); + } else if (watermarks->dcn4x.a.urgent < hubbub2->watermarks.dcn4x.a.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_flip = watermarks->dcn4.a.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip = watermarks->dcn4x.a.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4.a.frac_urg_bw_flip); - } else if (watermarks->dcn4.a.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4x.a.frac_urg_bw_flip); + } else if (watermarks->dcn4x.a.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_nom = watermarks->dcn4.a.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom = watermarks->dcn4x.a.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4.a.frac_urg_bw_nom); - } else if (watermarks->dcn4.a.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4x.a.frac_urg_bw_nom); + } else if (watermarks->dcn4x.a.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_mall = watermarks->dcn4.a.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall = watermarks->dcn4x.a.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4.a.frac_urg_bw_mall); - } else if (watermarks->dcn4.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4x.a.frac_urg_bw_mall); + } else if (watermarks->dcn4x.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem = watermarks->dcn4.a.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem = watermarks->dcn4x.a.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4.a.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4x.a.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4.a.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4.a.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.urgent > hubbub2->watermarks.dcn4.b.urgent) { - hubbub2->watermarks.dcn4.b.urgent = watermarks->dcn4.b.urgent; + if (safe_to_lower || watermarks->dcn4x.b.urgent > hubbub2->watermarks.dcn4x.b.urgent) { + hubbub2->watermarks.dcn4x.b.urgent = watermarks->dcn4x.b.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4.b.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4x.b.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.urgent, watermarks->dcn4.b.urgent); - } else if (watermarks->dcn4.b.urgent < hubbub2->watermarks.dcn4.b.urgent) + watermarks->dcn4x.b.urgent, watermarks->dcn4x.b.urgent); + } else if (watermarks->dcn4x.b.urgent < hubbub2->watermarks.dcn4x.b.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_flip = watermarks->dcn4.b.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip = watermarks->dcn4x.b.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4.b.frac_urg_bw_flip); - } else if (watermarks->dcn4.b.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4x.b.frac_urg_bw_flip); + } else if (watermarks->dcn4x.b.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_nom = watermarks->dcn4.b.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom = watermarks->dcn4x.b.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4.b.frac_urg_bw_nom); - } else if (watermarks->dcn4.b.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4x.b.frac_urg_bw_nom); + } else if (watermarks->dcn4x.b.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_mall = watermarks->dcn4.b.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall = watermarks->dcn4x.b.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4.b.frac_urg_bw_mall); - } else if (watermarks->dcn4.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4x.b.frac_urg_bw_mall); + } else if (watermarks->dcn4x.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem = watermarks->dcn4.b.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem = watermarks->dcn4x.b.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4.b.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4x.b.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4.b.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4.b.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) wm_pending = true; return wm_pending; @@ -192,89 +192,89 @@ bool hubbub401_program_stutter_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.sr_enter - > hubbub2->watermarks.dcn4.a.sr_enter) { - hubbub2->watermarks.dcn4.a.sr_enter = - watermarks->dcn4.a.sr_enter; + if (safe_to_lower || watermarks->dcn4x.a.sr_enter + > hubbub2->watermarks.dcn4x.a.sr_enter) { + hubbub2->watermarks.dcn4x.a.sr_enter = + watermarks->dcn4x.a.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4x.a.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_enter, watermarks->dcn4.a.sr_enter); + watermarks->dcn4x.a.sr_enter, watermarks->dcn4x.a.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4x.a.sr_enter); - } else if (watermarks->dcn4.a.sr_enter - < hubbub2->watermarks.dcn4.a.sr_enter) + } else if (watermarks->dcn4x.a.sr_enter + < hubbub2->watermarks.dcn4x.a.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.sr_exit - > hubbub2->watermarks.dcn4.a.sr_exit) { - hubbub2->watermarks.dcn4.a.sr_exit = - watermarks->dcn4.a.sr_exit; + if (safe_to_lower || watermarks->dcn4x.a.sr_exit + > hubbub2->watermarks.dcn4x.a.sr_exit) { + hubbub2->watermarks.dcn4x.a.sr_exit = + watermarks->dcn4x.a.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4x.a.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_exit, watermarks->dcn4.a.sr_exit); + watermarks->dcn4x.a.sr_exit, watermarks->dcn4x.a.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4x.a.sr_exit); - } else if (watermarks->dcn4.a.sr_exit - < hubbub2->watermarks.dcn4.a.sr_exit) + } else if (watermarks->dcn4x.a.sr_exit + < hubbub2->watermarks.dcn4x.a.sr_exit) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.sr_enter - > hubbub2->watermarks.dcn4.b.sr_enter) { - hubbub2->watermarks.dcn4.b.sr_enter = - watermarks->dcn4.b.sr_enter; + if (safe_to_lower || watermarks->dcn4x.b.sr_enter + > hubbub2->watermarks.dcn4x.b.sr_enter) { + hubbub2->watermarks.dcn4x.b.sr_enter = + watermarks->dcn4x.b.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4x.b.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_enter, watermarks->dcn4.b.sr_enter); + watermarks->dcn4x.b.sr_enter, watermarks->dcn4x.b.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4x.b.sr_enter); - } else if (watermarks->dcn4.b.sr_enter - < hubbub2->watermarks.dcn4.b.sr_enter) + } else if (watermarks->dcn4x.b.sr_enter + < hubbub2->watermarks.dcn4x.b.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.sr_exit - > hubbub2->watermarks.dcn4.b.sr_exit) { - hubbub2->watermarks.dcn4.b.sr_exit = - watermarks->dcn4.b.sr_exit; + if (safe_to_lower || watermarks->dcn4x.b.sr_exit + > hubbub2->watermarks.dcn4x.b.sr_exit) { + hubbub2->watermarks.dcn4x.b.sr_exit = + watermarks->dcn4x.b.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4x.b.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_exit, watermarks->dcn4.b.sr_exit); + watermarks->dcn4x.b.sr_exit, watermarks->dcn4x.b.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4x.b.sr_exit); - } else if (watermarks->dcn4.b.sr_exit - < hubbub2->watermarks.dcn4.b.sr_exit) + } else if (watermarks->dcn4x.b.sr_exit + < hubbub2->watermarks.dcn4x.b.sr_exit) wm_pending = true; return wm_pending; @@ -292,116 +292,116 @@ bool hubbub401_program_pstate_watermarks( /* Section for UCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.uclk_pstate - > hubbub2->watermarks.dcn4.a.uclk_pstate) { - hubbub2->watermarks.dcn4.a.uclk_pstate = - watermarks->dcn4.a.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.uclk_pstate + > hubbub2->watermarks.dcn4x.a.uclk_pstate) { + hubbub2->watermarks.dcn4x.a.uclk_pstate = + watermarks->dcn4x.a.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.uclk_pstate, watermarks->dcn4.a.uclk_pstate); - } else if (watermarks->dcn4.a.uclk_pstate - < hubbub2->watermarks.dcn4.a.uclk_pstate) + watermarks->dcn4x.a.uclk_pstate, watermarks->dcn4x.a.uclk_pstate); + } else if (watermarks->dcn4x.a.uclk_pstate + < hubbub2->watermarks.dcn4x.a.uclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.uclk_pstate - > hubbub2->watermarks.dcn4.b.uclk_pstate) { - hubbub2->watermarks.dcn4.b.uclk_pstate = - watermarks->dcn4.b.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.uclk_pstate + > hubbub2->watermarks.dcn4x.b.uclk_pstate) { + hubbub2->watermarks.dcn4x.b.uclk_pstate = + watermarks->dcn4x.b.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.uclk_pstate, watermarks->dcn4.b.uclk_pstate); - } else if (watermarks->dcn4.b.uclk_pstate - < hubbub2->watermarks.dcn4.b.uclk_pstate) + watermarks->dcn4x.b.uclk_pstate, watermarks->dcn4x.b.uclk_pstate); + } else if (watermarks->dcn4x.b.uclk_pstate + < hubbub2->watermarks.dcn4x.b.uclk_pstate) wm_pending = true; /* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; /* Section for FCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.fclk_pstate - > hubbub2->watermarks.dcn4.a.fclk_pstate) { - hubbub2->watermarks.dcn4.a.fclk_pstate = - watermarks->dcn4.a.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.fclk_pstate + > hubbub2->watermarks.dcn4x.a.fclk_pstate) { + hubbub2->watermarks.dcn4x.a.fclk_pstate = + watermarks->dcn4x.a.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.fclk_pstate, watermarks->dcn4.a.fclk_pstate); - } else if (watermarks->dcn4.a.fclk_pstate - < hubbub2->watermarks.dcn4.a.fclk_pstate) + watermarks->dcn4x.a.fclk_pstate, watermarks->dcn4x.a.fclk_pstate); + } else if (watermarks->dcn4x.a.fclk_pstate + < hubbub2->watermarks.dcn4x.a.fclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.fclk_pstate - > hubbub2->watermarks.dcn4.b.fclk_pstate) { - hubbub2->watermarks.dcn4.b.fclk_pstate = - watermarks->dcn4.b.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.fclk_pstate + > hubbub2->watermarks.dcn4x.b.fclk_pstate) { + hubbub2->watermarks.dcn4x.b.fclk_pstate = + watermarks->dcn4x.b.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.fclk_pstate, watermarks->dcn4.b.fclk_pstate); - } else if (watermarks->dcn4.b.fclk_pstate - < hubbub2->watermarks.dcn4.b.fclk_pstate) + watermarks->dcn4x.b.fclk_pstate, watermarks->dcn4x.b.fclk_pstate); + } else if (watermarks->dcn4x.b.fclk_pstate + < hubbub2->watermarks.dcn4x.b.fclk_pstate) wm_pending = true; /* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; return wm_pending; @@ -418,29 +418,29 @@ bool hubbub401_program_usr_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.usr - > hubbub2->watermarks.dcn4.a.usr) { - hubbub2->watermarks.dcn4.a.usr = watermarks->dcn4.a.usr; + if (safe_to_lower || watermarks->dcn4x.a.usr + > hubbub2->watermarks.dcn4x.a.usr) { + hubbub2->watermarks.dcn4x.a.usr = watermarks->dcn4x.a.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4.a.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4x.a.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.usr, watermarks->dcn4.a.usr); - } else if (watermarks->dcn4.a.usr - < hubbub2->watermarks.dcn4.a.usr) + watermarks->dcn4x.a.usr, watermarks->dcn4x.a.usr); + } else if (watermarks->dcn4x.a.usr + < hubbub2->watermarks.dcn4x.a.usr) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.usr - > hubbub2->watermarks.dcn4.b.usr) { - hubbub2->watermarks.dcn4.b.usr = watermarks->dcn4.b.usr; + if (safe_to_lower || watermarks->dcn4x.b.usr + > hubbub2->watermarks.dcn4x.b.usr) { + hubbub2->watermarks.dcn4x.b.usr = watermarks->dcn4x.b.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4.b.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4x.b.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.usr, watermarks->dcn4.b.usr); - } else if (watermarks->dcn4.b.usr - < hubbub2->watermarks.dcn4.b.usr) + watermarks->dcn4x.b.usr, watermarks->dcn4x.b.usr); + } else if (watermarks->dcn4x.b.usr + < hubbub2->watermarks.dcn4x.b.usr) wm_pending = true; return wm_pending; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 5f6c7daa14d9..a8b44f398ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -63,7 +63,7 @@ union dcn_watermark_set { struct dml2_dchub_watermark_regs b; struct dml2_dchub_watermark_regs c; struct dml2_dchub_watermark_regs d; - } dcn4; //dcn4+ + } dcn4x; //dcn4+ }; struct dce_watermarks { From fa53b23ff7c98930c7e529b8f5228d9f182ae2bc Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 4 Jul 2024 18:41:58 +0000 Subject: [PATCH 0134/1523] drm/amd/display: rename dcn401_soc to dcn4_variant_a_soc To distinguish between different soc with same DCN IP, use variants starting with alphabets Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- .../amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index ec663c7442e5..4164cda90b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -31,7 +31,7 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_ else soc_bb = &dml2_socbb_dcn401; - qos_params = &dml_dcn401_soc_qos_params; + qos_params = &dml_dcn4_variant_a_soc_qos_params; } /* patch soc bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 0fe70bd0dbac..898b1dd69edd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -8,7 +8,7 @@ #include "dml_top_soc_parameter_types.h" -static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { +static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = { .derate_table = { .system_active_urgent = { .dram_derate_percent_pixel = 22, From a90e1dc25c064bf21353cb342aac938662a148e4 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 9 Jul 2024 13:11:55 -0400 Subject: [PATCH 0135/1523] drm/amd/display: Add helper function to check for non-address fast updates [Why/How] Need to identify which fast updates will update more than just the address. Reviewed-by: Alvin Lee Signed-off-by: Ilya Bakoulin Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 25 +++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc.h | 6 ++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b71c4d8e73dd..b8a6c062426d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4697,7 +4697,7 @@ static bool commit_minimal_transition_state(struct dc *dc, return true; } -static void populate_fast_updates(struct dc_fast_update *fast_update, +void populate_fast_updates(struct dc_fast_update *fast_update, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update) @@ -4707,6 +4707,9 @@ static void populate_fast_updates(struct dc_fast_update *fast_update, if (stream_update) { fast_update[0].out_transfer_func = stream_update->out_transfer_func; fast_update[0].output_csc_transform = stream_update->output_csc_transform; + } else { + fast_update[0].out_transfer_func = NULL; + fast_update[0].output_csc_transform = NULL; } for (i = 0; i < surface_count; i++) { @@ -4740,6 +4743,26 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c return false; } +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count) +{ + int i; + + if (fast_update[0].out_transfer_func || + fast_update[0].output_csc_transform) + return true; + + for (i = 0; i < surface_count; i++) { + if (fast_update[i].input_csc_color_matrix || + fast_update[i].gamma || + fast_update[i].gamut_remap_matrix || + fast_update[i].coeff_reduction_factor || + fast_update[i].cursor_csc_color_matrix) + return true; + } + + return false; +} + static bool full_update_required(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 036b23a6e324..272ae1bdc57f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1587,6 +1587,12 @@ bool dc_acquire_release_mpc_3dlut( bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); void get_audio_check(struct audio_info *aud_modes, struct audio_check *aud_chk); + +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count); +void populate_fast_updates(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update); /* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. From ec0d7abbb0d464619d6d1646f03603c6616d966e Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 11 Jul 2024 14:56:29 -0400 Subject: [PATCH 0136/1523] drm/amd/display: Fix Potential Null Dereference [what & why] System hang after s4 regression points to code change here. Removing possible NULL dereference. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Gabe Teeger Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 248d22b23a6d..2d5bd5c7ab94 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -139,9 +139,9 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * old_pipe->stream != new_pipe->stream && old_pipe->stream_res.tg == new_pipe->stream_res.tg && new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && - new_pipe->stream->link->link_enc->funcs->is_dig_enabled && - new_pipe->stream->link->link_enc->funcs->is_dig_enabled( - new_pipe->stream->link->link_enc) && + new_pipe->stream->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link_enc) && new_pipe->stream_res.stream_enc && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); From fae1879dc229249c17e5fd4ef81bf33684ee58f0 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 11 Jul 2024 11:24:07 -0400 Subject: [PATCH 0137/1523] drm/amd/display: Check top sink only when multiple streams for DP2 [why] When switching from extended to second display only mode, the top remote sink is not removed while the top stream itself is released. This causes DML to think there is no DP2 output encoder because top remote sink does not match with the second stream and disables DTBCLK and causes hang. [how] For DP2.0 MST hubs, only treat 1st remote sink as an encoder only when there are multiple displays connected. Reviewed-by: Michael Strauss Signed-off-by: Sung Joon Kim Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_internal_types.h | 1 + .../display/dc/dml2/dml2_translation_helper.c | 41 +++++++++++++++++-- .../display/dc/dml2/dml2_translation_helper.h | 2 +- .../gpu/drm/amd/display/dc/dml2/dml2_utils.c | 6 +-- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index b566f53608c6..3ba184be25d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,6 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; + unsigned int dp2_mst_stream_count; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b9dcee77266..25d4ef040173 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,7 +733,8 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, + unsigned int dp2_mst_stream_count) { unsigned int output_bpc; @@ -746,7 +747,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe)) + if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) out->OutputEncoder[location] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -1193,6 +1194,37 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, plane_index = 0; } } + +static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) +{ + int i, j; + unsigned int dp2_mst_stream_count = 0; + + for (i = 0; i < context->stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + + if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) + continue; + + for (j = 0; j < MAX_PIPES; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (!pipe_ctx || !pipe_ctx->stream) + continue; + + if (stream != pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { + dp2_mst_stream_count++; + break; + } + } + } + + return dp2_mst_stream_count; +} + static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1255,6 +1287,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; + dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { @@ -1276,7 +1309,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); /*Call site for populate_dml_writeback_cfg_from_stream_state*/ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1337,7 +1370,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index d764773938f4..55659b22d87f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 92238ff333a4..7655501e75d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -162,7 +162,7 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal) && @@ -181,7 +181,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) return true; } return false; From 1a03b0e6c5fdb476203be4f4597205ff8799369b Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 12 Jul 2024 09:39:13 -0600 Subject: [PATCH 0138/1523] drm/amd/display: Add MST debug message when link detection fails [WHY & HOW] dc_link_detect returns a boolean value which can be used to print debug messages when it fails. This fixes 1 CHECKED_RETURN issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 62cb59f00929..db56b0aa5454 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3804,9 +3804,12 @@ static int trigger_hpd_mst_set(void *data, u64 val) if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { mutex_lock(&adev->dm.dc_lock); - dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); mutex_unlock(&adev->dm.dc_lock); + if (!ret) + DRM_ERROR("DM_MST: Failed to detect dc link!"); + ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true); if (ret < 0) DRM_ERROR("DM_MST: Failed to set the device into MST mode!"); From d925c04d974c657d10471c0c2dba3bc9c7d994ee Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Jun 2024 16:45:39 -0600 Subject: [PATCH 0139/1523] drm/amd/display: Check link_res->hpo_dp_link_enc before using it [WHAT & HOW] Functions dp_enable_link_phy and dp_disable_link_phy can pass link_res without initializing hpo_dp_link_enc and it is necessary to check for null before dereferencing. This fixes 1 FORWARD_NULL issue reported by Coverity. Fixes: 0beca868cde8 ("drm/amd/display: Check link_res->hpo_dp_link_enc before using it") Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index d0148f10dfc0..cec68c5dba13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -110,6 +110,11 @@ void enable_hpo_dp_link_output(struct dc_link *link, enum clock_source_id clock_source, const struct dc_link_settings *link_settings) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + if (link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating) link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating( link->dc->res_pool->dccg, From 3237403b98e173c296ec83cbba5e9def331c5e13 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 11 Jul 2024 10:53:41 -0600 Subject: [PATCH 0140/1523] drm/amd/display: Remove old comments Remove some old comments from DCN32/321. Signed-off-by: Rodrigo Siqueira Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 4cb0227bdd27..6f490d8d7038 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -160,8 +160,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 4297402bdab3..8839faf42207 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -139,8 +139,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, From 58ed441367b9b0fb48aa8cd471a73073d1143e16 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 20 May 2024 11:12:07 -0400 Subject: [PATCH 0141/1523] drm/amd/display: Various DML2 fixes for FAMS2 The disable fams2 operation was reworked, but some of the old code remained. This commit removes the disable_fams2_drr from the dml2_stream_parameters. Reviewed-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h | 1 - .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index fe153f4edaf5..b132f676a68d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -410,7 +410,6 @@ struct dml2_stream_parameters { enum dml2_odm_mode odm_mode; bool disable_dynamic_odm; bool disable_subvp; - bool disable_fams2_drr; int minimum_vblank_idle_requirement_us; bool minimize_active_latency_hiding; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index fa445067782e..dddb21818f8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1012,7 +1012,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, stream_descriptor = &display_config->display_config.stream_descriptors[i]; stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; - if (!stream_descriptor->timing.drr_config.enabled || stream_descriptor->overrides.disable_fams2_drr) + if (!stream_descriptor->timing.drr_config.enabled) return false; /* cannot support required vtotal */ From 8732594017d32132ca741f4ec4eec91211f5d10c Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 6 Jun 2024 15:51:16 -0400 Subject: [PATCH 0142/1523] drm/amd/display: Remove duplicate HWSS interfaces [Why] Some interface functions are defined in both the public and private HWSS interfaces, which can lead to confusion and runtime issues, therefore the duplicates should be eliminated. [How] - power_down should only be private, because it's only used within HWSS. - update_plane_addr should only be public, as it's used outside HWSS. Reviewed-by: Rodrigo Siqueira Signed-off-by: Joshua Aberback Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 10 +++++----- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c | 1 - .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c | 2 -- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++---- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 2 -- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 1 - .../gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h | 2 -- 21 files changed, 22 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 4593fb2a0536..e2f5c4d34a55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -3319,7 +3319,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { static const struct hwseq_private_funcs dce110_private_funcs = { .init_pipes = init_pipes, - .update_plane_addr = update_plane_addr, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 212576dbc336..e31249d1dd22 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1696,10 +1696,10 @@ void dcn10_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -1707,8 +1707,8 @@ void dcn10_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -2913,7 +2913,7 @@ static void dcn10_update_dchubp_dpp( hubp->power_gated = false; - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); if (is_pipe_tree_visible(pipe_ctx)) hubp->funcs->set_blank(hubp, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c index a5bdac79a744..5e51e1761707 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c @@ -78,7 +78,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, @@ -92,7 +91,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { static const struct hwseq_private_funcs dcn10_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn10_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn10_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index bd7b186fb2e4..270e337ae27b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1825,7 +1825,7 @@ static void dcn20_update_dchubp_dpp( params.subvp_save_surf_addr.subvp_index = pipe_ctx->subvp_index; hwss_subvp_save_surf_addr(¶ms); } - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); } if (pipe_ctx->update_flags.bits.enable) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c index ef6488165b8f..32707b344f0b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c @@ -105,7 +105,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = { static const struct hwseq_private_funcs dcn20_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c index a13bf6c9386e..78351408e864 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c @@ -96,7 +96,6 @@ static const struct hw_sequencer_funcs dcn201_funcs = { static const struct hwseq_private_funcs dcn201_private_funcs = { .init_pipes = NULL, - .update_plane_addr = dcn201_update_plane_addr, .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn201_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c index 3dfac372d165..e044e9e0a3a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -109,7 +108,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { static const struct hwseq_private_funcs dcn21_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index eaeeade31ed7..fc5936460ac2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -731,10 +731,10 @@ void dcn30_init_hw(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -742,8 +742,8 @@ void dcn30_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 4b32497c09d0..2a8dc40d2847 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -113,7 +113,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { static const struct hwseq_private_funcs dcn30_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 97e33eb7ac5a..93e49d87a67c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -111,7 +111,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { static const struct hwseq_private_funcs dcn301_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 9cb7afe0e731..b57dd45611f2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -98,7 +98,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -116,7 +115,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { static const struct hwseq_private_funcs dcn31_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 7a8db4b81471..fe5495a8e7a2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -119,7 +118,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { static const struct hwseq_private_funcs dcn314_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 7f41eccefe02..a7cb003f1dfb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -901,10 +901,10 @@ void dcn32_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -914,8 +914,8 @@ void dcn32_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5c50458b12cb..968b010971ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -124,7 +124,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = { static const struct hwseq_private_funcs dcn32_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ac1e3331a77c..a9dc7cf12dac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -629,10 +629,10 @@ void dcn35_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -640,8 +640,8 @@ void dcn35_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 428912f37129..55dc5799e725 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -101,7 +101,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -128,7 +127,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { static const struct hwseq_private_funcs dcn35_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 55e791552bca..b1b2a58684e7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -127,7 +126,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { static const struct hwseq_private_funcs dcn351_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 779960278a5c..87c5ef579ecb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -354,10 +354,10 @@ void dcn401_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -367,8 +367,8 @@ void dcn401_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 1439f07f0b64..2533f16510ba 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -99,12 +99,10 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, - .power_down = dce110_power_down, }; static const struct hwseq_private_funcs dcn401_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index d05be65a2256..f50b2955ce8c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -240,7 +240,6 @@ struct hw_sequencer_funcs { void (*program_triplebuffer)(const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enableTripleBuffer); void (*update_pending_status)(struct pipe_ctx *pipe_ctx); - void (*power_down)(struct dc *dc); void (*update_dsc_pg)(struct dc *dc, struct dc_state *context, bool safe_to_disable); /* Pipe Lock Related */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7a75ff320511..0ac675456979 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -76,8 +76,6 @@ struct hwseq_private_funcs { void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*init_pipes)(struct dc *dc, struct dc_state *context); void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context); - void (*update_plane_addr)(const struct dc *dc, - struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); From 779ea9d32612f8e78a2f362d52cf31c23ba878f0 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Mon, 15 Jul 2024 16:03:30 -0400 Subject: [PATCH 0143/1523] drm/amd/display: remove unused folder dc/{dcn401,dcn303} are unused since the files in it got moved under their respective new components location. Hence they are no longer necessary Fixes: 2d62bb450ed1 ("drm/amd/display: Refactor DCN3X into component folder") Reviewed-by: Leo Li Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn303/Makefile | 13 ------------- drivers/gpu/drm/amd/display/dc/dcn401/Makefile | 10 ---------- 2 files changed, 23 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn401/Makefile diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile deleted file mode 100644 index a954e316aca2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: MIT -# -# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn303. - -DCN3_03 = dcn303_init.o - -AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03) diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile deleted file mode 100644 index ded1f3140beb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. - -DCN401 += dcn401_dio_link_encoder.o -DCN401 += dcn401_dio_stream_encoder.o -DCN401 += dcn401_mpc.o - -AMD_DAL_DCN401 = $(addprefix $(AMDDALPATH)/dc/dcn401/,$(DCN401)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN401) From c8a0222dedf90e9a79b88814e32ffb43ac4beef6 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 14 Jul 2024 21:54:49 -0400 Subject: [PATCH 0144/1523] drm/amd/display: 3.2.293 Signed-off-by: Aurabindo Pillai Signed-off-by: Aric Cyr Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 272ae1bdc57f..4077c1ddb9c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.292" +#define DC_VER "3.2.293" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 0352e39e7e781fe6a408c70a336d0f321dfe108b Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 18:15:18 +0530 Subject: [PATCH 0145/1523] drm/amd/display: Add kdoc entry for 'bs_coeffs_updated' in dpp401_dscl_program_isharp Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/dpp/dcn401/dcn401_dpp_dscl.c:961: warning: Function parameter or struct member 'bs_coeffs_updated' not described in 'dpp401_dscl_program_isharp' Fixes: 94beb4ac1b3b ("drm/amd/display: ensure EASF and ISHARP coefficients are programmed together") Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 703d7b51c6c2..3a3745597f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -951,6 +951,7 @@ static void dpp401_dscl_set_isharp_filter( * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info + * @bs_coeffs_updated: coeffs update flag * * This is the primary function to program isharp * From bc50b614d59990747dd5aeced9ec22f9258991ff Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 18:44:02 +0530 Subject: [PATCH 0146/1523] drm/amd/display: Fix index out of bounds in DCN30 degamma hardware format translation This commit addresses a potential index out of bounds issue in the `cm3_helper_translate_curve_to_degamma_hw_format` function in the DCN30 color management module. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds, the function returns false to indicate an error. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:338 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:339 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:340 cm3_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 685702321d32..e55d7ff346d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -335,6 +335,8 @@ bool cm3_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From b7e99058eb2e86aabd7a10761e76cae33d22b49f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 17:48:27 +0530 Subject: [PATCH 0147/1523] drm/amd/display: Fix index out of bounds in degamma hardware format translation Fixes index out of bounds issue in `cm_helper_translate_curve_to_degamma_hw_format` function. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds the function returns false to indicate an error. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:594 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:595 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_cm_common.c:596 cm_helper_translate_curve_to_degamma_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 0b49362f71b0..eaed5d1c398a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -591,6 +591,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From bdf606810210e8e07a0cdf1af3c467291363b295 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 19 Jul 2024 21:39:57 +0530 Subject: [PATCH 0148/1523] drm/amd/display: Implement bounds check for stream encoder creation in DCN401 'stream_enc_regs' array is an array of dcn10_stream_enc_registers structures. The array is initialized with four elements, corresponding to the four calls to stream_enc_regs() in the array initializer. This means that valid indices for this array are 0, 1, 2, and 3. The error message 'stream_enc_regs' 4 <= 5 below, is indicating that there is an attempt to access this array with an index of 5, which is out of bounds. This could lead to undefined behavior Here, eng_id is used as an index to access the stream_enc_regs array. If eng_id is 5, this would result in an out-of-bounds access on the stream_enc_regs array. Thus fixing Buffer overflow error in dcn401_stream_encoder_create Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn401/dcn401_resource.c:1209 dcn401_stream_encoder_create() error: buffer overflow 'stream_enc_regs' 4 <= 5 Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index d3808c49d298..5ee20753572e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1190,7 +1190,7 @@ static struct stream_encoder *dcn401_stream_encoder_create( vpg = dcn401_vpg_create(ctx, vpg_inst); afmt = dcn401_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); From d81873f9e715b72d4f8d391c8eb243946f784dfc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 20 Jul 2024 18:05:20 +0530 Subject: [PATCH 0149/1523] drm/amd/display: Fix index out of bounds in DCN30 color transformation This commit addresses a potential index out of bounds issue in the `cm3_helper_translate_curve_to_hw_format` function in the DCN30 color management module. The issue could occur when the index 'i' exceeds the number of transfer function points (TRANSFER_FUNC_POINTS). The fix adds a check to ensure 'i' is within bounds before accessing the transfer function points. If 'i' is out of bounds, the function returns false to indicate an error. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:180 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.red' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:181 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.green' 1025 <= s32max drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_cm_common.c:182 cm3_helper_translate_curve_to_hw_format() error: buffer overflow 'output_tf->tf_pts.blue' 1025 <= s32max Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index e55d7ff346d2..f31f0e3abfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -177,6 +177,8 @@ bool cm3_helper_translate_curve_to_hw_format( i += increment) { if (j == hw_points) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; From f22f4754aaa47d8c59f166ba3042182859e5dff7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 21 Jul 2024 19:18:58 +0530 Subject: [PATCH 0150/1523] drm/amd/display: Add null check for head_pipe in dcn201_acquire_free_pipe_for_layer This commit addresses a potential null pointer dereference issue in the `dcn201_acquire_free_pipe_for_layer` function. The issue could occur when `head_pipe` is null. The fix adds a check to ensure `head_pipe` is not null before asserting it. If `head_pipe` is null, the function returns NULL to prevent a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn201/dcn201_resource.c:1016 dcn201_acquire_free_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 1010) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 131d98025bd4..fc54483b9104 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1007,8 +1007,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) return NULL; From ac2140449184a26eac99585b7f69814bd3ba8f2d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 21 Jul 2024 19:30:16 +0530 Subject: [PATCH 0151/1523] drm/amd/display: Add null check for head_pipe in dcn32_acquire_idle_pipe_for_head_pipe_in_layer This commit addresses a potential null pointer dereference issue in the `dcn32_acquire_idle_pipe_for_head_pipe_in_layer` function. The issue could occur when `head_pipe` is null. The fix adds a check to ensure `head_pipe` is not null before asserting it. If `head_pipe` is null, the function returns NULL to prevent a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn32/dcn32_resource.c:2690 dcn32_acquire_idle_pipe_for_head_pipe_in_layer() error: we previously assumed 'head_pipe' could be null (see line 2681) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 6eaf3cfebcb7..a124ad9bd108 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2678,8 +2678,10 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; int head_index; - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } /* * Modified from dcn20_acquire_idle_pipe_for_layer From 12fb3e9c88406732749fb2f111911a2438eeb0fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:29:20 -0400 Subject: [PATCH 0152/1523] drm/amdgpu/gfx7: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d84589137df9..5fbdef04c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5002,6 +5002,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; From 7e60ecc2b70adb41b92752cbcd749040e00b57b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:29:59 -0400 Subject: [PATCH 0153/1523] drm/amdgpu/gfx8: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b4658c7db0e1..a1963e6c5cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6955,6 +6955,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; From 9c7e69d2e1245fdd5fa5c65cd022530b2a5ef1b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:21:48 -0400 Subject: [PATCH 0154/1523] drm/amdgpu/gfx9: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2929c8972ea7..d4e38edc9353 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7244,6 +7244,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, }; From 68e599db7a549f010a329515f3508d8a8c3467a4 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:21:57 -0400 Subject: [PATCH 0155/1523] drm/amdkfd: Validate user queue buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Find user queue rptr, ring buf, eop buffer and cwsr area BOs, and check BOs are mapped on the GPU with correct size and take the BO reference. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +++ drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 40 ++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index aba9bcd91f65..80d8080c5764 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -524,6 +524,10 @@ struct queue_properties { uint64_t exception_status; struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *rptr_bo; + struct amdgpu_bo *ring_bo; + struct amdgpu_bo *eop_buf_bo; + struct amdgpu_bo *cwsr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index b4529ec298a9..0e661160c295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -97,7 +97,8 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ if (!mapping) goto out_err; - if (user_addr != mapping->start || user_addr + size - 1 != mapping->last) { + if (user_addr != mapping->start || + (size != 0 && user_addr + size - 1 != mapping->last)) { pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); @@ -124,18 +125,51 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, + &properties->ring_bo, properties->queue_size); + if (err) + goto out_err_unreserve; + + /* only compute queue requires EOP buffer and CWSR area */ + if (properties->type != KFD_QUEUE_TYPE_COMPUTE) goto out_unreserve; - amdgpu_bo_unreserve(vm->root.bo); - return 0; + /* EOP buffer is not required for all ASICs */ + if (properties->eop_ring_buffer_address) { + err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, + &properties->eop_buf_bo, + properties->eop_ring_buffer_size); + if (err) + goto out_err_unreserve; + } + + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, + &properties->cwsr_bo, 0); + if (err) + goto out_err_unreserve; out_unreserve: amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_err_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + kfd_queue_release_buffers(pdd, properties); return err; } int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { amdgpu_bo_unref(&properties->wptr_bo); + amdgpu_bo_unref(&properties->rptr_bo); + amdgpu_bo_unref(&properties->ring_bo); + amdgpu_bo_unref(&properties->eop_buf_bo); + amdgpu_bo_unref(&properties->cwsr_bo); return 0; } From cba7fec864172dadd953daefdd26e01742b71a6a Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:21:19 +0530 Subject: [PATCH 0156/1523] drm/amd/display: Add NULL check for clk_mgr and clk_mgr->funcs in dcn30_init_hw This commit addresses a potential null pointer dereference issue in the `dcn30_init_hw` function. The issue could occur when `dc->clk_mgr` or `dc->clk_mgr->funcs` is null. The fix adds a check to ensure `dc->clk_mgr` and `dc->clk_mgr->funcs` is not null before accessing its functions. This prevents a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:789 dcn30_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 628) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index fc5936460ac2..98a40d46aaae 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -625,7 +625,7 @@ void dcn30_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -786,11 +786,12 @@ void dcn30_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); //if softmax is enabled then hardmax will be set by a different call - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) From 4b6377f0e96085cbec96eb7f0b282430ccdd3d75 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:58:32 +0530 Subject: [PATCH 0157/1523] drm/amd/display: Add NULL check for clk_mgr and clk_mgr->funcs in dcn401_init_hw This commit addresses a potential null pointer dereference issue in the `dcn401_init_hw` function. The issue could occur when `dc->clk_mgr` or `dc->clk_mgr->funcs` is null. The fix adds a check to ensure `dc->clk_mgr` and `dc->clk_mgr->funcs` is not null before accessing its functions. This prevents a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn401/dcn401_hwseq.c:416 dcn401_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 225) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 87c5ef579ecb..0fa610590245 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -222,7 +222,7 @@ void dcn401_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) { + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // mark dcmode limits present if any clock has distinct AC and DC values from SMU @@ -413,7 +413,7 @@ void dcn401_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) @@ -435,7 +435,9 @@ void dcn401_init_hw(struct dc *dc) dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { /* update bounding box if FAMS2 disabled */ - dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + if (dc->clk_mgr) + dc->res_pool->funcs->update_bw_bounding_box(dc, + dc->clk_mgr->bw_params); } } } From eac3b274aaea11ec4ade8e8f684055db80d5f8b7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 17 Jul 2024 18:45:50 +0530 Subject: [PATCH 0158/1523] drm/amdgpu: add print support for sdma_v_4_4_2 ip_dump Add print support for ip dump for sdma_v_4_4_2 in devcoredump. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 67e0e894579a..cb7fedb34fa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1857,6 +1857,27 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + static void sdma_v4_4_2_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1895,6 +1916,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, .dump_ip_state = sdma_v4_4_2_dump_ip_state, + .print_ip_state = sdma_v4_4_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { From 22a9d5cbf88a92ac6cd473c3ba1c369aee8fec9a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:27:37 -0400 Subject: [PATCH 0159/1523] drm/amdgpu/gfx9.4.3: implement wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on gfx9.0 implementation. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 20ea6cb01edf..2ac398184e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2833,6 +2833,19 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); +} + static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state, int xcc_id) @@ -4116,6 +4129,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, }; From 834368eab36922595a402b9e76470f8efa2fac7f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:31:36 -0400 Subject: [PATCH 0160/1523] drm/amdkfd: Ensure user queue buffers residency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add atomic queue_refcount to struct bo_va, return -EBUSY to fail unmap BO from the GPU if the bo_va queue_refcount is not zero. Create queue to increase the bo_va queue_refcount, destroy queue to decrease the bo_va queue_refcount, to ensure the queue buffers mapped on the GPU when queue is active. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 ++++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 34 ++++++++++++++++--- 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0ab37e7aec26..6d5fd371d5ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1252,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, return ret; } -static void unmap_bo_from_gpuvm(struct kgd_mem *mem, +static int unmap_bo_from_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync) { @@ -1260,11 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; + if (bo_va->queue_refcount) { + pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount); + return -EBUSY; + } + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); + + return 0; } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -2191,7 +2198,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, entry); - unmap_bo_from_gpuvm(mem, entry, ctx.sync); + ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync); + if (ret) + goto unreserve_out; + entry->is_mapped = false; mem->mapped_to_gpu_memory--; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659..d7e27957013f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -90,6 +90,12 @@ struct amdgpu_bo_va { bool cleared; bool is_xgmi; + + /* + * protected by vm reservation lock + * if non-zero, cannot unmap from GPU because user queues may still access it + */ + unsigned int queue_refcount; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 202f24ee4bd7..65a37ac5a0f0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1384,8 +1384,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { - pr_err("Failed to unmap from gpu %d/%d\n", - i, args->n_devices); + pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); goto unmap_memory_from_gpu_failed; } args->n_success = i+1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 80d8080c5764..c31589043d5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1292,6 +1292,7 @@ void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size); +void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0e661160c295..3fd386dcb011 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -106,6 +106,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ } *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + mapping->bo_va->queue_refcount++; return 0; out_err: @@ -113,6 +114,19 @@ out_err: return -EINVAL; } +void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va) + bo_va->queue_refcount--; + } + + amdgpu_bo_unref(bo); +} + int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct amdgpu_vm *vm; @@ -166,10 +180,20 @@ out_err_unreserve: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { - amdgpu_bo_unref(&properties->wptr_bo); - amdgpu_bo_unref(&properties->rptr_bo); - amdgpu_bo_unref(&properties->ring_bo); - amdgpu_bo_unref(&properties->eop_buf_bo); - amdgpu_bo_unref(&properties->cwsr_bo); + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_buffer_put(vm, &properties->wptr_bo); + kfd_queue_buffer_put(vm, &properties->rptr_bo); + kfd_queue_buffer_put(vm, &properties->ring_bo); + kfd_queue_buffer_put(vm, &properties->eop_buf_bo); + kfd_queue_buffer_put(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); return 0; } From 8284951a6e79c6806c675e5f68a4cd425dd56bc4 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 19 Jul 2024 20:43:04 +0800 Subject: [PATCH 0161/1523] drm/amdgpu: fix ras UE error injection failure issue The ras command shared memory is allocated from VRAM and the response status of the command buffer will not be zero due to gpu being in fatal error state after ras UE error injection. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7cdff355cedb..189574d53ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1630,9 +1630,7 @@ static int psp_ras_send_cmd(struct psp_context *psp, switch (cmd) { case TA_RAS_COMMAND__TRIGGER_ERROR: - if (ret || psp->cmd_buf_mem->resp.status) - ret = -EINVAL; - else if (out) + if (!ret && out) memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); break; case TA_RAS_COMMAND__QUERY_ADDRESS: From c395fd47d1565bd67671f45cca281b3acc2c31ef Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 16:44:40 +0530 Subject: [PATCH 0162/1523] drm/amd/display: Add NULL check for clk_mgr in dcn32_init_hw This commit addresses a potential null pointer dereference issue in the `dcn32_init_hw` function. The issue could occur when `dc->clk_mgr` is null. The fix adds a check to ensure `dc->clk_mgr` is not null before accessing its functions. This prevents a potential null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn32/dcn32_hwseq.c:961 dcn32_init_hw() error: we previously assumed 'dc->clk_mgr' could be null (see line 782) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a7cb003f1dfb..fcaabad204a2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -779,7 +779,7 @@ void dcn32_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -958,10 +958,11 @@ void dcn32_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) From 015b8a2fdf39a4c288ff24e7b715b8d9198e56dc Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 18 Jul 2024 10:58:04 +0800 Subject: [PATCH 0163/1523] drm/amdgpu: Fix eeprom max record count The eeprom table is empty before initializing, set eeprom table version first before initializing. Changed from V1: Reuse amdgpu_ras_set_eeprom_table_version function Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index eae0a555df3c..aab8077e5098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1011,6 +1011,9 @@ Out: uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { + /* get available eeprom table version first before eeprom table init */ + amdgpu_ras_set_eeprom_table_version(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else From a2737c404cb2c1c335db30737925a306a2e7cc11 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:17:18 -0400 Subject: [PATCH 0164/1523] drm/amdgpu/gfx10: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..c4002db6e569 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9480,6 +9480,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; From f53f526f7050041718af84e33fc24f670e7dccdb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:19:42 -0400 Subject: [PATCH 0165/1523] drm/amdgpu/gfx11: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index dcef39907449..554aae995f41 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6603,6 +6603,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, }; From af4808ac40dbf668183d0b69ef6b31e62e1fc5df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:20:37 -0400 Subject: [PATCH 0166/1523] drm/amdgpu/gfx12: enable wave kill for compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work the same for compute as well as gfx. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..567f9196d6a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5081,6 +5081,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, }; From 4b95cec68937a6302c7085b26258cf721d726684 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 11:08:52 -0400 Subject: [PATCH 0167/1523] drm/amdgpu/gfx10: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 128 +++++++++++++++++++++---- 1 file changed, 108 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c4002db6e569..66d80f3dc661 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5212,26 +5212,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) } +static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v10_0_init_csb(struct amdgpu_device *adev) @@ -9073,12 +9121,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9092,12 +9167,25 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9121,8 +9209,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 2662b7d9d8bc1dda1f89f0dd33422e069f2f861c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 11:18:00 -0400 Subject: [PATCH 0168/1523] drm/amdgpu/gfx11: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 132 ++++++++++++++++++++----- 1 file changed, 110 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 554aae995f41..02efa475eb7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1953,26 +1953,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) gfx_v11_0_init_gds_vmid(adev); } +static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v11_0_init_csb(struct amdgpu_device *adev) @@ -6201,15 +6249,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6220,15 +6295,28 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6252,8 +6340,8 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 39879321769cc2d9a690725959ef76af92a38ac1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2024 17:40:55 -0400 Subject: [PATCH 0169/1523] drm/amdgpu/gfx12: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. v2: fix indexing (Jessie) Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 130 ++++++++++++++++++++----- 1 file changed, 106 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 567f9196d6a0..c74c8a60a23a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1680,26 +1680,68 @@ static void gfx_v12_0_constants_init(struct amdgpu_device *adev) gfx_v12_0_init_compute_vmid(adev); } -static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) +static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) { - u32 tmp; + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + default: + return 0; + } +} + +static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + default: + return 0; + } +} + +static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v12_0_init_csb(struct amdgpu_device *adev) @@ -4745,15 +4787,42 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4764,15 +4833,28 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4796,8 +4878,8 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; From 48695573d2feaf42812c1ad54e01caff0d1c2d71 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Jul 2024 10:24:59 -0400 Subject: [PATCH 0170/1523] drm/amdgpu/gfx9: properly handle error ints on all pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to handle the interrupt enables for all pipes. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 44 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 50 +++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d4e38edc9353..97476fb2ca40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2634,7 +2634,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if(adev->gfx.num_gfx_rings) + if (adev->gfx.num_gfx_rings) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); @@ -5929,17 +5929,59 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, } } +static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2ac398184e12..43a3ef276b5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2899,21 +2899,63 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( } } +static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev, + int xcc_id, int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { - int i, num_xcc; + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < num_xcc; i++) + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } break; default: break; From acddd5cf70e609e1e1e638ac0422977ea2b4783f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:01:06 -0400 Subject: [PATCH 0171/1523] drm/amdgpu/gfx: add bad opcode interrupt Add the irq source for bad opcodes. Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..86d3fa7eef90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -391,6 +391,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src bad_op_irq; struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; From a7909022371dc8c70bdc4871a97cc49e34d78a6d Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 11 Jul 2024 10:38:03 +0800 Subject: [PATCH 0172/1523] drm/amdgpu/gfx11: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Reviewed-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 02efa475eb7e..4a9766635933 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1569,6 +1569,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -4646,6 +4653,7 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -5002,6 +5010,9 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; return 0; } @@ -6293,6 +6304,51 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -6369,6 +6425,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v11_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6747,6 +6812,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { .process = gfx_v11_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { + .set = gfx_v11_0_set_bad_op_fault_state, + .process = gfx_v11_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .set = gfx_v11_0_set_priv_inst_fault_state, .process = gfx_v11_0_priv_inst_irq, @@ -6764,6 +6834,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; From bc6c2a6f6495668e3cf0acbecf820b93ca03aef7 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 12 Jul 2024 18:14:52 -0400 Subject: [PATCH 0173/1523] drm/amdgpu/gfx10: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 66d80f3dc661..853084a2ce7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4740,6 +4740,13 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -7416,6 +7423,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* WA added for Vangogh asic fixing the SMU suspend failure * It needs to set power gating again during gfxoff control @@ -7726,6 +7734,10 @@ static int gfx_v10_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -9162,6 +9174,51 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -9237,6 +9294,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -9624,6 +9690,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { .process = gfx_v10_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = { + .set = gfx_v10_0_set_bad_op_fault_state, + .process = gfx_v10_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { .set = gfx_v10_0_set_priv_inst_fault_state, .process = gfx_v10_0_priv_inst_irq, @@ -9645,6 +9716,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; } From 5ebca62eb8ebff67e2c8c4903bdb4f3c07922114 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 12 Jul 2024 18:42:53 -0400 Subject: [PATCH 0174/1523] drm/amdgpu/gfx12: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. v2: update irq naming (drop priv) (Alex) Acked-by: Felix Kuehling Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index c74c8a60a23a..f932c7ff85e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1349,6 +1349,13 @@ static int gfx_v12_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -3592,6 +3599,7 @@ static int gfx_v12_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3712,6 +3720,10 @@ static int gfx_v12_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -4831,6 +4843,51 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -4907,6 +4964,15 @@ static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -5219,6 +5285,11 @@ static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { .process = gfx_v12_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = { + .set = gfx_v12_0_set_bad_op_fault_state, + .process = gfx_v12_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { .set = gfx_v12_0_set_priv_inst_fault_state, .process = gfx_v12_0_priv_inst_irq, @@ -5232,6 +5303,9 @@ static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs; } From 238352b4949bc5c724f6adc1c78d50f1d15e4759 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:50:26 -0400 Subject: [PATCH 0175/1523] drm/amdgpu/gfx9: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 65 +++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 97476fb2ca40..675a1a8e2515 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2182,6 +2182,13 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -3937,6 +3944,7 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) @@ -4747,6 +4755,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + r = gfx_v9_0_ecc_late_init(handle); if (r) return r; @@ -5990,6 +6002,42 @@ static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + + return 0; +} + static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -6163,6 +6211,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_0_fault(adev, entry); + return 0; +} + static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -7346,6 +7403,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { .process = gfx_v9_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { + .set = gfx_v9_0_set_bad_op_fault_state, + .process = gfx_v9_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .set = gfx_v9_0_set_priv_inst_fault_state, .process = gfx_v9_0_priv_inst_irq, @@ -7365,6 +7427,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; From bd4bea5ab2bda37ddb092a978218c4d9b46927e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 18:57:14 -0400 Subject: [PATCH 0176/1523] drm/amdgpu/gfx9.4.3: Enable bad opcode interrupt For the bad opcode case, it will cause CP/ME hang. The firmware will prevent the ME side from hanging by raising a bad opcode interrupt. And the driver needs to perform a vmid reset when receiving the interrupt. Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 69 +++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 43a3ef276b5f..98fe6c40da64 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -901,6 +901,13 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -2162,6 +2169,7 @@ static int gfx_v9_4_3_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { @@ -2327,6 +2335,10 @@ static int gfx_v9_4_3_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + if (adev->gfx.ras && adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer(adev); @@ -2964,6 +2976,46 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) { + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } + break; + default: + break; + } + + return 0; +} + static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -3116,6 +3168,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -4228,6 +4289,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { .process = gfx_v9_4_3_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = { + .set = gfx_v9_4_3_set_bad_op_fault_state, + .process = gfx_v9_4_3_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { .set = gfx_v9_4_3_set_priv_inst_fault_state, .process = gfx_v9_4_3_priv_inst_irq, @@ -4241,6 +4307,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; } From ccbfea78adf75d3d9e87aa739dab83254f5333fa Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 8 Jul 2024 23:18:57 +0200 Subject: [PATCH 0177/1523] Input: ads7846 - ratelimit the spi_sync error message In case the touch controller is not connected, this message keeps scrolling on the console indefinitelly. Ratelimit it to avoid filling kernel logs. " ads7846 spi2.1: spi_sync --> -22 " Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240708211913.171243-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 4247283c7271..f89c0dd15d8b 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -824,7 +824,7 @@ static void ads7846_read_state(struct ads7846 *ts) m = &ts->msg[msg_idx]; error = spi_sync(ts->spi, m); if (error) { - dev_err(&ts->spi->dev, "spi_sync --> %d\n", error); + dev_err_ratelimited(&ts->spi->dev, "spi_sync --> %d\n", error); packet->ignore = true; return; } From da897484557b34a54fabb81f6c223c19a69e546d Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Tue, 23 Jul 2024 21:33:30 -0700 Subject: [PATCH 0178/1523] Input: synaptics - enable SMBus for HP Elitebook 840 G2 The kernel reports that the touchpad for this device can support a different bus. With SMBus enabled the touchpad movement is smoother and three-finger gestures are recognized. Signed-off-by: Jonathan Denose Link: https://lore.kernel.org/r/20240719180612.1.Ib652dd808c274076f32cd7fc6c1160d2cf71753b@changeid Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 38191c3b31bf..380aa1614442 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -189,6 +189,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ From 76a791fa0926e504f262310cbd0c8f1a60e67ae8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 22 Jul 2024 16:39:37 +0100 Subject: [PATCH 0179/1523] drm/i915/dp: Make read-only array bw_gbps static const Don't populate the read-only array bw_gbps on the stack at run time, instead make it static const. Signed-off-by: Colin Ian King Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240722153937.574819-1-colin.i.king@gmail.com --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1e43e32e0519..116041f3886a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3420,7 +3420,7 @@ static void intel_dp_get_pcon_dsc_cap(struct intel_dp *intel_dp) static int intel_dp_pcon_get_frl_mask(u8 frl_bw_mask) { - int bw_gbps[] = {9, 18, 24, 32, 40, 48}; + static const int bw_gbps[] = {9, 18, 24, 32, 40, 48}; int i; for (i = ARRAY_SIZE(bw_gbps) - 1; i >= 0; i--) { From b049504e211e8f4dbcd40434f2dcab2215ea1039 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 12:44:57 -0400 Subject: [PATCH 0180/1523] drm/amdkfd: Validate user queue svm memory residency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Queue CWSR area maybe registered to GPU as svm memory, create queue to ensure svm mapped to GPU with KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED flag. Add queue_refcount to struct svm_range, to track queue CWSR area usage. Because unmap mmu notifier callback return value is ignored, if application unmap the CWSR area while queue is active, pr_warn message in dmesg log. To be safe, evict user queue. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 110 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 + 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 3fd386dcb011..67242ce051b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,7 @@ #include #include "kfd_priv.h" +#include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) { @@ -83,6 +84,100 @@ void uninit_queue(struct queue *q) kfree(q); } +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct list_head update_list; + struct svm_range *prange; + int ret = -EINVAL; + + INIT_LIST_HEAD(&update_list); + addr >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + + mutex_lock(&p->svms.lock); + + /* + * range may split to multiple svm pranges aligned to granularity boundaery. + */ + while (size) { + uint32_t gpuid, gpuidx; + int r; + + prange = svm_range_from_addr(&p->svms, addr, NULL); + if (!prange) + break; + + if (!prange->mapped_to_gpu) + break; + + r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx); + if (r < 0) + break; + if (!test_bit(gpuidx, prange->bitmap_access) && + !test_bit(gpuidx, prange->bitmap_aip)) + break; + + if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) + break; + + list_add(&prange->update_list, &update_list); + + if (prange->last - prange->start + 1 >= size) { + size = 0; + break; + } + + size -= prange->last - prange->start + 1; + addr += prange->last - prange->start + 1; + } + if (size) { + pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1); + goto out_unlock; + } + + list_for_each_entry(prange, &update_list, update_list) + atomic_inc(&prange->queue_refcount); + ret = 0; + +out_unlock: + mutex_unlock(&p->svms.lock); + return ret; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct svm_range *prange, *pchild; + struct interval_tree_node *node; + unsigned long last; + + addr >>= PAGE_SHIFT; + last = addr + (size >> PAGE_SHIFT) - 1; + + mutex_lock(&p->svms.lock); + + node = interval_tree_iter_first(&p->svms.objects, addr, last); + while (node) { + struct interval_tree_node *next_node; + unsigned long next_start; + + prange = container_of(node, struct svm_range, it_node); + next_node = interval_tree_iter_next(node, addr, last); + next_start = min(node->last, last) + 1; + + if (atomic_add_unless(&prange->queue_refcount, -1, 0)) { + list_for_each_entry(pchild, &prange->child_list, child_list) + atomic_add_unless(&pchild->queue_refcount, -1, 0); + } + + node = next_node; + addr = next_start; + } + + mutex_unlock(&p->svms.lock); +} + int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size) { @@ -165,8 +260,17 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, &properties->cwsr_bo, 0); + if (!err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + + err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, + properties->ctx_save_restore_area_size); if (err) - goto out_err_unreserve; + goto out_err_release; + + return 0; out_unreserve: amdgpu_bo_unreserve(vm->root.bo); @@ -174,6 +278,7 @@ out_unreserve: out_err_unreserve: amdgpu_bo_unreserve(vm->root.bo); +out_err_release: kfd_queue_release_buffers(pdd, properties); return err; } @@ -195,5 +300,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope kfd_queue_buffer_put(vm, &properties->cwsr_bo); amdgpu_bo_unreserve(vm->root.bo); + + kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, + properties->ctx_save_restore_area_size); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dc..2339bbdf452f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1051,6 +1051,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return 0; } @@ -1992,6 +1993,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return new; } @@ -2444,6 +2446,16 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, unsigned long s, l; bool unmap_parent; + if (atomic_read(&prange->queue_refcount)) { + int r; + + pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n", + prange->start << PAGE_SHIFT); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); + if (r) + pr_debug("failed %d to quiesce KFD queues\n", r); + } + p = kfd_lookup_process_by_mm(mm); if (!p) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 70c1776611c4..747325a2ea89 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -137,6 +137,7 @@ struct svm_range { DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); bool mapped_to_gpu; + atomic_t queue_refcount; }; static inline void svm_range_lock(struct svm_range *prange) From 305cd109b761202d71f2f655ea369fe889ba1d01 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jun 2024 13:00:48 -0400 Subject: [PATCH 0181/1523] drm/amdkfd: Validate user queue update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure update queue new ring buffer is mapped on GPU with correct size. Decrease queue old ring_bo queue_refcount and increase new ring_bo queue_refcount. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../amd/amdkfd/kfd_process_queue_manager.c | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 4947f28b3afb..9995dbb43359 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -549,11 +549,41 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { + if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } + /* + * Update with NULL ring address is used to disable the queue + */ + if (p->queue_address && p->queue_size) { + struct kfd_process_device *pdd; + struct amdgpu_vm *vm; + struct queue *q = pqn->q; + int err; + + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) + return -ENODEV; + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, + p->queue_size)) { + pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", + p->queue_address, p->queue_size); + return -EFAULT; + } + + kfd_queue_buffer_put(vm, &pqn->q->properties.ring_bo); + amdgpu_bo_unreserve(vm->root.bo); + + pqn->q->properties.ring_bo = p->ring_bo; + } + pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; From 3b37e2725ab32c9055bec00ef41caa63839efd37 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 18 Jul 2024 13:18:53 +0800 Subject: [PATCH 0182/1523] drm/amdgpu: skip kfd init if GFX is not ready. avoid kfd init crash in that case. Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Tested-by: Jesse Zhang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bcacf2e35eba..730dae77570c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2471,6 +2471,7 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; struct pci_dev *parent; int i, r; bool total; @@ -2608,7 +2609,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; - amdgpu_amdkfd_device_probe(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (ip_block->status.valid != false) + amdgpu_amdkfd_device_probe(adev); + adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; From 0b071245ddd98539d4f7493bdd188417fcf2d629 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 23 Jul 2024 16:54:34 +0800 Subject: [PATCH 0183/1523] drm/amdgpu: add missed harvest check for VCN IP v4/v5 To prevent below probe failure, add a check for models with VCN IP v4.0.6 where VCN1 may be harvested. v2: Apply the same check to VCN IP v4.0 and v5.0. [ 54.070117] RIP: 0010:vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] [ 54.071055] Code: 80 fb ff 8d 82 00 80 fe ff 81 fe 00 06 00 00 0f 43 c2 49 69 d5 38 0d 00 00 48 8d 71 04 c1 e8 02 4c 01 f2 48 89 b2 50 f6 02 00 <89> 01 48 8b 82 50 f6 02 00 48 8d 48 04 48 89 8a 50 f6 02 00 c7 00 [ 54.072408] RSP: 0018:ffffb17985f736f8 EFLAGS: 00010286 [ 54.072793] RAX: 00000000000000d6 RBX: ffff99a82f680000 RCX: 0000000000000000 [ 54.073315] RDX: ffff99a82f680000 RSI: 0000000000000004 RDI: ffff99a82f680000 [ 54.073835] RBP: ffffb17985f73730 R08: 0000000000000001 R09: 0000000000000000 [ 54.074353] R10: 0000000000000008 R11: ffffb17983c05000 R12: 0000000000000000 [ 54.074879] R13: 0000000000000000 R14: ffff99a82f680000 R15: 0000000000000001 [ 54.075400] FS: 00007f8d9c79a000(0000) GS:ffff99ab2f140000(0000) knlGS:0000000000000000 [ 54.075988] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 54.076408] CR2: 0000000000000000 CR3: 0000000140c3a000 CR4: 0000000000750ef0 [ 54.076927] PKRU: 55555554 [ 54.077132] Call Trace: [ 54.077319] [ 54.077484] ? show_regs+0x69/0x80 [ 54.077747] ? __die+0x28/0x70 [ 54.077979] ? page_fault_oops+0x180/0x4b0 [ 54.078286] ? do_user_addr_fault+0x2d2/0x680 [ 54.078610] ? exc_page_fault+0x84/0x190 [ 54.078910] ? asm_exc_page_fault+0x2b/0x30 [ 54.079224] ? vcn_v4_0_5_start_dpg_mode+0x9be/0x36b0 [amdgpu] [ 54.079941] ? vcn_v4_0_5_start_dpg_mode+0xe6/0x36b0 [amdgpu] [ 54.080617] vcn_v4_0_5_set_powergating_state+0x82/0x19b0 [amdgpu] [ 54.081316] amdgpu_device_ip_set_powergating_state+0x64/0xc0 [amdgpu] [ 54.082057] amdgpu_vcn_ring_begin_use+0x6f/0x1d0 [amdgpu] [ 54.082727] amdgpu_ring_alloc+0x44/0x70 [amdgpu] [ 54.083351] amdgpu_vcn_dec_sw_ring_test_ring+0x40/0x110 [amdgpu] [ 54.084054] amdgpu_ring_test_helper+0x22/0x90 [amdgpu] [ 54.084698] vcn_v4_0_5_hw_init+0x87/0xc0 [amdgpu] [ 54.085307] amdgpu_device_init+0x1f96/0x2780 [amdgpu] [ 54.085951] amdgpu_driver_load_kms+0x1e/0xc0 [amdgpu] [ 54.086591] amdgpu_pci_probe+0x19f/0x550 [amdgpu] [ 54.087215] local_pci_probe+0x48/0xa0 [ 54.087509] pci_device_probe+0xc9/0x250 [ 54.087812] really_probe+0x1a4/0x3f0 [ 54.088101] __driver_probe_device+0x7d/0x170 [ 54.088443] driver_probe_device+0x24/0xa0 [ 54.088765] __driver_attach+0xdd/0x1d0 [ 54.089068] ? __pfx___driver_attach+0x10/0x10 [ 54.089417] bus_for_each_dev+0x8e/0xe0 [ 54.089718] driver_attach+0x22/0x30 [ 54.090000] bus_add_driver+0x120/0x220 [ 54.090303] driver_register+0x62/0x120 [ 54.090606] ? __pfx_amdgpu_init+0x10/0x10 [amdgpu] [ 54.091255] __pci_register_driver+0x62/0x70 [ 54.091593] amdgpu_init+0x67/0xff0 [amdgpu] [ 54.092190] do_one_initcall+0x5f/0x330 [ 54.092495] do_init_module+0x68/0x240 [ 54.092794] load_module+0x201c/0x2110 [ 54.093093] init_module_from_file+0x97/0xd0 [ 54.093428] ? init_module_from_file+0x97/0xd0 [ 54.093777] idempotent_init_module+0x11c/0x2a0 [ 54.094134] __x64_sys_finit_module+0x64/0xc0 [ 54.094476] do_syscall_64+0x58/0x120 [ 54.094767] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Signed-off-by: Tim Huang Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index f6d96a44d75f..776c539bfdda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1045,6 +1045,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1498,6 +1501,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index f45495de6875..8d75061f9f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -958,6 +958,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1162,6 +1165,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 070b56610c7d..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -721,6 +721,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -898,6 +901,9 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; From 08ae395ea22fb3d9b318c8bde28c0dfd2f5fa4d2 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 17:18:17 +0530 Subject: [PATCH 0184/1523] drm/amd/display: Add null check for set_output_gamma in dcn30_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn30_set_output_transfer_func function. Previously, set_output_gamma was being checked for nullity at line 386, but then it was being dereferenced without any nullity check at line 401. This could potentially lead to a null pointer dereference error if set_output_gamma is indeed null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a nullity check for set_output_gamma before the call to set_output_gamma at line 401. If set_output_gamma is null, we log an error message and do not call the function. This fix prevents a potential null pointer dereference error. drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:401 dcn30_set_output_transfer_func() error: we previously assumed 'mpc->funcs->set_output_gamma' could be null (see line 386) drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c 373 bool dcn30_set_output_transfer_func(struct dc *dc, 374 struct pipe_ctx *pipe_ctx, 375 const struct dc_stream_state *stream) 376 { 377 int mpcc_id = pipe_ctx->plane_res.hubp->inst; 378 struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; 379 const struct pwl_params *params = NULL; 380 bool ret = false; 381 382 /* program OGAM or 3DLUT only for the top pipe*/ 383 if (pipe_ctx->top_pipe == NULL) { 384 /*program rmu shaper and 3dlut in MPC*/ 385 ret = dcn30_set_mpc_shaper_3dlut(pipe_ctx, stream); 386 if (ret == false && mpc->funcs->set_output_gamma) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If this is NULL 387 if (stream->out_transfer_func.type == TF_TYPE_HWPWL) 388 params = &stream->out_transfer_func.pwl; 389 else if (pipe_ctx->stream->out_transfer_func.type == 390 TF_TYPE_DISTRIBUTED_POINTS && 391 cm3_helper_translate_curve_to_hw_format( 392 &stream->out_transfer_func, 393 &mpc->blender_params, false)) 394 params = &mpc->blender_params; 395 /* there are no ROM LUTs in OUTGAM */ 396 if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED) 397 BREAK_TO_DEBUGGER(); 398 } 399 } 400 --> 401 mpc->funcs->set_output_gamma(mpc, mpcc_id, params); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Then it will crash 402 return ret; 403 } Fixes: d99f13878d6f ("drm/amd/display: Add DCN3 HWSEQ") Reported-by: Dan Carpenter Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Hersen Wu Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 98a40d46aaae..42c52284a868 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -398,7 +398,11 @@ bool dcn30_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + else + DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__); + return ret; } From 47c0388b0589cb481c294dcb857d25a214c46eb3 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Fri, 19 Jul 2024 16:10:40 +0800 Subject: [PATCH 0185/1523] drm/amdgpu: reset vm state machine after gpu reset(vram lost) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Page table of compute VM in the VRAM will lost after gpu reset. VRAM won't be restored since compute VM has no shadows. [How] Use higher 32-bit of vm->generation to record a vram_lost_counter. Reset the VM state machine when vm->genertaion is not equal to the new generation token. v2: Check vm->generation instead of calling drm_sched_entity_error in amdgpu_vm_validate. v3: Use new generation token instead of vram_lost_counter for check. Signed-off-by: ZhenGuo Yin Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..a060c28f0877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += lower_32_bits(vm->generation); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; From 517fff221c1e6b8a8db69e7a440116caee120ff5 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 26 Jun 2024 14:52:28 -0400 Subject: [PATCH 0186/1523] drm/amdkfd: Store queue cwsr area size to node properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the queue eop buffer size, cwsr area size, ctl stack size calculation from Thunk, store the value to KFD node properties. Those will be used to validate queue eop buffer size, cwsr area size, ctl stack size when creating KFD user compute queue. Those will be exposed to user space via sysfs KFD node properties, to remove the duplicate calculation code from Thunk. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 75 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 4 ++ 4 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c31589043d5b..b5cae48dff66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1295,6 +1295,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_ void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 67242ce051b5..adcda9730c9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,7 @@ #include #include "kfd_priv.h" +#include "kfd_topology.h" #include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) @@ -305,3 +306,77 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope properties->ctx_save_restore_area_size); return 0; } + +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define DEBUGGER_BYTES_ALIGN 64 +#define DEBUGGER_BYTES_PER_WAVE 32 + +static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) +{ + u32 vgpr_size = 0x40000; + + if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ + gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + vgpr_size = 0x80000; + else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ + gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ + gfxv == 120000 || /* GFX_VERSION_GFX1200 */ + gfxv == 120001) /* GFX_VERSION_GFX1201 */ + vgpr_size = 0x60000; + + return vgpr_size; +} + +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ + (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + +#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ + ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ + +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40 + +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) +{ + struct kfd_node_properties *props = &dev->node_props; + u32 gfxv = props->gfx_target_version; + u32 ctl_stack_size; + u32 wg_data_size; + u32 wave_num; + u32 cu_num; + + if (gfxv < 80001) /* GFX_VERSION_CARRIZO */ + return; + + cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); + wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ + min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) + : cu_num * 32; + + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; + ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, + PAGE_SIZE); + + if ((gfxv / 10000 * 10000) == 100000) { + /* HW design limits control stack size to 0x7000. + * This is insufficient for theoretical PM4 cases + * but sufficient for AQL, limited by SPI events. + */ + ctl_stack_size = min(ctl_stack_size, 0x7000); + } + + props->ctl_stack_size = ctl_stack_size; + props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); + props->cwsr_size = ctl_stack_size + wg_data_size; + + if (gfxv == 80002) /* GFX_VERSION_TONGA */ + props->eop_buffer_size = 0x8000; + else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + props->eop_buffer_size = 4096; + else if (gfxv >= 80000) + props->eop_buffer_size = 4096; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6f89b06f89d3..a9b3eda65a2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2120,6 +2120,8 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->gpu->adev->gmc.xgmi.connected_to_cpu) dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_queue_ctx_save_restore_size(dev); + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2d1c9d771bef..43ba0d32e5bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -74,6 +74,10 @@ struct kfd_node_properties { uint32_t num_sdma_xgmi_engines; uint32_t num_sdma_queues_per_engine; uint32_t num_cp_queues; + uint32_t cwsr_size; + uint32_t ctl_stack_size; + uint32_t eop_buffer_size; + uint32_t debug_memory_size; char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; From 629568d25fea8ece4f65073f039aeef4e240ab67 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 26 Jun 2024 15:03:05 -0400 Subject: [PATCH 0187/1523] drm/amdkfd: Validate queue cwsr area and eop buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating KFD user compute queue, check if queue eop buffer size, cwsr area size, ctl stack size equal to the size of KFD node properities. Check the entire cwsr area which may split into multiple svm ranges aligned to granularity boundary. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 46 +++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index adcda9730c9f..9807e8adf77d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -225,9 +225,15 @@ void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { + struct kfd_topology_device *topo_dev; struct amdgpu_vm *vm; + u32 total_cwsr_size; int err; + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); if (err) @@ -252,6 +258,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { + if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { + pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", + properties->eop_buf_bo->tbo.base.size, + topo_dev->node_props.eop_buffer_size); + goto out_err_unreserve; + } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, &properties->eop_buf_bo, properties->eop_ring_buffer_size); @@ -259,15 +271,33 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } + if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { + pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", + properties->ctl_stack_size, + topo_dev->node_props.ctl_stack_size); + goto out_err_unreserve; + } + + if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + properties->ctx_save_restore_area_size, + topo_dev->node_props.cwsr_size); + goto out_err_unreserve; + } + + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, - &properties->cwsr_bo, 0); + &properties->cwsr_bo, total_cwsr_size); if (!err) goto out_unreserve; amdgpu_bo_unreserve(vm->root.bo); err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, - properties->ctx_save_restore_area_size); + total_cwsr_size); if (err) goto out_err_release; @@ -286,7 +316,9 @@ out_err_release: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { + struct kfd_topology_device *topo_dev; struct amdgpu_vm *vm; + u32 total_cwsr_size; int err; vm = drm_priv_to_vm(pdd->drm_priv); @@ -302,8 +334,14 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope amdgpu_bo_unreserve(vm->root.bo); - kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, - properties->ctx_save_restore_area_size); + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); return 0; } From 31b42af516afa1e184d1a9f9dd4096c54044269a Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 22 Jul 2024 12:14:49 +0530 Subject: [PATCH 0188/1523] drm/i915/hdcp: Add encoder check in intel_hdcp_get_capability Sometimes during hotplug scenario or suspend/resume scenario encoder is not always initialized when intel_hdcp_get_capability add a check to avoid kernel null pointer dereference. Signed-off-by: Suraj Kandpal Reviewed-by: Dnyaneshwar Bhadane Link: https://patchwork.freedesktop.org/patch/msgid/20240722064451.3610512-2-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 3ebe035f382e..05402ae6b569 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -203,11 +203,16 @@ int intel_hdcp_read_valid_bksv(struct intel_digital_port *dig_port, /* Is HDCP1.4 capable on Platform and Sink */ bool intel_hdcp_get_capability(struct intel_connector *connector) { - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_digital_port *dig_port; const struct intel_hdcp_shim *shim = connector->hdcp.shim; bool capable = false; u8 bksv[5]; + if (!intel_attached_encoder(connector)) + return capable; + + dig_port = intel_attached_dig_port(connector); + if (!shim) return capable; From d34f4f058edf1235c103ca9c921dc54820d14d40 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 22 Jul 2024 12:14:50 +0530 Subject: [PATCH 0189/1523] drm/i915/hdcp: Add encoder check in hdcp2_get_capability Add encoder check in intel_hdcp2_get_capability to avoid null pointer error. Signed-off-by: Suraj Kandpal Reviewed-by: Dnyaneshwar Bhadane Link: https://patchwork.freedesktop.org/patch/msgid/20240722064451.3610512-3-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360..47f51a5ab493 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -675,8 +675,15 @@ static int intel_dp_hdcp2_get_capability(struct intel_connector *connector, bool *capable) { - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - struct drm_dp_aux *aux = &dig_port->dp.aux; + struct intel_digital_port *dig_port; + struct drm_dp_aux *aux; + + *capable = false; + if (!intel_attached_encoder(connector)) + return -EINVAL; + + dig_port = intel_attached_dig_port(connector); + aux = &dig_port->dp.aux; return _intel_dp_hdcp2_get_capability(aux, capable); } From b4224f6bae3801d589f815672ec62800a1501b0d Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 22 Jul 2024 12:14:51 +0530 Subject: [PATCH 0190/1523] drm/xe/hdcp: Check GSC structure validity Sometimes xe_gsc is not initialized when checked at HDCP capability check. Add gsc structure check to avoid null pointer error. Signed-off-by: Suraj Kandpal Reviewed-by: Dnyaneshwar Bhadane Link: https://patchwork.freedesktop.org/patch/msgid/20240722064451.3610512-4-suraj.kandpal@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 14b8b4278317..3550f7360a64 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -40,10 +40,14 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; + struct xe_gsc *gsc = >->uc.gsc; bool ret = true; - if (!xe_uc_fw_is_enabled(>->uc.gsc.fw)) + if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) { + drm_dbg_kms(&xe->drm, + "GSC Components not ready for HDCP2.x\n"); return false; + } xe_pm_runtime_get(xe); if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { @@ -53,7 +57,7 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) goto out; } - if (!xe_gsc_proxy_init_done(>->uc.gsc)) + if (!xe_gsc_proxy_init_done(gsc)) ret = false; xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); From 3e307d6c28e7bc7d94b5699d0ed7fe07df6db094 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 24 Jul 2024 22:07:43 +0530 Subject: [PATCH 0191/1523] drm/i915/dp: Clear VSC SDP during post ddi disable routine Clear VSC SDP if intel_dp_set_infoframes is called from post ddi disable routine i.e with the variable of enable as false. This is to avoid an infoframes.enable mismatch issue which is caused when pipe is connected to eDp which has psr then connected to DPMST. In this case eDp's post ddi disable routine does not clear infoframes.enable VSC for the given pipe and DPMST does not recompute VSC SDP and write infoframes.enable which causes a mismatch. --v2 -Make the comment match the code [Jani] Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240724163743.3668407-1-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 116041f3886a..5d6568c8e186 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4369,8 +4369,11 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, if (!enable && HAS_DSC(dev_priv)) val &= ~VDIP_ENABLE_PPS; - /* When PSR is enabled, this routine doesn't disable VSC DIP */ - if (!crtc_state->has_psr) + /* + * This routine disables VSC DIP if the function is called + * to disable SDP or if it does not have PSR + */ + if (!enable || !crtc_state->has_psr) val &= ~VIDEO_DIP_ENABLE_VSC_HSW; intel_de_write(dev_priv, reg, val); From 8155566a26b8d6c1dd914f06a0c652e4e2f2adf1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:23:56 -0400 Subject: [PATCH 0192/1523] drm/amdgpu: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/atombios_encoders.c | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 25feab188dfe..ebf83fee43bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2065,26 +2065,29 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { adev->mode_info.bios_hardcoded_edid = edid; adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From 17c6baff3d5f65c8da164137a58742541a060b2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:31:58 -0400 Subject: [PATCH 0193/1523] drm/radeon: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: c324acd5032f ("drm/radeon/kms: parse the extended LCD info block") Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_atombios.c | 29 +++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 97c4e10d0550..168f3f94003b 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1717,26 +1717,29 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { rdev->mode_info.bios_hardcoded_edid = edid; rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From 75c3f06fd900e01a68b8ade17e6b6be64cfdc9ff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Jul 2024 14:24:28 +0100 Subject: [PATCH 0194/1523] drm/amd/display: Fix spelling mistake "tolarance" -> "tolerance" There is a spelling mistake in a dml2_printf message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 0b671c665373..5ba38d51382f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8267,7 +8267,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; From fdedd77b0eb31209c59107de66880ef0be21a77a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 24 Jul 2024 08:49:35 -0700 Subject: [PATCH 0195/1523] drm/amd/display: Reapply 2fde4fdddc1f Commit 2563391e57b5 ("drm/amd/display: DML2.1 resynchronization") blew away the compiler warning fix from commit 2fde4fdddc1f ("drm/amd/display: Avoid -Wenum-float-conversion in add_margin_and_round_to_dfs_grainularity()"), causing the warning to reappear. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c:183:58: error: arithmetic between enumeration type 'enum dentist_divider_range' and floating-point type 'double' [-Werror,-Wenum-float-conversion] 183 | divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Apply the fix again to resolve the warning. Fixes: 2563391e57b5 ("drm/amd/display: DML2.1 resynchronization") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 0021bbaa4b91..f19f6ebaae13 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -180,7 +180,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma clock_khz *= 1.0 + margin; - divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* we want to floor here to get higher clock than required rather than lower */ if (divider < DFS_DIVIDER_RANGE_2_START) { From f3c681f0c3b171db923d6147785064962351e043 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 24 Jul 2024 15:37:49 +0800 Subject: [PATCH 0196/1523] drm/amd/display: use swap() in sort() Use existing swap() function rather than duplicating its implementation. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c:17:29-30: WARNING opportunity for swap(). Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9573 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 717536d7bb30..8e68a8094658 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -7,16 +7,12 @@ static void sort(double *list_a, int list_a_size) { - double temp; // For all elements b[i] in list_b[] for (int i = 0; i < list_a_size - 1; i++) { // Find the first element of list_a that's larger than b[i] for (int j = i; j < list_a_size - 1; j++) { - if (list_a[j] > list_a[j + 1]) { - temp = list_a[j]; - list_a[j] = list_a[j + 1]; - list_a[j + 1] = temp; - } + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); } } } From 60c30ba7ba2064066ec462236666058cbbf619c1 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 4 Jul 2024 08:14:15 +0800 Subject: [PATCH 0197/1523] drm/amdgpu/pm: support gpu_metrics sysfs interface for smu v14.0.2/3 support gpu_metrics sysfs interface for smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca..e1a27903c80a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, From e06b71b2313a00579ba64a1cc43ad29d64cb8d4c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 21 May 2024 13:22:15 -0400 Subject: [PATCH 0198/1523] drm/amdkfd: allow users to target recommended SDMA engines Certain GPUs have better copy performance over xGMI on specific SDMA engines depending on the source and destination GPU. Allow users to create SDMA queues on these recommended engines. Close to 2x overall performance has been observed with this optimization. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 52 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 + include/uapi/linux/kfd_ioctl.h | 6 ++- 7 files changed, 116 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65a37ac5a0f0..0622ebd7e8ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -255,6 +255,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; q_properties->ctl_stack_size = args->ctl_stack_size; + q_properties->sdma_engine_id = args->sdma_engine_id; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_SDMA; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) + q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; else return -ENOTSUPP; @@ -333,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + + kfd_get_num_xgmi_sdma_engines(dev) - 1; + + if (q_properties.sdma_engine_id > max_sdma_eng_id) { + err = -EINVAL; + pr_err("sdma_engine_id %i exceeds maximum id of %i\n", + q_properties.sdma_engine_id, max_sdma_eng_id); + goto err_sdma_engine_id; + } + } + if (!pdd->qpd.proc_doorbells) { err = kfd_alloc_process_doorbells(dev->kfd, pdd); if (err) { @@ -387,6 +402,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: kfd_queue_release_buffers(pdd, &q_properties); err_acquire_queue_buf: +err_sdma_engine_id: err_bind_process: err_pdd: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index fdc76c24b2e7..f0bfeb35246f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1532,6 +1532,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_xgmi_sdma_engines(dqm->dev); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int i, num_queues, num_engines, eng_offset = 0, start_engine; + bool free_bit_found = false, is_xgmi = false; + + if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { + num_queues = get_num_sdma_queues(dqm); + num_engines = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA; + } else { + num_queues = get_num_xgmi_sdma_queues(dqm); + num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); + eng_offset = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; + is_xgmi = true; + } + + /* Scan available bit based on target engine ID. */ + start_engine = q->properties.sdma_engine_id - eng_offset; + for (i = start_engine; i < num_queues; i += num_engines) { + + if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) + continue; + + clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); + q->sdma_id = i; + q->properties.sdma_queue_id = q->sdma_id / num_engines; + free_bit_found = true; + break; + } + + if (!free_bit_found) { + dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", + q->properties.sdma_engine_id); + return -ENOMEM; + } } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1784,7 +1819,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA || - q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || + q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { dqm_lock(dqm); retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b5cae48dff66..4190fa339913 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -414,13 +414,16 @@ enum kfd_unmap_queues_filter { * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. * * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. + * + * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID. */ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_DIQ, - KFD_QUEUE_TYPE_SDMA_XGMI + KFD_QUEUE_TYPE_SDMA_XGMI, + KFD_QUEUE_TYPE_SDMA_BY_ENG_ID }; enum kfd_queue_format { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9995dbb43359..f732ee35b531 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -366,6 +366,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index a9b3eda65a2c..40771f8752cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -292,6 +292,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, iolink->max_bandwidth); sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", iolink->rec_transfer_size); + sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask", + iolink->rec_sdma_eng_id_mask); sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); return offs; @@ -1265,6 +1267,55 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, } } +#define REC_SDMA_NUM_GPU 8 +static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = { + { -1, 14, 12, 2, 4, 8, 10, 6 }, + { 14, -1, 2, 10, 8, 4, 6, 12 }, + { 10, 2, -1, 12, 14, 6, 4, 8 }, + { 2, 12, 10, -1, 6, 14, 8, 4 }, + { 4, 8, 14, 6, -1, 10, 12, 2 }, + { 8, 4, 6, 14, 12, -1, 2, 10 }, + { 10, 6, 4, 8, 12, 2, -1, 14 }, + { 6, 12, 8, 4, 2, 10, 14, -1 }}; + +static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + struct kfd_node *gpu = outbound_link->gpu; + struct amdgpu_device *adev = gpu->adev; + int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && + adev->aid_mask && num_xgmi_nodes && + (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == + AMDGPU_SPX_PARTITION_MODE) && + (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + + if (support_rec_eng) { + int src_socket_id = adev->gmc.xgmi.physical_node_id; + int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + + outbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + inbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + } else { + int num_sdma_eng = kfd_get_num_sdma_engines(gpu); + int i, eng_offset = 0; + + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { + eng_offset = num_sdma_eng; + num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + } + + for (i = 0; i < num_sdma_eng; i++) { + outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + } + } +} + static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { struct kfd_iolink_properties *link, *inbound_link; @@ -1303,6 +1354,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 43ba0d32e5bd..155b5c410af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -125,6 +125,7 @@ struct kfd_iolink_properties { uint32_t min_bandwidth; uint32_t max_bandwidth; uint32_t rec_transfer_size; + uint32_t rec_sdma_eng_id_mask; uint32_t flags; struct kfd_node *gpu; struct kobject *kobj; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { From 8f28c465a455563917aa15bf5ef40016b2a665d6 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 25 Jul 2024 09:57:12 +0800 Subject: [PATCH 0199/1523] drm/amd/display: remove unneeded semicolon No functional modification involved. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:481:2-3: Unneeded semicolon. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3783:168-169: Unneeded semicolon. ./drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3782:166-167: Unneeded semicolon. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9575 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 5ba38d51382f..13f2c80bad4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -478,7 +478,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode default: DML2_ASSERT(0); return 256; - }; + } } static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) @@ -3779,8 +3779,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) From 0823dc64586ba5ea13a7d200a5d33e4c5fa45950 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 1 Jul 2024 11:31:59 +0800 Subject: [PATCH 0200/1523] vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler remap_pfn_page() should not be called in the fault handler as it may change the vma->flags which may trigger lockdep warning since the vma write lock is not held. Actually there's no need to modify the vma->flags as it has been set in the mmap(). So this patch switches to use vmf_insert_pfn() instead. Reported-by: Dragos Tatulea Tested-by: Dragos Tatulea Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap") Cc: stable@vger.kernel.org Signed-off-by: Jason Wang Message-Id: <20240701033159.18133-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Michal Kubiak --- drivers/vhost/vdpa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 63a53680a85c..6b9c12acf438 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1483,13 +1483,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - PFN_DOWN(notify.addr), PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { From b34ce4a59cfe9cd0d6f870e6408e8ec88a964585 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 Jul 2024 22:03:32 +0200 Subject: [PATCH 0201/1523] power: supply: axp288_charger: Fix constant_charge_voltage writes info->max_cv is in millivolts, divide the microvolt value being written to constant_charge_voltage by 1000 *before* clamping it to info->max_cv. Before this fix the code always tried to set constant_charge_voltage to max_cv / 1000 = 4 millivolt, which ends up in setting it to 4.1V which is the lowest supported value. Fixes: 843735b788a4 ("power: axp288_charger: axp288 charger driver") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240717200333.56669-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp288_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index b5903193e2f9..aea17289a178 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -337,8 +337,8 @@ static int axp288_charger_usb_set_property(struct power_supply *psy, } break; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: - scaled_val = min(val->intval, info->max_cv); - scaled_val = DIV_ROUND_CLOSEST(scaled_val, 1000); + scaled_val = DIV_ROUND_CLOSEST(val->intval, 1000); + scaled_val = min(scaled_val, info->max_cv); ret = axp288_charger_set_cv(info, scaled_val); if (ret < 0) { dev_warn(&info->pdev->dev, "set charge voltage failed\n"); From 81af7f2342d162e24ac820c10e68684d9f927663 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 Jul 2024 22:03:33 +0200 Subject: [PATCH 0202/1523] power: supply: axp288_charger: Round constant_charge_voltage writes down Round constant_charge_voltage writes down to the first supported lower value, rather then rounding them up to the first supported higher value. This fixes e.g. writing 4250000 resulting in a value of 4350000 which might be dangerous, instead writing 4250000 will now result in a safe 4200000 value. Fixes: 843735b788a4 ("power: axp288_charger: axp288 charger driver") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240717200333.56669-2-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp288_charger.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index aea17289a178..ac05942e4e6a 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -178,18 +178,18 @@ static inline int axp288_charger_set_cv(struct axp288_chrg_info *info, int cv) u8 reg_val; int ret; - if (cv <= CV_4100MV) { - reg_val = CHRG_CCCV_CV_4100MV; - cv = CV_4100MV; - } else if (cv <= CV_4150MV) { - reg_val = CHRG_CCCV_CV_4150MV; - cv = CV_4150MV; - } else if (cv <= CV_4200MV) { - reg_val = CHRG_CCCV_CV_4200MV; - cv = CV_4200MV; - } else { + if (cv >= CV_4350MV) { reg_val = CHRG_CCCV_CV_4350MV; cv = CV_4350MV; + } else if (cv >= CV_4200MV) { + reg_val = CHRG_CCCV_CV_4200MV; + cv = CV_4200MV; + } else if (cv >= CV_4150MV) { + reg_val = CHRG_CCCV_CV_4150MV; + cv = CV_4150MV; + } else { + reg_val = CHRG_CCCV_CV_4100MV; + cv = CV_4100MV; } reg_val = reg_val << CHRG_CCCV_CV_BIT_POS; From bf9d5cb588755ee41ac12a8976dccf44ae18281b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 15 Jul 2024 14:57:06 +0200 Subject: [PATCH 0203/1523] power: supply: qcom_battmgr: return EAGAIN when firmware service is not up The driver returns -ENODEV when the firmware battmrg service hasn't started yet, while per-se -ENODEV is fine, we usually use -EAGAIN to tell the user to retry again later. And the power supply core uses -EGAIN when the device isn't initialized, let's use the same return. This notably causes an infinite spam of: thermal thermal_zoneXX: failed to read out thermal zone (-19) because the thermal core doesn't understand -ENODEV, but only considers -EAGAIN as a non-fatal error. While it didn't appear until now, commit [1] fixes thermal core and no more ignores thermal zones returning an error at first temperature update. [1] 5725f40698b9 ("thermal: core: Call monitor_thermal_zone() if zone temperature is invalid") Link: https://lore.kernel.org/all/2ed4c630-204a-4f80-a37f-f2ca838eb455@linaro.org/ Cc: stable@vger.kernel.org Fixes: 29e8142b5623 ("power: supply: Introduce Qualcomm PMIC GLINK power supply") Signed-off-by: Neil Armstrong Tested-by: Stephan Gerhold Reviewed-by: Stephan Gerhold Link: https://lore.kernel.org/r/20240715-topic-sm8x50-upstream-fix-battmgr-temp-tz-warn-v1-1-16e842ccead7@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index ec163d1bcd18..44c6301f5f17 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -486,7 +486,7 @@ static int qcom_battmgr_bat_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -683,7 +683,7 @@ static int qcom_battmgr_ac_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); if (ret) @@ -748,7 +748,7 @@ static int qcom_battmgr_usb_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -867,7 +867,7 @@ static int qcom_battmgr_wls_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); From d6cca7631a4b54a8995e3bc53e5afb11d3b0c8ff Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 12 Jul 2024 12:00:03 +0200 Subject: [PATCH 0204/1523] power: supply: qcom_battmgr: Ignore extra __le32 in info payload Some newer ADSP firmware versions on X1E80100 report an extra __le32 at the end of the battery information request payload, causing qcom_battmgr to fail to initialize. Adjust the check to ignore the extra field in the info payload so we can support both old and newer firmware versions. Tested-by: Srinivas Kandagatla Signed-off-by: Stephan Gerhold Tested-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240712-x1e80100-battmgr-v1-1-a253d767f493@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 44c6301f5f17..a99937cd733b 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -1007,7 +1007,9 @@ static void qcom_battmgr_sc8280xp_callback(struct qcom_battmgr *battmgr, battmgr->error = 0; break; case BATTMGR_BAT_INFO: - if (payload_len != sizeof(resp->info)) { + /* some firmware versions report an extra __le32 at the end of the payload */ + if (payload_len != sizeof(resp->info) && + payload_len != (sizeof(resp->info) + sizeof(__le32))) { dev_warn(battmgr->dev, "invalid payload length for battery information request: %zd\n", payload_len); From 01aa8c869d0cdaf603f42dc1d2302b164c25353a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 27 Jul 2024 16:58:24 +0100 Subject: [PATCH 0205/1523] blk-throttle: remove more latency dead-code The struct 'latency_bucket' and the #define 'request_bucket_index' are unused since commit bf20ab538c81 ("blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW") and the 'LATENCY_BUCKET_SIZE' #define was only used by the 'request_bucket_index' define. Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20240727155824.1000042-1-linux@treblig.org Signed-off-by: Jens Axboe --- block/blk-throttle.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index dc6140fa3de0..6943ec720f39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) -/* We measure latency for request size from <= 4k to >= 1M */ -#define LATENCY_BUCKET_SIZE 9 - -struct latency_bucket { - unsigned long total_latency; /* ns / 1024 */ - int samples; -}; - struct throtl_data { /* service tree for active throtl groups */ @@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) return tg->iops[rw]; } -#define request_bucket_index(sectors) \ - clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1) - /** * throtl_log - log debug message via blktrace * @sq: the service_queue being reported From f3392e662efdc095f10109f588aa4f3be86f7eb5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 23 Jul 2024 13:08:55 +0530 Subject: [PATCH 0206/1523] drm/amdgpu: add vcn ip dump ptr in vcn global struct Add pointer to the vcn ip dump in the vcn global structure to be accessible for all vcn version via global adev. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..f127eccf59d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From 50d10d9271f6c6542196c54275091c7b2c6edf97 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 22:35:56 +0530 Subject: [PATCH 0207/1523] drm/amdgpu: add macro to calculate offset with instance Add macro definition which calculate offset of the register with index override. This is useful in case when there is an array of registers which is common for all instances. To read registers in that case it is easy to define registers once and the index value is manually passed to calculate proper offset of register for each instance. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 282584a48be0..ef7c603b50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -93,6 +93,10 @@ struct soc15_ras_field_entry { #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) +/* Over ride the instance id */ +#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \ + (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } From 58d283801d06d4434df6625ed6e6b8d2ba47fe65 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:35:41 +0530 Subject: [PATCH 0208/1523] drm/amdgpu: add vcn_v3_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v3_0. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 149 +++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..d0bca93f8226 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,115 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_RBC_IB_VMID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SCRATCH1) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +235,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -246,6 +357,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +404,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2324,32 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool reg_safe; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + reg_safe = (RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS) & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (reg_safe) + for (j = 0; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,7 +2368,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, .print_ip_state = NULL, }; From cd162ae9bc3ba91eb630a1321afd3d1dde5f2000 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:48:28 +0530 Subject: [PATCH 0209/1523] drm/amdgpu: add print support for vcn_v3_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v3_0. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index d0bca93f8226..9e1cbeee10db 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2324,6 +2324,32 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + drm_printf(p, "\nActive Instance:VCN%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } +} + static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2369,7 +2395,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From fbfb5f0342253d92c4e446588c428a9d90c3f610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 24 Jul 2024 09:24:02 +0200 Subject: [PATCH 0210/1523] drm/amdgpu: fix contiguous handling for IB parsing v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we won't get correct access to the IB. v2: keep setting AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS to avoid problems in the VRAM backend. Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3501 Fixes: e362b7c8f8c7 ("drm/amdgpu: Modify the contiguous flags behaviour") Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Tested-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index cde2f4548a62..1e167d925b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1782,7 +1782,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1797,13 +1797,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } From 25dd25f86eca77b1ce4a611432f57b006fa91615 Mon Sep 17 00:00:00 2001 From: Venkata Narendra Kumar Gutta Date: Tue, 18 Jun 2024 18:16:52 -0700 Subject: [PATCH 0211/1523] drm/amdgpu: Add MFD support for ISP I2C bus ISP I2C bus device can't be enumerated via ACPI mechanism since it shares the memory map with the AMDGPU. So use the MFD mechanism for registering the ISP I2C device and add the required resources. Signed-off-by: Venkata Narendra Kumar Gutta Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h | 1 + drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c | 57 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h | 11 +++++ drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 57 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h | 11 +++++ 5 files changed, 113 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 44e2ea8c9728..b03664c66dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -49,6 +49,7 @@ struct amdgpu_isp { const struct isp_funcs *funcs; struct mfd_cell *isp_cell; struct resource *isp_res; + struct resource *isp_i2c_res; struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c index aac107898bae..964c29ef25dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -42,23 +42,23 @@ static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = { static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP410_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC; + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +83,53 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP410_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_0_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE; + + isp->isp_res[2].name = "isp_gpio_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET + + ISP410_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_0_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP410_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h index 315f2822410c..7db24c0f1080 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP410_MEM_RES 2 +#define MAX_ISP410_SENSOR_RES 1 #define MAX_ISP410_INT_SRC 8 +#define ISP410_PHY0_OFFSET 0x66700 +#define ISP410_PHY0_SIZE 0xD30 + +#define ISP410_I2C0_OFFSET 0x66400 +#define ISP410_I2C0_SIZE 0x100 + +#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP410_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4e17fa03f7b5..b56f27295468 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -42,23 +42,24 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP411_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC; + + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +84,52 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP411_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_1_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE; + + isp->isp_res[2].name = "isp_4_1_1_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET + + ISP411_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_1_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP411_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h index dfb9522c9d6a..40887ddeb08c 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP411_MEM_RES 2 +#define MAX_ISP411_SENSOR_RES 1 #define MAX_ISP411_INT_SRC 8 +#define ISP411_PHY0_OFFSET 0x66700 +#define ISP411_PHY0_SIZE 0xD30 + +#define ISP411_I2C0_OFFSET 0x66400 +#define ISP411_I2C0_SIZE 0x100 + +#define ISP411_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP411_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); #endif From 8e4ed3cf1642df0c4456443d865cff61a9598aa8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jul 2024 08:14:56 +0530 Subject: [PATCH 0212/1523] drm/amd/display: Add null check for pipe_ctx->plane_state in dcn20_program_pipe This commit addresses a null pointer dereference issue in the `dcn20_program_pipe` function. The issue could occur when `pipe_ctx->plane_state` is null. The fix adds a check to ensure `pipe_ctx->plane_state` is not null before accessing. This prevents a null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn20/dcn20_hwseq.c:1925 dcn20_program_pipe() error: we previously assumed 'pipe_ctx->plane_state' could be null (see line 1877) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 270e337ae27b..5a6064999033 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1922,22 +1922,29 @@ static void dcn20_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->plane_state->update_flags.bits.hdr_mult) + if (pipe_ctx->update_flags.bits.enable || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } } if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.gamma_change) || + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.lut_3d)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1947,7 +1954,8 @@ static void dcn20_program_pipe( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || pipe_ctx->stream->update_flags.bits.out_tf || - pipe_ctx->plane_state->update_flags.bits.output_tf_change) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1971,7 +1979,7 @@ static void dcn20_program_pipe( } /* Set ABM pipe after other pipe configurations done */ - if (pipe_ctx->plane_state->visible) { + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { if (pipe_ctx->stream_res.abm) { dc->hwss.set_pipe(pipe_ctx); pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, From 66d71a72539e173a9b00ca0b1852cbaa5f5bf1ad Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jul 2024 07:23:48 +0530 Subject: [PATCH 0213/1523] drm/amd/display: Add null check for top_pipe_to_program in commit_planes_for_stream This commit addresses a null pointer dereference issue in the `commit_planes_for_stream` function at line 4140. The issue could occur when `top_pipe_to_program` is null. The fix adds a check to ensure `top_pipe_to_program` is not null before accessing its stream_res. This prevents a null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:4140 commit_planes_for_stream() error: we previously assumed 'top_pipe_to_program' could be null (see line 3906) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b8a6c062426d..95d6e29d5e47 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4137,7 +4137,8 @@ static void commit_planes_for_stream(struct dc *dc, } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); From 7c5b344537a143d15385992e41a50a9c5125e93c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 26 Jul 2024 12:17:12 +0530 Subject: [PATCH 0214/1523] drm/amdkfd: Fix missing error code in kfd_queue_acquire_buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fix involves setting 'err' to '-EINVAL' before each 'goto out_err_unreserve'. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:265 kfd_queue_acquire_buffers() warn: missing error code 'err' drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c 226 int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) 227 { 228 struct kfd_topology_device *topo_dev; 229 struct amdgpu_vm *vm; 230 u32 total_cwsr_size; 231 int err; 232 233 topo_dev = kfd_topology_device_by_id(pdd->dev->id); 234 if (!topo_dev) 235 return -EINVAL; 236 237 vm = drm_priv_to_vm(pdd->drm_priv); 238 err = amdgpu_bo_reserve(vm->root.bo, false); 239 if (err) 240 return err; 241 242 err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); 243 if (err) 244 goto out_err_unreserve; 245 246 err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); 247 if (err) 248 goto out_err_unreserve; 249 250 err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, 251 &properties->ring_bo, properties->queue_size); 252 if (err) 253 goto out_err_unreserve; 254 255 /* only compute queue requires EOP buffer and CWSR area */ 256 if (properties->type != KFD_QUEUE_TYPE_COMPUTE) 257 goto out_unreserve; This is clearly a success path. 258 259 /* EOP buffer is not required for all ASICs */ 260 if (properties->eop_ring_buffer_address) { 261 if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { 262 pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", 263 properties->eop_buf_bo->tbo.base.size, 264 topo_dev->node_props.eop_buffer_size); --> 265 goto out_err_unreserve; This has err in the label name. err = -EINVAL? 266 } 267 err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, 268 &properties->eop_buf_bo, 269 properties->eop_ring_buffer_size); 270 if (err) 271 goto out_err_unreserve; 272 } 273 274 if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { 275 pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", 276 properties->ctl_stack_size, 277 topo_dev->node_props.ctl_stack_size); 278 goto out_err_unreserve; err? 279 } 280 281 if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { 282 pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", 283 properties->ctx_save_restore_area_size, 284 topo_dev->node_props.cwsr_size); 285 goto out_err_unreserve; err? Not sure. 286 } 287 288 total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) 289 * NUM_XCC(pdd->dev->xcc_mask); 290 total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); 291 292 err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, 293 &properties->cwsr_bo, total_cwsr_size); 294 if (!err) 295 goto out_unreserve; 296 297 amdgpu_bo_unreserve(vm->root.bo); 298 299 err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, 300 total_cwsr_size); 301 if (err) 302 goto out_err_release; 303 304 return 0; 305 306 out_unreserve: 307 amdgpu_bo_unreserve(vm->root.bo); 308 return 0; 309 310 out_err_unreserve: 311 amdgpu_bo_unreserve(vm->root.bo); 312 out_err_release: 313 kfd_queue_release_buffers(pdd, properties); 314 return err; 315 } Fixes: 629568d25fea ("drm/amdkfd: Validate queue cwsr area and eop buffer size") Reported-by: Dan Carpenter Cc: Philip Yang Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 9807e8adf77d..63795f0cd55a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -262,6 +262,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", properties->eop_buf_bo->tbo.base.size, topo_dev->node_props.eop_buffer_size); + err = -EINVAL; goto out_err_unreserve; } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, @@ -275,6 +276,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", properties->ctl_stack_size, topo_dev->node_props.ctl_stack_size); + err = -EINVAL; goto out_err_unreserve; } @@ -282,6 +284,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); + err = -EINVAL; goto out_err_unreserve; } From f9e6759888866748f31b6b6c2142a481d587f51f Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Sun, 14 Jul 2024 16:31:05 -0400 Subject: [PATCH 0215/1523] drm/amd/display: roll back quality EASF and ISHARP and dc dependency changes [Why] Seeing several regressions related to quality EASF and ISHARP changes and removing dc dependency changes. [How] Roll back SPL changes Signed-off-by: Samson Tam Reviewed-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 2 + .../gpu/drm/amd/display/dc/dc_spl_translate.c | 49 +- .../gpu/drm/amd/display/dc/dc_spl_translate.h | 1 - .../dc/dml2/dml21/dml21_translation_helper.c | 8 - .../display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 646 +++--- .../dc/resource/dcn401/dcn401_resource.c | 7 - drivers/gpu/drm/amd/display/dc/spl/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 1304 +++++-------- .../drm/amd/display/dc/spl/dc_spl_filters.c | 15 - .../drm/amd/display/dc/spl/dc_spl_filters.h | 15 - .../display/dc/spl/dc_spl_isharp_filters.c | 427 +--- .../display/dc/spl/dc_spl_isharp_filters.h | 33 +- .../display/dc/spl/dc_spl_scl_easf_filters.c | 1726 ----------------- .../display/dc/spl/dc_spl_scl_easf_filters.h | 38 - .../amd/display/dc/spl/dc_spl_scl_filters.c | 92 +- .../amd/display/dc/spl/dc_spl_scl_filters.h | 55 +- .../display/dc/spl/dc_spl_scl_filters_old.c | 25 + .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 43 +- .../gpu/drm/amd/display/dc/spl/spl_debug.h | 23 - .../drm/amd/display/dc/spl/spl_fixpt31_32.c | 518 ----- .../drm/amd/display/dc/spl/spl_fixpt31_32.h | 546 ------ 21 files changed, 1025 insertions(+), 4550 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h create mode 100644 drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_debug.h delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c delete mode 100644 drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 5c9091f2a8b2..4f5b23520365 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1511,6 +1511,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; + spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 339d092e711c..bcc596724a4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -42,26 +42,26 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1; - scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; - scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; + scaling_quality->h_taps = spl_scaling_quality->h_taps; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; + scaling_quality->v_taps = spl_scaling_quality->v_taps; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct ratio *spl_ratios) + const struct spl_ratios *spl_ratios) { - ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); - ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); - ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); - ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); + ratios->horz = spl_ratios->horz; + ratios->vert = spl_ratios->vert; + ratios->horz_c = spl_ratios->horz_c; + ratios->vert_c = spl_ratios->vert_c; } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct init *spl_inits) + const struct spl_inits *spl_inits) { - inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); - inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); - inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); - inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); + inits->h = spl_inits->h; + inits->v = spl_inits->v; + inits->h_c = spl_inits->h_c; + inits->v_c = spl_inits->v_c; } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -170,15 +170,6 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; - - spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; - /* Check if it is stream is in fullscreen and if its HDR. - * Use this to determine sharpness levels - */ - spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); - spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); - } /// @brief Translate SPL output parameters to pipe context @@ -187,15 +178,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index eaa5c5373b28..c73d640c3632 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,7 +6,6 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" -#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 4164cda90b2a..0f34688e4058 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,14 +788,6 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; - } else if ((plane_state->ctx->dc->config.use_spl == true) && - (plane->composition.scaler_info.enabled == false)) { - /* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then - * allow case where ratio is 1 but taps > 1 - */ - if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || - (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) - plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 3a3745597f0c..88d24e36fe00 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -280,8 +280,7 @@ static void dpp401_dscl_set_scaler_filter( static void dpp401_dscl_set_scl_filter( struct dcn401_dpp *dpp, const struct scaler_data *scl_data, - bool chroma_coef_mode, - bool force_coeffs_update) + bool chroma_coef_mode) { bool h_2tap_hardcode_coef_en = false; bool v_2tap_hardcode_coef_en = false; @@ -344,7 +343,7 @@ static void dpp401_dscl_set_scl_filter( || (filter_v_c && (filter_v_c != dpp->filter_v_c)); } - if ((filter_updated) || (force_coeffs_update)) { + if (filter_updated) { uint32_t scl_mode = REG_READ(SCL_MODE); if (!h_2tap_hardcode_coef_en && filter_h) { @@ -656,226 +655,6 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height); } -/** - * dpp401_dscl_program_easf_v - Program EASF_V - * - * @dpp_base: High level DPP struct - * @scl_data: scalaer_data info - * - * This is the primary function to program vertical EASF registers - * - */ -static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) -{ - struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - - PERF_TRACE(); - /* DSCL_EASF_V_MODE */ - REG_SET_3(DSCL_EASF_V_MODE, 0, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, - SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - - if (!scl_data->dscl_prog_data.easf_v_en) { - PERF_TRACE(); - return; - } - - /* DSCL_EASF_V_BF_CNTL */ - REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, - SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, - SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, - SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ - REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ - REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, - SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ - REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, - SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - /* DSCL_EASF_V_BF1_PWL_SEGn */ - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, - SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, - SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, - SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, - SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, - SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, - SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, - SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, - SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - /* DSCL_EASF_V_BF3_PWL_SEGn */ - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, - SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, - SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, - SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, - SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, - SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, - SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); - PERF_TRACE(); -} -/** - * dpp401_dscl_program_easf_h - Program EASF_H - * - * @dpp_base: High level DPP struct - * @scl_data: scalaer_data info - * - * This is the primary function to program horizontal EASF registers - * - */ -static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) -{ - struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - - PERF_TRACE(); - /* DSCL_EASF_H_MODE */ - REG_SET_3(DSCL_EASF_H_MODE, 0, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, - SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - - if (!scl_data->dscl_prog_data.easf_h_en) { - PERF_TRACE(); - return; - } - - /* DSCL_EASF_H_BF_CNTL */ - REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, - SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ - REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ - REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, - SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ - REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, - SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - /* DSCL_EASF_H_BF1_PWL_SEGn */ - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, - SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0, - SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1, - SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1, - SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0, - SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2, - SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2, - SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0, - SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3, - SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3, - SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0, - SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4, - SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4, - SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0, - SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5, - SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5, - SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); - REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0, - SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6, - SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6, - SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); - REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0, - SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7, - SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); - /* DSCL_EASF_H_BF3_PWL_SEGn */ - REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0, - SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0, - SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0, - SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); - REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0, - SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1, - SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1, - SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); - REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0, - SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2, - SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2, - SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); - REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0, - SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3, - SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3, - SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); - REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0, - SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4, - SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4, - SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); - REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0, - SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5, - SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); - PERF_TRACE(); -} /** * dpp401_dscl_program_easf - Program EASF * @@ -890,19 +669,261 @@ static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_d struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - /* DSCL_SC_MODE */ - REG_SET_2(DSCL_SC_MODE, 0, - SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode, + REG_UPDATE(DSCL_SC_MODE, + SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); + REG_UPDATE(DSCL_SC_MODE, SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); + /* DSCL_EASF_V_MODE */ + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, + SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, + SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, + SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, + SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, + SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, + SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, + SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, + SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, + SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, + SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, + SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, + SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, + SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, + SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, + SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, + SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, + SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, + SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, + SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, + SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, + SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, + SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7); + REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, + SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, + SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, + SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, + SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, + SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, + SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, + SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, + SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, + SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, + SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, + SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, + SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, + SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, + SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, + SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, + SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, + SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5); + REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, + SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); + /* DSCL_EASF_H_MODE */ + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, + SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, + SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, + SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, + SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, + SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, + SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, + SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, + SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, + SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, + SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, + SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, + SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, + SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, + SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, + SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, + SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, + SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, + SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, + SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, + SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, + SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, + SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7); + REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, + SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, + SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, + SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, + SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, + SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, + SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, + SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, + SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, + SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, + SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, + SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, + SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, + SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, + SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, + SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, + SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, + SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5); + REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, + SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */ - REG_SET_2(DSCL_SC_MATRIX_C0C1, 0, - SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0, + REG_UPDATE(DSCL_SC_MATRIX_C0C1, + SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0); + REG_UPDATE(DSCL_SC_MATRIX_C0C1, SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1); - REG_SET_2(DSCL_SC_MATRIX_C2C3, 0, - SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2, + REG_UPDATE(DSCL_SC_MATRIX_C2C3, + SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2); + REG_UPDATE(DSCL_SC_MATRIX_C2C3, SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3); - dpp401_dscl_program_easf_v(dpp_base, scl_data); - dpp401_dscl_program_easf_h(dpp_base, scl_data); PERF_TRACE(); } /** @@ -937,11 +958,10 @@ static void dpp401_dscl_set_isharp_filter( REG_UPDATE(ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, 0); - /* LUT data write is auto-indexed. Write index once */ - REG_SET(ISHARP_DELTA_INDEX, 0, - ISHARP_DELTA_INDEX, 0); for (level = 0; level < NUM_LEVELS; level++) { filter_data = filter[level]; + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, level); REG_SET(ISHARP_DELTA_DATA, 0, ISHARP_DELTA_DATA, filter_data); } @@ -957,74 +977,107 @@ static void dpp401_dscl_set_isharp_filter( * */ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, - const struct scaler_data *scl_data, - bool *bs_coeffs_updated) + const struct scaler_data *scl_data) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - *bs_coeffs_updated = false; PERF_TRACE(); - /* ISHARP_MODE */ - REG_SET_6(ISHARP_MODE, 0, - ISHARP_EN, scl_data->dscl_prog_data.isharp_en, - ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode, - ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode, - ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode, - ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); - - /* Skip remaining register programming if ISHARP is disabled */ - if (!scl_data->dscl_prog_data.isharp_en) { - PERF_TRACE(); - return; - } - - /* ISHARP_NOISEDET_THRESHOLD */ - REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold, + /* ISHARP_EN */ + REG_UPDATE(ISHARP_MODE, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en); + /* ISHARP_NOISEDET_EN */ + REG_UPDATE(ISHARP_MODE, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); + /* ISHARP_NOISEDET_MODE */ + REG_UPDATE(ISHARP_MODE, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); + /* ISHARP_NOISEDET_UTHRE */ + REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); + /* ISHARP_NOISEDET_DTHRE */ + REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - - /* ISHARP_NOISE_GAIN_PWL */ - REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0, - ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in, - ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in, + REG_UPDATE(ISHARP_MODE, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); + /* ISHARP_NOISEDET_UTHRE */ + REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); + /* ISHARP_NOISEDET_DTHRE */ + REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); + /* ISHARP_NOISEDET_PWL_START_IN */ + REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); + /* ISHARP_NOISEDET_PWL_END_IN */ + REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); + /* ISHARP_NOISEDET_PWL_SLOPE */ + REG_UPDATE(ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); - + /* ISHARP_LBA_MODE */ + REG_UPDATE(ISHARP_MODE, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */ - REG_SET_3(ISHARP_LBA_PWL_SEG0, 0, - ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0], - ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0], + REG_UPDATE(ISHARP_LBA_PWL_SEG0, + ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]); + REG_UPDATE(ISHARP_LBA_PWL_SEG0, + ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]); + REG_UPDATE(ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]); - REG_SET_3(ISHARP_LBA_PWL_SEG1, 0, - ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1], - ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1], + REG_UPDATE(ISHARP_LBA_PWL_SEG1, + ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]); + REG_UPDATE(ISHARP_LBA_PWL_SEG1, + ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]); + REG_UPDATE(ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]); - REG_SET_3(ISHARP_LBA_PWL_SEG2, 0, - ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2], - ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2], + REG_UPDATE(ISHARP_LBA_PWL_SEG2, + ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]); + REG_UPDATE(ISHARP_LBA_PWL_SEG2, + ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]); + REG_UPDATE(ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]); - REG_SET_3(ISHARP_LBA_PWL_SEG3, 0, - ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3], - ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3], + REG_UPDATE(ISHARP_LBA_PWL_SEG3, + ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]); + REG_UPDATE(ISHARP_LBA_PWL_SEG3, + ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]); + REG_UPDATE(ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]); - REG_SET_3(ISHARP_LBA_PWL_SEG4, 0, - ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4], - ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4], + REG_UPDATE(ISHARP_LBA_PWL_SEG4, + ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]); + REG_UPDATE(ISHARP_LBA_PWL_SEG4, + ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]); + REG_UPDATE(ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]); - REG_SET_2(ISHARP_LBA_PWL_SEG5, 0, - ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5], + REG_UPDATE(ISHARP_LBA_PWL_SEG5, + ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]); + REG_UPDATE(ISHARP_LBA_PWL_SEG5, ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]); + /* ISHARP_FMT_MODE */ + REG_UPDATE(ISHARP_MODE, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); + /* ISHARP_FMT_NORM */ + REG_UPDATE(ISHARP_MODE, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); /* ISHARP_DELTA_LUT */ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); - - /* ISHARP_NLDELTA_SOFT_CLIP */ - REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0, - ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p, - ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p, - ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p, - ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n, - ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n, + /* ISHARP_NLDELTA_SCLIP_EN_P */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); + /* ISHARP_NLDELTA_SCLIP_PIVOT_P */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); + /* ISHARP_NLDELTA_SCLIP_SLOPE_P */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); + /* ISHARP_NLDELTA_SCLIP_EN_N */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); + /* ISHARP_NLDELTA_SCLIP_PIVOT_N */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); + /* ISHARP_NLDELTA_SCLIP_SLOPE_N */ + REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */ @@ -1034,14 +1087,12 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, dpp, scl_data->taps.v_taps, SCL_COEF_VERTICAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_v); - *bs_coeffs_updated = true; } if (scl_data->dscl_prog_data.filter_blur_scale_h) { dpp401_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps, SCL_COEF_HORIZONTAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_h); - *bs_coeffs_updated = true; } } PERF_TRACE(); @@ -1072,7 +1123,6 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; - bool bs_coeffs_updated = false; if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) return; @@ -1132,7 +1182,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); - dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated); + dpp401_dscl_program_isharp(dpp_base, scl_data); return; } @@ -1159,18 +1209,12 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, SCL_V_NUM_TAPS_C, v_num_taps_c, SCL_H_NUM_TAPS_C, h_num_taps_c); - /* ISharp configuration - * - B&S coeffs are written to same coeff RAM as WB scaler coeffs - * - coeff RAM toggle is in EASF programming - * - if we are only programming B&S coeffs, then need to reprogram - * WB scaler coeffs and toggle coeff RAM together - */ - //if (dpp->base.ctx->dc->config.prefer_easf) - dpp401_dscl_program_isharp(dpp_base, scl_data, &bs_coeffs_updated); - - dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated); + dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr); /* Edge adaptive scaler function configuration */ if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_program_easf(dpp_base, scl_data); + /* isharp configuration */ + //if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_isharp(dpp_base, scl_data); PERF_TRACE(); } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 5ee20753572e..9fcdf06d6aa4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -76,9 +76,6 @@ #include "dml2/dml2_wrapper.h" -#include "spl/dc_spl_scl_easf_filters.h" -#include "spl/dc_spl_isharp_filters.h" - #define DC_LOGGER_INIT(logger) enum dcn401_clk_src_array_id { @@ -2126,10 +2123,6 @@ static bool dcn401_resource_construct( dc->dml2_options.max_segments_per_hubp = 20; dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; - /* SPL */ - spl_init_easf_filter_coeffs(); - spl_init_blur_scale_coeffs(); - return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index 05764d4d4604..f8df85ea4d32 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index b8858ea7c776..9eccdb38bed4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -4,11 +4,9 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" -#include "dc_spl_scl_easf_filters.h" #include "dc_spl_isharp_filters.h" -#include "spl_debug.h" -#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) @@ -109,26 +107,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active( const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; struct spl_rect rec_out = {0}; - struct spl_fixed31_32 temp; + struct fixed31_32 temp; - temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, + temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, stream_src->width); - rec_out.x = stream_dst->x + spl_fixpt_round(temp); + rec_out.x = stream_dst->x + dc_fixpt_round(temp); - temp = spl_fixpt_from_fraction( + temp = dc_fixpt_from_fraction( (rec_in->x + rec_in->width) * (long long)stream_dst->width, stream_src->width); - rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x; + rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; - temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, + temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, stream_src->height); - rec_out.y = stream_dst->y + spl_fixpt_round(temp); + rec_out.y = stream_dst->y + dc_fixpt_round(temp); - temp = spl_fixpt_from_fraction( + temp = dc_fixpt_from_fraction( (rec_in->y + rec_in->height) * (long long)stream_dst->height, stream_src->height); - rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y; + rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; return rec_out; } @@ -146,7 +144,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; mpc_rec.height = plane_clip_rec->height; mpc_rec.y = plane_clip_rec->y; - SPL_ASSERT(mpc_slice_count == 1 || + ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -159,7 +157,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( } if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { - SPL_ASSERT(mpc_rec.height % 2 == 0); + ASSERT(mpc_rec.height % 2 == 0); mpc_rec.height /= 2; } return mpc_rec; @@ -199,7 +197,7 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i return spl_in->basic_out.odm_slice_rect; } -static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out) +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) { /* * A plane clip represents the desired plane size and position in Stream @@ -342,23 +340,20 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_ /* shift the overlapping area so it is with respect to current * ODM slice's position */ - spl_scratch->scl_data.recout = shift_rec( + spl_out->scl_data.recout = shift_rec( &overlapping_area, -odm_slice.x, -odm_slice.y); - spl_scratch->scl_data.recout.height -= + spl_out->scl_data.recout.height -= spl_in->debug.visual_confirm_base_offset; - spl_scratch->scl_data.recout.height -= + spl_out->scl_data.recout.height -= spl_in->debug.visual_confirm_dpp_offset; } else /* if there is no overlap, zero recout */ - memset(&spl_scratch->scl_data.recout, 0, + memset(&spl_out->scl_data.recout, 0, sizeof(struct spl_rect)); } - /* Calculate scaling ratios */ -static void spl_calculate_scaling_ratios(struct spl_in *spl_in, - struct spl_scratch *spl_scratch, - struct spl_out *spl_out) +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) { const int in_w = spl_in->basic_out.src_rect.width; const int in_h = spl_in->basic_out.src_rect.height; @@ -369,75 +364,59 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) - spl_swap(surf_src.height, surf_src.width); + swap(surf_src.height, surf_src.width); - spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction( + spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( surf_src.width, spl_in->basic_in.dst_rect.width); - spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction( + spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( surf_src.height, spl_in->basic_in.dst_rect.height); if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) - spl_scratch->scl_data.ratios.horz.value *= 2; + spl_out->scl_data.ratios.horz.value *= 2; else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) - spl_scratch->scl_data.ratios.vert.value *= 2; + spl_out->scl_data.ratios.vert.value *= 2; - spl_scratch->scl_data.ratios.vert.value = spl_div64_s64( - spl_scratch->scl_data.ratios.vert.value * in_h, out_h); - spl_scratch->scl_data.ratios.horz.value = spl_div64_s64( - spl_scratch->scl_data.ratios.horz.value * in_w, out_w); + spl_out->scl_data.ratios.vert.value = div64_s64( + spl_out->scl_data.ratios.vert.value * in_h, out_h); + spl_out->scl_data.ratios.horz.value = div64_s64( + spl_out->scl_data.ratios.horz.value * in_w, out_w); - spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; - spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; + spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; + spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { - spl_scratch->scl_data.ratios.horz_c.value /= 2; - spl_scratch->scl_data.ratios.vert_c.value /= 2; + spl_out->scl_data.ratios.horz_c.value /= 2; + spl_out->scl_data.ratios.vert_c.value /= 2; } - spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate( - spl_scratch->scl_data.ratios.horz, 19); - spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate( - spl_scratch->scl_data.ratios.vert, 19); - spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate( - spl_scratch->scl_data.ratios.horz_c, 19); - spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate( - spl_scratch->scl_data.ratios.vert_c, 19); - - /* - * Coefficient table and some registers are different based on ratio - * that is output/input. Currently we calculate input/output - * Store 1/ratio in recip_ratio for those lookups - */ - spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip( - spl_scratch->scl_data.ratios.horz); - spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip( - spl_scratch->scl_data.ratios.vert); - spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip( - spl_scratch->scl_data.ratios.horz_c); - spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip( - spl_scratch->scl_data.ratios.vert_c); + spl_out->scl_data.ratios.horz = dc_fixpt_truncate( + spl_out->scl_data.ratios.horz, 19); + spl_out->scl_data.ratios.vert = dc_fixpt_truncate( + spl_out->scl_data.ratios.vert, 19); + spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate( + spl_out->scl_data.ratios.horz_c, 19); + spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( + spl_out->scl_data.ratios.vert_c, 19); } - /* Calculate Viewport size */ -static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch) +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) { - spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz, - spl_scratch->scl_data.recout.width)); - spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert, - spl_scratch->scl_data.recout.height)); - spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c, - spl_scratch->scl_data.recout.width)); - spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c, - spl_scratch->scl_data.recout.height)); + spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, + spl_out->scl_data.recout.width)); + spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, + spl_out->scl_data.recout.height)); + spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, + spl_out->scl_data.recout.width)); + spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, + spl_out->scl_data.recout.height)); if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { - spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); - spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); + swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); + swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); } } - static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, bool horizontal_mirror, bool *orthogonal_rotation, @@ -461,7 +440,6 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, if (horizontal_mirror) *flip_horz_scan_dir = !*flip_horz_scan_dir; } - /* * We completely calculate vp offset, size and inits here based entirely on scaling * ratios and recout for pixel perfect pipe combine. @@ -471,13 +449,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, int recout_size, int src_size, int taps, - struct spl_fixed31_32 ratio, - struct spl_fixed31_32 init_adj, - struct spl_fixed31_32 *init, + struct fixed31_32 ratio, + struct fixed31_32 init_adj, + struct fixed31_32 *init, int *vp_offset, int *vp_size) { - struct spl_fixed31_32 temp; + struct fixed31_32 temp; int int_part; /* @@ -490,33 +468,33 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, * init_bot = init + scaling_ratio * to get pixel perfect combine add the fraction from calculating vp offset */ - temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full); - *vp_offset = spl_fixpt_floor(temp); + temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = dc_fixpt_floor(temp); temp.value &= 0xffffffff; - *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp); - *init = spl_fixpt_add(*init, init_adj); - *init = spl_fixpt_truncate(*init, 19); + *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = dc_fixpt_add(*init, init_adj); + *init = dc_fixpt_truncate(*init, 19); /* * If viewport has non 0 offset and there are more taps than covered by init then * we should decrease the offset and increase init so we are never sampling * outside of viewport. */ - int_part = spl_fixpt_floor(*init); + int_part = dc_fixpt_floor(*init); if (int_part < taps) { int_part = taps - int_part; if (int_part > *vp_offset) int_part = *vp_offset; *vp_offset -= int_part; - *init = spl_fixpt_add_int(*init, int_part); + *init = dc_fixpt_add_int(*init, int_part); } /* * If taps are sampling outside of viewport at end of recout and there are more pixels * available in the surface we should increase the viewport size, regardless set vp to * only what is used. */ - temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1)); - *vp_size = spl_fixpt_floor(temp); + temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = dc_fixpt_floor(temp); if (*vp_size + *vp_offset > src_size) *vp_size = src_size - *vp_offset; @@ -531,16 +509,15 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, static bool spl_is_yuv420(enum spl_pixel_format format) { - if ((format >= SPL_PIXEL_FORMAT_420BPP8) && - (format <= SPL_PIXEL_FORMAT_420BPP10)) + if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) && + (format <= SPL_PIXEL_FORMAT_VIDEO_END)) return true; return false; } /*Calculate inits and viewport */ -static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, - struct spl_scratch *spl_scratch) +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) { struct spl_rect src = spl_in->basic_in.src_rect; struct spl_rect recout_dst_in_active_timing; @@ -551,11 +528,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; - struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; + struct fixed31_32 init_adj_h = dc_fixpt_zero; + struct fixed31_32 init_adj_v = dc_fixpt_zero; recout_clip_in_active_timing = shift_rec( - &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y); + &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( spl_in, &spl_in->basic_in.dst_rect); overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, @@ -578,8 +555,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, &flip_horz_scan_dir); if (orthogonal_rotation) { - spl_swap(src.width, src.height); - spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); + swap(src.width, src.height); + swap(flip_vert_scan_dir, flip_horz_scan_dir); } if (spl_is_yuv420(spl_in->basic_in.format)) { @@ -591,17 +568,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, switch (spl_in->basic_in.cositing) { case CHROMA_COSITING_LEFT: - init_adj_h = spl_fixpt_zero; - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_h = dc_fixpt_zero; + init_adj_v = dc_fixpt_from_fraction(sign, 2); break; case CHROMA_COSITING_NONE: - init_adj_h = spl_fixpt_from_fraction(sign, 4); - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_h = dc_fixpt_from_fraction(sign, 2); + init_adj_v = dc_fixpt_from_fraction(sign, 2); break; case CHROMA_COSITING_TOPLEFT: default: - init_adj_h = spl_fixpt_zero; - init_adj_v = spl_fixpt_zero; + init_adj_h = dc_fixpt_zero; + init_adj_v = dc_fixpt_zero; break; } } @@ -609,60 +586,59 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_scratch->scl_data.recout.width, + spl_out->scl_data.recout.width, src.width, - spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.ratios.horz, - spl_fixpt_zero, - &spl_scratch->scl_data.inits.h, - &spl_scratch->scl_data.viewport.x, - &spl_scratch->scl_data.viewport.width); + spl_out->scl_data.taps.h_taps, + spl_out->scl_data.ratios.horz, + dc_fixpt_zero, + &spl_out->scl_data.inits.h, + &spl_out->scl_data.viewport.x, + &spl_out->scl_data.viewport.width); spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_scratch->scl_data.recout.width, + spl_out->scl_data.recout.width, src.width / vpc_div, - spl_scratch->scl_data.taps.h_taps_c, - spl_scratch->scl_data.ratios.horz_c, + spl_out->scl_data.taps.h_taps_c, + spl_out->scl_data.ratios.horz_c, init_adj_h, - &spl_scratch->scl_data.inits.h_c, - &spl_scratch->scl_data.viewport_c.x, - &spl_scratch->scl_data.viewport_c.width); + &spl_out->scl_data.inits.h_c, + &spl_out->scl_data.viewport_c.x, + &spl_out->scl_data.viewport_c.width); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_scratch->scl_data.recout.height, + spl_out->scl_data.recout.height, src.height, - spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.ratios.vert, - spl_fixpt_zero, - &spl_scratch->scl_data.inits.v, - &spl_scratch->scl_data.viewport.y, - &spl_scratch->scl_data.viewport.height); + spl_out->scl_data.taps.v_taps, + spl_out->scl_data.ratios.vert, + dc_fixpt_zero, + &spl_out->scl_data.inits.v, + &spl_out->scl_data.viewport.y, + &spl_out->scl_data.viewport.height); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_scratch->scl_data.recout.height, + spl_out->scl_data.recout.height, src.height / vpc_div, - spl_scratch->scl_data.taps.v_taps_c, - spl_scratch->scl_data.ratios.vert_c, + spl_out->scl_data.taps.v_taps_c, + spl_out->scl_data.ratios.vert_c, init_adj_v, - &spl_scratch->scl_data.inits.v_c, - &spl_scratch->scl_data.viewport_c.y, - &spl_scratch->scl_data.viewport_c.height); + &spl_out->scl_data.inits.v_c, + &spl_out->scl_data.viewport_c.y, + &spl_out->scl_data.viewport_c.height); if (orthogonal_rotation) { - spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y); - spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); - spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y); - spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); + swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); + swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); + swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); + swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); } - spl_scratch->scl_data.viewport.x += src.x; - spl_scratch->scl_data.viewport.y += src.y; - SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); - spl_scratch->scl_data.viewport_c.x += src.x / vpc_div; - spl_scratch->scl_data.viewport_c.y += src.y / vpc_div; + spl_out->scl_data.viewport.x += src.x; + spl_out->scl_data.viewport.y += src.y; + ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_out->scl_data.viewport_c.x += src.x / vpc_div; + spl_out->scl_data.viewport_c.y += src.y / vpc_div; } - static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) { /* @@ -671,7 +647,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ if (spl_in->basic_in.mpc_combine_v) { - SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) @@ -689,7 +665,6 @@ static void spl_clamp_viewport(struct spl_rect *viewport) if (viewport->width < MIN_VIEWPORT_SIZE) viewport->width = MIN_VIEWPORT_SIZE; } - static bool spl_dscl_is_420_format(enum spl_pixel_format format) { if (format == SPL_PIXEL_FORMAT_420BPP8 || @@ -698,7 +673,6 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format) else return false; } - static bool spl_dscl_is_video_format(enum spl_pixel_format format) { if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN @@ -707,21 +681,17 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format) else return false; } - static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, - const struct spl_scaler_data *data, - bool enable_isharp, bool enable_easf) + const struct spl_scaler_data *data) { - const long long one = spl_fixpt_one.value; + const long long one = dc_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; - /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ if (data->ratios.horz.value == one && data->ratios.vert.value == one && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one - && !spl_in->basic_out.always_scale - && !enable_isharp) + && !spl_in->basic_out.always_scale) return SCL_MODE_SCALING_444_BYPASS; if (!spl_dscl_is_420_format(pixel_format)) { @@ -730,204 +700,69 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, else return SCL_MODE_SCALING_444_RGB_ENABLE; } - - /* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV - * downscale without EASF - */ - if ((!enable_isharp) && (!enable_easf)) { - if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return SCL_MODE_SCALING_420_LUMA_BYPASS; - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return SCL_MODE_SCALING_420_CHROMA_BYPASS; - } + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return SCL_MODE_SCALING_420_LUMA_BYPASS; + if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) + return SCL_MODE_SCALING_420_CHROMA_BYPASS; return SCL_MODE_SCALING_420_YCBCR_ENABLE; } - -static bool spl_choose_lls_policy(enum spl_pixel_format format, - enum spl_transfer_func_type tf_type, - enum spl_transfer_func_predefined tf_predefined_type, - enum linear_light_scaling *lls_pref) -{ - if (spl_is_yuv420(format)) { - *lls_pref = LLS_PREF_NO; - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || - (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) - return true; - } else { /* RGB or YUV444 */ - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || - (tf_type == SPL_TF_TYPE_BYPASS)) { - *lls_pref = LLS_PREF_YES; - return true; - } - } - *lls_pref = LLS_PREF_NO; - return false; -} - -/* Enable EASF ?*/ -static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) -{ - int vratio = 0; - int hratio = 0; - bool skip_easf = false; - bool lls_enable_easf = true; - - /* - * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer - * function to determine whether to use LINEAR or NONLINEAR scaling - */ - if (spl_in->lls_pref == LLS_PREF_DONT_CARE) - lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, - spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, - &spl_in->lls_pref); - - vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); - hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); - - if (!lls_enable_easf || spl_in->disable_easf) - skip_easf = true; - - /* - * No EASF support for downscaling > 2:1 - * EASF support for upscaling or downscaling up to 2:1 - */ - if ((vratio > 2) || (hratio > 2)) - skip_easf = true; - - /* Check for linear scaling or EASF preferred */ - if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf) - skip_easf = true; - - return skip_easf; -} - -/* Check if video is in fullscreen mode */ -static bool spl_is_video_fullscreen(struct spl_in *spl_in) -{ - if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) - return true; - return false; -} - -static bool spl_get_isharp_en(struct spl_in *spl_in, - struct spl_scratch *spl_scratch) -{ - bool enable_isharp = false; - int vratio = 0; - int hratio = 0; - struct spl_taps taps = spl_scratch->scl_data.taps; - bool fullscreen = spl_is_video_fullscreen(spl_in); - - vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); - hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); - - /* Return if adaptive sharpness is disabled */ - if (spl_in->adaptive_sharpness.enable == false) - return enable_isharp; - - /* No iSHARP support for downscaling */ - if (vratio > 1 || hratio > 1) - return enable_isharp; - - // Scaling is up to 1:1 (no scaling) or upscaling - - /* - * Apply sharpness to all RGB surfaces and to - * NV12/P010 surfaces if in fullscreen - */ - if (spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) - return enable_isharp; - - /* - * Apply sharpness if supports horizontal taps 4,6 AND - * vertical taps 3, 4, 6 - */ - if ((taps.h_taps == 4 || taps.h_taps == 6) && - (taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6)) - enable_isharp = true; - - return enable_isharp; -} - /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( - int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, - const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, - bool *enable_isharp) + int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, + const struct spl_taps *in_taps) { int num_part_y, num_part_c; int max_taps_y, max_taps_c; int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - bool skip_easf = false; - if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && + if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && max_downscale_src_width != 0 && - spl_scratch->scl_data.viewport.width > max_downscale_src_width) + spl_out->scl_data.viewport.width > max_downscale_src_width) return false; - - /* Check if we are using EASF or not */ - skip_easf = enable_easf(spl_in, spl_scratch); - /* * Set default taps if none are provided * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling * taps = 4 for upscaling */ - if (skip_easf) { - if (in_taps->h_taps == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) - spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( - spl_scratch->scl_data.ratios.horz), 8); - else - spl_scratch->scl_data.taps.h_taps = 4; - } else - spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; - if (in_taps->v_taps == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) - spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert, 2)), 8); - else - spl_scratch->scl_data.taps.v_taps = 4; - } else - spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; - if (in_taps->v_taps_c == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) - spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert_c, 2)), 8); - else - spl_scratch->scl_data.taps.v_taps_c = 4; - } else - spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; - if (in_taps->h_taps_c == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) - spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( - spl_scratch->scl_data.ratios.horz_c), 8); - else - spl_scratch->scl_data.taps.h_taps_c = 4; - } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) - /* Only 1 and even h_taps_c are supported by hw */ - spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + if (in_taps->h_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) + spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); else - spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; - } else { - if (spl_is_yuv420(spl_in->basic_in.format)) { - spl_scratch->scl_data.taps.h_taps = 6; - spl_scratch->scl_data.taps.v_taps = 6; - spl_scratch->scl_data.taps.h_taps_c = 4; - spl_scratch->scl_data.taps.v_taps_c = 4; - } else { /* RGB */ - spl_scratch->scl_data.taps.h_taps = 6; - spl_scratch->scl_data.taps.v_taps = 6; - spl_scratch->scl_data.taps.h_taps_c = 6; - spl_scratch->scl_data.taps.v_taps_c = 6; - } - } + spl_out->scl_data.taps.h_taps = 4; + } else + spl_out->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) + spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert, 2)), 8); + else + spl_out->scl_data.taps.v_taps = 4; + } else + spl_out->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) + spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert_c, 2)), 8); + else + spl_out->scl_data.taps.v_taps_c = 4; + } else + spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) + spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + else + spl_out->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + else + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; /*Ensure we can support the requested number of vtaps*/ - min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); - min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); + min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) @@ -936,16 +771,16 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) - max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2); + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); else max_taps_y = num_part_y; - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2) - max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2); + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); else max_taps_c = num_part_c; @@ -954,108 +789,48 @@ static bool spl_get_optimal_number_of_taps( else if (max_taps_c < min_taps_c) return false; - if (spl_scratch->scl_data.taps.v_taps > max_taps_y) - spl_scratch->scl_data.taps.v_taps = max_taps_y; + if (spl_out->scl_data.taps.v_taps > max_taps_y) + spl_out->scl_data.taps.v_taps = max_taps_y; - if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c) - spl_scratch->scl_data.taps.v_taps_c = max_taps_c; + if (spl_out->scl_data.taps.v_taps_c > max_taps_c) + spl_out->scl_data.taps.v_taps_c = max_taps_c; + if (spl_in->prefer_easf) { + // EASF can be enabled only for taps 3,4,6 + // If optimal no of taps is 5, then set it to 4 + // If optimal no of taps is 7 or 8, then set it to 6 + if (spl_out->scl_data.taps.v_taps == 5) + spl_out->scl_data.taps.v_taps = 4; + if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) + spl_out->scl_data.taps.v_taps = 6; - if (!skip_easf) { - /* - * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3 - * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV - * - * If LB does not support 3, 4, or 6 taps, then disable EASF_V - * and only enable EASF_H. So for RGB, support 6x2, 4x2 - * and for NL YUV420, support 6x2 for Y and 4x2 for UV - * - * All other cases, have to disable EASF_V and EASF_H - * - * If optimal no of taps is 5, then set it to 4 - * If optimal no of taps is 7 or 8, then fine since max tap is 6 - * - */ - if (spl_scratch->scl_data.taps.v_taps == 5) - spl_scratch->scl_data.taps.v_taps = 4; + if (spl_out->scl_data.taps.v_taps_c == 5) + spl_out->scl_data.taps.v_taps_c = 4; + if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) + spl_out->scl_data.taps.v_taps_c = 6; - if (spl_scratch->scl_data.taps.v_taps_c == 5) - spl_scratch->scl_data.taps.v_taps_c = 4; + if (spl_out->scl_data.taps.h_taps == 5) + spl_out->scl_data.taps.h_taps = 4; + if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) + spl_out->scl_data.taps.h_taps = 6; - if (spl_scratch->scl_data.taps.h_taps == 5) - spl_scratch->scl_data.taps.h_taps = 4; + if (spl_out->scl_data.taps.h_taps_c == 5) + spl_out->scl_data.taps.h_taps_c = 4; + if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) + spl_out->scl_data.taps.h_taps_c = 6; - if (spl_scratch->scl_data.taps.h_taps_c == 5) - spl_scratch->scl_data.taps.h_taps_c = 4; - - if (spl_is_yuv420(spl_in->basic_in.format)) { - if ((spl_scratch->scl_data.taps.h_taps <= 4) || - (spl_scratch->scl_data.taps.h_taps_c <= 3)) { - *enable_easf_v = false; - *enable_easf_h = false; - } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || - (spl_scratch->scl_data.taps.v_taps_c <= 3)) { - *enable_easf_v = false; - *enable_easf_h = true; - } else { - *enable_easf_v = true; - *enable_easf_h = true; - } - SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) && - (spl_scratch->scl_data.taps.v_taps_c > 1)); - } else { /* RGB */ - if (spl_scratch->scl_data.taps.h_taps <= 3) { - *enable_easf_v = false; - *enable_easf_h = false; - } else if (spl_scratch->scl_data.taps.v_taps < 3) { - *enable_easf_v = false; - *enable_easf_h = true; - } else { - *enable_easf_v = true; - *enable_easf_h = true; - } - SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1); - } - } else { - *enable_easf_v = false; - *enable_easf_h = false; } // end of if prefer_easf - - /* Sharpener requires scaler to be enabled, including for 1:1 - * Check if ISHARP can be enabled - * If ISHARP is not enabled, for 1:1, set taps to 1 and disable - * EASF - * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if - * EASF is not enabled - */ - - *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); - if (!*enable_isharp && !spl_in->basic_out.always_scale) { - if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) && - (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { - spl_scratch->scl_data.taps.h_taps = 1; - spl_scratch->scl_data.taps.v_taps = 1; - - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) - spl_scratch->scl_data.taps.h_taps_c = 1; - - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) - spl_scratch->scl_data.taps.v_taps_c = 1; - - *enable_easf_v = false; - *enable_easf_h = false; - } else { - if ((!*enable_easf_h) && - (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) - spl_scratch->scl_data.taps.h_taps_c = 1; - - if ((!*enable_easf_v) && - (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) - spl_scratch->scl_data.taps.v_taps_c = 1; - } + if (!spl_in->basic_out.always_scale) { + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) + spl_out->scl_data.taps.h_taps = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) + spl_out->scl_data.taps.v_taps = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) + spl_out->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) + spl_out->scl_data.taps.v_taps_c = 1; } return true; } - static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { @@ -1073,38 +848,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format, static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data) { - struct spl_fixed31_32 bot; + struct fixed31_32 bot; - dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5; - dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5; - dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5; - dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; /* * 0.24 format for fraction, first five bits zeroed */ dscl_prog_data->init.h_filter_init_frac = - spl_fixpt_u0d19(scl_data->inits.h) << 5; + dc_fixpt_u0d19(scl_data->inits.h) << 5; dscl_prog_data->init.h_filter_init_int = - spl_fixpt_floor(scl_data->inits.h); + dc_fixpt_floor(scl_data->inits.h); dscl_prog_data->init.h_filter_init_frac_c = - spl_fixpt_u0d19(scl_data->inits.h_c) << 5; + dc_fixpt_u0d19(scl_data->inits.h_c) << 5; dscl_prog_data->init.h_filter_init_int_c = - spl_fixpt_floor(scl_data->inits.h_c); + dc_fixpt_floor(scl_data->inits.h_c); dscl_prog_data->init.v_filter_init_frac = - spl_fixpt_u0d19(scl_data->inits.v) << 5; + dc_fixpt_u0d19(scl_data->inits.v) << 5; dscl_prog_data->init.v_filter_init_int = - spl_fixpt_floor(scl_data->inits.v); + dc_fixpt_floor(scl_data->inits.v); dscl_prog_data->init.v_filter_init_frac_c = - spl_fixpt_u0d19(scl_data->inits.v_c) << 5; + dc_fixpt_u0d19(scl_data->inits.v_c) << 5; dscl_prog_data->init.v_filter_init_int_c = - spl_fixpt_floor(scl_data->inits.v_c); + dc_fixpt_floor(scl_data->inits.v_c); - bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); - dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot); - bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); - dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot); + bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); + bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); } static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, @@ -1115,28 +890,79 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; } - +static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} +static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, data->ratios.vert); + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); +} +#ifdef CONFIG_DRM_AMD_DC_FP +static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) +{ + if ((taps == 3) || (taps == 4) || (taps == 6)) + return spl_get_filter_isharp_bs_4tap_64p(); + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} +static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p( + data->taps.h_taps); + dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( + data->taps.v_taps); +} +#endif /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch, - struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; - const struct spl_scaler_data *data = &spl_scratch->scl_data; + const struct spl_scaler_data *data = &spl_out->scl_data; struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; - bool enable_easf = enable_easf_v || enable_easf_h; - // Set values for recout - dscl_prog_data->recout = spl_scratch->scl_data.recout; + dscl_prog_data->recout = spl_out->scl_data.recout; // Set values for MPC Size - dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active; - dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active; + dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; // SCL_MODE - Set SCL_MODE data - dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, - enable_easf); + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); // SCL_BLACK_COLOR spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); @@ -1147,101 +973,103 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp // Set HTaps/VTaps spl_set_taps_data(dscl_prog_data, data); // Set viewport - dscl_prog_data->viewport = spl_scratch->scl_data.viewport; + dscl_prog_data->viewport = spl_out->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); + spl_set_filters_data(dscl_prog_data, data); } - -/* Set EASF data */ -static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, - bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format, enum system_setup setup) +/* Enable EASF ?*/ +static bool enable_easf(int scale_ratio, int taps, + enum linear_light_scaling lls_pref, bool prefer_easf) { - struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; + // Is downscaling > 6:1 ? + if (scale_ratio > 6) { + // END - No EASF support for downscaling > 6:1 + return false; + } + // Is upscaling or downscaling up to 2:1? + if (scale_ratio <= 2) { + // Is linear scaling or EASF preferred? + if (lls_pref == LLS_PREF_YES || prefer_easf) { + // LB support taps 3, 4, 6 + if (taps == 3 || taps == 4 || taps == 6) { + // END - EASF supported + return true; + } + } + } + // END - EASF not supported + return false; +} +/* Set EASF data */ +static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, + bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format) +{ + if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + else + dscl_prog_data->easf_matrix_mode = 0; + if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 0; + dscl_prog_data->easf_v_sharp_factor = 1; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - /* 2-bit, BF3 chroma mode correction calculation mode */ - dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ minCoef ]*/ + dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTiltMaxVal ]*/ + 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ dnTiltSlope ]*/ + 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt1Slope ]*/ + 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt2Slope ]*/ + 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10 [ upTilt2Offset ]*/ + 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ + 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] dscl_prog_data->easf_v_ringest_eventap_gain1 = - spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); - /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 dscl_prog_data->easf_v_ringest_eventap_gain2 = - spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, - spl_scratch->scl_data.recip_ratios.vert); + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control - - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1262,41 +1090,13 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } else { - dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control - - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1315,11 +1115,11 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } } else dscl_prog_data->easf_v_en = false; @@ -1327,63 +1127,52 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 0; + dscl_prog_data->easf_h_sharp_factor = 1; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - /* 2-bit, BF3 chroma mode correction calculation mode */ - dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ + dscl_prog_data->easf_h_bf3_mode = + 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] dscl_prog_data->easf_h_ringest_eventap_gain1 = - spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); - /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 dscl_prog_data->easf_h_ringest_eventap_gain2 = - spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, - spl_scratch->scl_data.recip_ratios.horz); + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control - - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1401,40 +1190,12 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } else { - dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control - - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 - dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1452,36 +1213,25 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - if (setup == HDR_L) { - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient - } else { // SDR_L - dscl_prog_data->easf_matrix_c0 = - 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) - dscl_prog_data->easf_matrix_c1 = - 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) - dscl_prog_data->easf_matrix_c2 = - 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient - } + dscl_prog_data->easf_matrix_c0 = + 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) + dscl_prog_data->easf_matrix_c1 = + 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) + dscl_prog_data->easf_matrix_c2 = + 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1493,43 +1243,27 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } - - if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - /* - * 2-bit, BF3 chroma mode correction calculation mode - * Needs to be disabled for YUV420 mode - * Override lookup value - */ - dscl_prog_data->easf_v_bf3_mode = 0; - dscl_prog_data->easf_h_bf3_mode = 0; - } else - dscl_prog_data->easf_matrix_mode = 0; - } - /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; - else if (data->taps.v_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; - else if (data->taps.v_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; + if (dscl_prog_data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, - enum system_setup setup) + const struct spl_scaler_data *data) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1538,12 +1272,10 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } dscl_prog_data->isharp_en = 1; // ISHARP_EN + dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (data->taps.h_taps == 6) { - dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ - spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* ISHARP_NOISEDET_MODE */ - } else - dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN + if (dscl_prog_data->taps.h_taps == 6) + spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1552,67 +1284,50 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ + if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - if (setup == SDR_L) { - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 312; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - } else { - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = 0x1EC; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -20 - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + switch (adp_sharpness.sharpness) { + case SHARPNESS_LOW: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); + break; + case SHARPNESS_MID: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); + break; + case SHARPNESS_HIGH: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); + break; + default: + BREAK_TO_DEBUGGER(); } - spl_build_isharp_1dlut_from_reference_curve(ratio, setup); - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut( - adp_sharpness.sharpness); - // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1631,7 +1346,62 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table +#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); +#endif +} +static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, + int vscale_ratio, int hscale_ratio, struct spl_taps taps, + enum spl_pixel_format format) +{ + bool enable_isharp = false; + + if (adp_sharpness.enable == false) + return enable_isharp; // Return if adaptive sharpness is disabled + // Is downscaling ? + if (vscale_ratio > 1 || hscale_ratio > 1) { + // END - No iSHARP support for downscaling + return enable_isharp; + } + // Scaling is up to 1:1 (no scaling) or upscaling + + /* Only apply sharpness to NV12 and not P010 */ + if (format != SPL_PIXEL_FORMAT_420BPP8) + return enable_isharp; + + // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 + if (taps.h_taps == 4 || taps.h_taps == 6 || + taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { + // END - iSHARP supported + enable_isharp = true; + } + return enable_isharp; +} + +static bool spl_choose_lls_policy(enum spl_pixel_format format, + enum spl_transfer_func_type tf_type, + enum spl_transfer_func_predefined tf_predefined_type, + enum linear_light_scaling *lls_pref) +{ + if (spl_is_yuv420(format)) { + *lls_pref = LLS_PREF_NO; + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) + return true; + } else { /* RGB or YUV444 */ + if (tf_type == SPL_TF_TYPE_PREDEFINED) { + if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || + (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) + *lls_pref = LLS_PREF_NO; + else + *lls_pref = LLS_PREF_YES; + return true; + } else if (tf_type == SPL_TF_TYPE_BYPASS) { + *lls_pref = LLS_PREF_YES; + return true; + } + } + *lls_pref = LLS_PREF_NO; + return false; } /* Calculate scaler parameters */ @@ -1640,71 +1410,67 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; + bool lls_enable_easf = true; int vratio = 0; int hratio = 0; - struct spl_scratch spl_scratch; - struct spl_fixed31_32 isharp_scale_ratio; - enum system_setup setup; - bool enable_isharp = false; - const struct spl_scaler_data *data = &spl_scratch.scl_data; - - memset(&spl_scratch, 0, sizeof(struct spl_scratch)); - spl_scratch.scl_data.h_active = spl_in->h_active; - spl_scratch.scl_data.v_active = spl_in->v_active; + const struct spl_scaler_data *data = &spl_out->scl_data; // All SPL calls /* recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, &spl_scratch, spl_out); + spl_calculate_recout(spl_in, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); + spl_calculate_scaling_ratios(spl_in, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, &spl_scratch); + spl_calculate_viewport_size(spl_in, spl_out); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, - &enable_easf_h, &enable_isharp); + spl_out, &spl_in->scaling_quality); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, &spl_scratch); + spl_calculate_inits_and_viewports(spl_in, spl_out); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); // Clamp - spl_clamp_viewport(&spl_scratch.scl_data.viewport); + spl_clamp_viewport(&spl_out->scl_data.viewport); if (!res) return res; - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); + /* + * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer + * function to determine whether to use LINEAR or NONLINEAR scaling + */ + if (spl_in->lls_pref == LLS_PREF_DONT_CARE) + lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, + spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, + &spl_in->lls_pref); - if (spl_in->lls_pref == LLS_PREF_YES) { - if (spl_in->is_hdr_on) - setup = HDR_L; - else - setup = SDR_L; + // Save all calculated parameters in dscl_prog_data structure to program hw registers + spl_set_dscl_prog_data(spl_in, spl_out); + + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + if (!lls_enable_easf || spl_in->disable_easf) { + enable_easf_v = false; + enable_easf_h = false; } else { - if (spl_in->is_hdr_on) - setup = HDR_NL; - else - setup = SDR_NL; + /* Enable EASF on vertical? */ + enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); + /* Enable EASF on horizontal? */ + enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); } // Set EASF - spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format, setup); + spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format); // Set iSHARP - vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); - hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); - if (vratio <= hratio) - isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; - else - isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; - + bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, + spl_out->scl_data.taps, spl_in->basic_in.format); spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); + spl_in->lls_pref, spl_in->basic_in.format, data); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c deleted file mode 100644 index 99238644e0a1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dc_spl_filters.h" - -void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, - uint16_t *s1_12_filter, int num_taps) -{ - int num_entries = NUM_PHASES_COEFF * num_taps; - int i; - - for (i = 0; i < num_entries; i++) - *(s1_12_filter + i) = *(s1_10_filter + i) * 4; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h deleted file mode 100644 index 20439cdbdb10..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -/* Copyright 2024 Advanced Micro Devices, Inc. */ - -#ifndef __DC_SPL_FILTERS_H__ -#define __DC_SPL_FILTERS_H__ - -#include "dc_spl_types.h" - -#define NUM_PHASES_COEFF 33 - -void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, - uint16_t *s1_12_filter, int num_taps); - -#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index d483f259512e..a5d9a6223d06 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,9 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" -#include "spl_debug.h" -#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -233,53 +230,6 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; -//======================================== -// Delta Gain 1DLUT -// LUT content is packed as 4-bytes into one DWORD/entry -// A_start = 0.000000 -// A_end = 10.000000 -// A_gain = 3.000000 -// B_start = 11.000000 -// B_end = 127.000000 -// C_start = 40.000000 -// C_end = 127.000000 -//======================================== -static const uint32_t filter_isharp_1D_lut_3p0x[32] = { -0x03010000, -0x0F0B0805, -0x211E1813, -0x2B292624, -0x3533302E, -0x3E3C3A37, -0x46444240, -0x4D4B4A48, -0x5352504F, -0x59575655, -0x5D5C5B5A, -0x61605F5E, -0x64646362, -0x66666565, -0x68686767, -0x68686868, -0x68686868, -0x67676868, -0x65656666, -0x62636464, -0x5E5F6061, -0x5A5B5C5D, -0x55565759, -0x4F505253, -0x484A4B4D, -0x40424446, -0x373A3C3E, -0x2E303335, -0x2426292B, -0x191B1E21, -0x0D101316, -0x0003060A, -}; - -//======================================== // Wide scaler coefficients //======================================================== // gen_scaler_coeffs.m @@ -334,7 +284,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // Blur & Scale LPF // S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -369,228 +319,6 @@ static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; -//======================================================== -// gen_BlurScale_coeffs.m -// 25-Apr-2022 -// 4 -// 64 -// Blur & Scale LPF -// S1.10 -//======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[132] = { -0x00E5, 0x0237, 0x00E4, 0x0000, -0x00DE, 0x0237, 0x00EB, 0x0000, -0x00D7, 0x0236, 0x00F2, 0x0001, -0x00D0, 0x0235, 0x00FA, 0x0001, -0x00C9, 0x0234, 0x0101, 0x0002, -0x00C2, 0x0233, 0x0108, 0x0003, -0x00BB, 0x0232, 0x0110, 0x0003, -0x00B5, 0x0230, 0x0117, 0x0004, -0x00AE, 0x022E, 0x011F, 0x0005, -0x00A8, 0x022C, 0x0126, 0x0006, -0x00A2, 0x022A, 0x012D, 0x0007, -0x009C, 0x0228, 0x0134, 0x0008, -0x0096, 0x0225, 0x013C, 0x0009, -0x0090, 0x0222, 0x0143, 0x000B, -0x008A, 0x021F, 0x014B, 0x000C, -0x0085, 0x021C, 0x0151, 0x000E, -0x007F, 0x0218, 0x015A, 0x000F, -0x007A, 0x0215, 0x0160, 0x0011, -0x0074, 0x0211, 0x0168, 0x0013, -0x006F, 0x020D, 0x016F, 0x0015, -0x006A, 0x0209, 0x0176, 0x0017, -0x0065, 0x0204, 0x017E, 0x0019, -0x0060, 0x0200, 0x0185, 0x001B, -0x005C, 0x01FB, 0x018C, 0x001D, -0x0057, 0x01F6, 0x0193, 0x0020, -0x0053, 0x01F1, 0x019A, 0x0022, -0x004E, 0x01EC, 0x01A1, 0x0025, -0x004A, 0x01E6, 0x01A8, 0x0028, -0x0046, 0x01E1, 0x01AF, 0x002A, -0x0042, 0x01DB, 0x01B6, 0x002D, -0x003F, 0x01D5, 0x01BB, 0x0031, -0x003B, 0x01CF, 0x01C2, 0x0034, -0x0037, 0x01C9, 0x01C9, 0x0037, -}; -//======================================================== -// gen_BlurScale_coeffs.m -// 09-Jun-2022 -// 3 -// 64 -// Blur & Scale LPF -// S1.10 -//======================================================== -static const uint16_t filter_isharp_bs_3tap_64p[99] = { -0x0200, 0x0200, 0x0000, -0x01F6, 0x0206, 0x0004, -0x01EC, 0x020B, 0x0009, -0x01E2, 0x0211, 0x000D, -0x01D8, 0x0216, 0x0012, -0x01CE, 0x021C, 0x0016, -0x01C4, 0x0221, 0x001B, -0x01BA, 0x0226, 0x0020, -0x01B0, 0x022A, 0x0026, -0x01A6, 0x022F, 0x002B, -0x019C, 0x0233, 0x0031, -0x0192, 0x0238, 0x0036, -0x0188, 0x023C, 0x003C, -0x017E, 0x0240, 0x0042, -0x0174, 0x0244, 0x0048, -0x016A, 0x0248, 0x004E, -0x0161, 0x024A, 0x0055, -0x0157, 0x024E, 0x005B, -0x014D, 0x0251, 0x0062, -0x0144, 0x0253, 0x0069, -0x013A, 0x0256, 0x0070, -0x0131, 0x0258, 0x0077, -0x0127, 0x025B, 0x007E, -0x011E, 0x025C, 0x0086, -0x0115, 0x025E, 0x008D, -0x010B, 0x0260, 0x0095, -0x0102, 0x0262, 0x009C, -0x00F9, 0x0263, 0x00A4, -0x00F0, 0x0264, 0x00AC, -0x00E7, 0x0265, 0x00B4, -0x00DF, 0x0264, 0x00BD, -0x00D6, 0x0265, 0x00C5, -0x00CD, 0x0266, 0x00CD, -}; - -/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ -static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; -static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; -static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_nl[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 2, 1}, - {11, 10, 175, 100}, - {1075, 1000, 15, 10}, - {105, 100, 125, 100}, - {1025, 1000, 1, 1}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 35, 10}, - {11, 10, 32, 10}, - {1075, 1000, 29, 10}, - {105, 100, 26, 10}, - {1025, 1000, 23, 10}, - {1, 1, 2, 1}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_sdr_l[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 15, 10}, - {11, 10, 135, 100}, - {1075, 1000, 12, 10}, - {105, 100, 105, 100}, - {1025, 1000, 9, 10}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 25, 10}, - {11, 10, 23, 10}, - {1075, 1000, 21, 10}, - {105, 100, 19, 10}, - {1025, 1000, 17, 10}, - {1, 1, 15, 10}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_nl[3][6] = { - { /* LOW */ - {1125, 1000, 5, 10}, - {11, 10, 4, 10}, - {1075, 1000, 3, 10}, - {105, 100, 2, 10}, - {1025, 1000, 1, 10}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 1, 1}, - {11, 10, 9, 10}, - {1075, 1000, 8, 10}, - {105, 100, 7, 10}, - {1025, 1000, 6, 10}, - {1, 1, 5, 10}, - }, - { /* HIGH */ - {1125, 1000, 15, 10}, - {11, 10, 14, 10}, - {1075, 1000, 13, 10}, - {105, 100, 12, 10}, - {1025, 1000, 11, 10}, - {1, 1, 1, 1}, - }, -}; - -struct scale_ratio_to_sharpness_level_lookup scale_to_sharp_hdr_l[3][6] = { - { /* LOW */ - {1125, 1000, 75, 100}, - {11, 10, 6, 10}, - {1075, 1000, 45, 100}, - {105, 100, 3, 10}, - {1025, 1000, 15, 100}, - {1, 1, 0, 1}, - }, - { /* MID */ - {1125, 1000, 15, 10}, - {11, 10, 135, 100}, - {1075, 1000, 12, 10}, - {105, 100, 105, 100}, - {1025, 1000, 9, 10}, - {1, 1, 75, 100}, - }, - { /* HIGH */ - {1125, 1000, 25, 10}, - {11, 10, 23, 10}, - {1075, 1000, 21, 10}, - {105, 100, 19, 10}, - {1025, 1000, 17, 10}, - {1, 1, 15, 10}, - }, -}; - -/* Pre-generated 1DLUT for LOW for given setup and sharpness level */ -uint32_t filter_isharp_1D_lut_pregen[3][32] = { - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }, -}; - const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -611,160 +339,11 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } -const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) -{ - return filter_isharp_1D_lut_3p0x; -} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) +const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) { - return filter_isharp_bs_4tap_in_6_64p_s1_12; + return filter_isharp_bs_4tap_64p; } -uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) -{ - return filter_isharp_bs_4tap_64p_s1_12; -} -uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) -{ - return filter_isharp_bs_3tap_64p_s1_12; -} - -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup) -{ - uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; - struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level, ratio_level; - int i, j; - struct scale_ratio_to_sharpness_level_lookup *setup_lookup_ptr; - int num_sharp_ramp_levels; - int size_1dlut; - int sharp_calc_int; - uint32_t filter_pregen_store[32]; - - /* - * Given scaling ratio and current system setup, build pregenerated - * 1DLUT tables for three sharpness levels - LOW, MID, HIGH - */ - for (i = 0; i < 3; i++) { - /* - * Based on setup ( HDR/SDR, L/NL ), get base scale ratio to - * sharpness curve - */ - switch (setup) { - case HDR_L: - setup_lookup_ptr = scale_to_sharp_hdr_l[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_l[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case HDR_NL: - setup_lookup_ptr = scale_to_sharp_hdr_nl[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_hdr_nl[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case SDR_L: - setup_lookup_ptr = scale_to_sharp_sdr_l[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_l[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - case SDR_NL: - default: - setup_lookup_ptr = scale_to_sharp_sdr_nl[i]; - num_sharp_ramp_levels = sizeof(scale_to_sharp_sdr_nl[i])/ - sizeof(struct scale_ratio_to_sharpness_level_lookup); - break; - } - - /* - * Compare desired scaling ratio and find adjusted sharpness from - * base scale ratio to sharpness curve - */ - j = 0; - sharp_level = spl_fixpt_zero; - while (j < num_sharp_ramp_levels) { - ratio_level = spl_fixpt_from_fraction(setup_lookup_ptr->ratio_numer, - setup_lookup_ptr->ratio_denom); - if (ratio.value >= ratio_level.value) { - sharp_level = spl_fixpt_from_fraction(setup_lookup_ptr->sharpness_numer, - setup_lookup_ptr->sharpness_denom); - break; - } - setup_lookup_ptr++; - j++; - } - - /* - * Calculate LUT_128_gained with this equation: - * - * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) - * where LUT_128[i] is contents of 3p0x isharp 1dlut - * where sharpLevel is desired sharpness level - * where iGain is base sharpness level 3.0 - * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level - */ - byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; - byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; - size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); - memset(byte_ptr_1dlut_dst, 0, size_1dlut); - for (j = 0; j < size_1dlut; j++) { - sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); - sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); - sharp_calc = spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); - sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); - sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); - sharp_calc_int = spl_fixpt_floor(sharp_calc); - if (sharp_calc_int > 255) - sharp_calc_int = 255; - *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; - - byte_ptr_1dlut_src++; - byte_ptr_1dlut_dst++; - } - - /* Compare if filter has change, if so update */ - if (memcmp((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut) != 0) - memcpy((void *)filter_isharp_1D_lut_pregen[i], (void *)filter_pregen_store, size_1dlut); - } -} - -uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness) -{ - return filter_isharp_1D_lut_pregen[sharpness]; -} - -void spl_init_blur_scale_coeffs(void) -{ - convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, - filter_isharp_bs_3tap_64p_s1_12, 3); - convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, - filter_isharp_bs_4tap_64p_s1_12, 4); - convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, - filter_isharp_bs_4tap_in_6_64p_s1_12, 6); -} - -uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if (taps == 3) - return spl_get_filter_isharp_bs_3tap_64p(); - else if (taps == 4) - return spl_get_filter_isharp_bs_4tap_64p(); - else if (taps == 6) - return spl_get_filter_isharp_bs_4tap_in_6_64p(); - else { - /* should never happen, bug */ - SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - -void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = - spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); - - dscl_prog_data->filter_blur_scale_v = - spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); -} - diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 6cb000bf9d53..1aaf4c50c1bc 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -12,37 +12,6 @@ const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); -uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); -uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); -uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); +const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); -uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); - -struct scale_ratio_to_sharpness_level_lookup { - unsigned int ratio_numer; - unsigned int ratio_denom; - unsigned int sharpness_numer; - unsigned int sharpness_denom; -}; - -struct sharpness_level_mapping { - unsigned int level; - unsigned int level_numer; - unsigned int level_denom; -}; - -enum system_setup { - SDR_NL = 0, - SDR_L, - HDR_NL, - HDR_L -}; - -void spl_init_blur_scale_coeffs(void); -void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data); - -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup); -uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum explicit_sharpness sharpness); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c deleted file mode 100644 index 09bf82f7d468..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c +++ /dev/null @@ -1,1726 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "spl_debug.h" -#include "dc_spl_filters.h" -#include "dc_spl_scl_filters.h" -#include "dc_spl_scl_easf_filters.h" - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.3_p_10qb_ -// 3 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F6, 0x0206, 0x0004, - 0x01EC, 0x020B, 0x0009, - 0x01E2, 0x0211, 0x000D, - 0x01D8, 0x0216, 0x0012, - 0x01CE, 0x021C, 0x0016, - 0x01C4, 0x0221, 0x001B, - 0x01BA, 0x0226, 0x0020, - 0x01B0, 0x022A, 0x0026, - 0x01A6, 0x022F, 0x002B, - 0x019C, 0x0233, 0x0031, - 0x0192, 0x0238, 0x0036, - 0x0188, 0x023C, 0x003C, - 0x017E, 0x0240, 0x0042, - 0x0174, 0x0244, 0x0048, - 0x016A, 0x0248, 0x004E, - 0x0161, 0x024A, 0x0055, - 0x0157, 0x024E, 0x005B, - 0x014D, 0x0251, 0x0062, - 0x0144, 0x0253, 0x0069, - 0x013A, 0x0256, 0x0070, - 0x0131, 0x0258, 0x0077, - 0x0127, 0x025B, 0x007E, - 0x011E, 0x025C, 0x0086, - 0x0115, 0x025E, 0x008D, - 0x010B, 0x0260, 0x0095, - 0x0102, 0x0262, 0x009C, - 0x00F9, 0x0263, 0x00A4, - 0x00F0, 0x0264, 0x00AC, - 0x00E7, 0x0265, 0x00B4, - 0x00DF, 0x0264, 0x00BD, - 0x00D6, 0x0265, 0x00C5, - 0x00CD, 0x0266, 0x00CD, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.4_p_10qb_ -// 3 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F6, 0x0206, 0x0004, - 0x01EB, 0x020E, 0x0007, - 0x01E1, 0x0214, 0x000B, - 0x01D7, 0x021A, 0x000F, - 0x01CD, 0x0220, 0x0013, - 0x01C2, 0x0226, 0x0018, - 0x01B8, 0x022C, 0x001C, - 0x01AE, 0x0231, 0x0021, - 0x01A3, 0x0237, 0x0026, - 0x0199, 0x023C, 0x002B, - 0x018F, 0x0240, 0x0031, - 0x0185, 0x0245, 0x0036, - 0x017A, 0x024A, 0x003C, - 0x0170, 0x024F, 0x0041, - 0x0166, 0x0253, 0x0047, - 0x015C, 0x0257, 0x004D, - 0x0152, 0x025A, 0x0054, - 0x0148, 0x025E, 0x005A, - 0x013E, 0x0261, 0x0061, - 0x0134, 0x0264, 0x0068, - 0x012B, 0x0266, 0x006F, - 0x0121, 0x0269, 0x0076, - 0x0117, 0x026C, 0x007D, - 0x010E, 0x026E, 0x0084, - 0x0104, 0x0270, 0x008C, - 0x00FB, 0x0271, 0x0094, - 0x00F2, 0x0272, 0x009C, - 0x00E9, 0x0273, 0x00A4, - 0x00E0, 0x0274, 0x00AC, - 0x00D7, 0x0275, 0x00B4, - 0x00CE, 0x0275, 0x00BD, - 0x00C5, 0x0276, 0x00C5, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.5_p_10qb_ -// 3 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F5, 0x0209, 0x0002, - 0x01EA, 0x0211, 0x0005, - 0x01DF, 0x021A, 0x0007, - 0x01D4, 0x0222, 0x000A, - 0x01C9, 0x022A, 0x000D, - 0x01BE, 0x0232, 0x0010, - 0x01B3, 0x0239, 0x0014, - 0x01A8, 0x0241, 0x0017, - 0x019D, 0x0248, 0x001B, - 0x0192, 0x024F, 0x001F, - 0x0187, 0x0255, 0x0024, - 0x017C, 0x025C, 0x0028, - 0x0171, 0x0262, 0x002D, - 0x0166, 0x0268, 0x0032, - 0x015B, 0x026E, 0x0037, - 0x0150, 0x0273, 0x003D, - 0x0146, 0x0278, 0x0042, - 0x013B, 0x027D, 0x0048, - 0x0130, 0x0282, 0x004E, - 0x0126, 0x0286, 0x0054, - 0x011B, 0x028A, 0x005B, - 0x0111, 0x028D, 0x0062, - 0x0107, 0x0290, 0x0069, - 0x00FD, 0x0293, 0x0070, - 0x00F3, 0x0296, 0x0077, - 0x00E9, 0x0298, 0x007F, - 0x00DF, 0x029A, 0x0087, - 0x00D5, 0x029C, 0x008F, - 0x00CC, 0x029D, 0x0097, - 0x00C3, 0x029E, 0x009F, - 0x00BA, 0x029E, 0x00A8, - 0x00B1, 0x029E, 0x00B1, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.6_p_10qb_ -// 3 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F4, 0x020B, 0x0001, - 0x01E8, 0x0216, 0x0002, - 0x01DC, 0x0221, 0x0003, - 0x01D0, 0x022B, 0x0005, - 0x01C4, 0x0235, 0x0007, - 0x01B8, 0x0240, 0x0008, - 0x01AC, 0x0249, 0x000B, - 0x01A0, 0x0253, 0x000D, - 0x0194, 0x025C, 0x0010, - 0x0188, 0x0265, 0x0013, - 0x017C, 0x026E, 0x0016, - 0x0170, 0x0277, 0x0019, - 0x0164, 0x027F, 0x001D, - 0x0158, 0x0287, 0x0021, - 0x014C, 0x028F, 0x0025, - 0x0140, 0x0297, 0x0029, - 0x0135, 0x029D, 0x002E, - 0x0129, 0x02A4, 0x0033, - 0x011D, 0x02AB, 0x0038, - 0x0112, 0x02B0, 0x003E, - 0x0107, 0x02B5, 0x0044, - 0x00FC, 0x02BA, 0x004A, - 0x00F1, 0x02BF, 0x0050, - 0x00E6, 0x02C3, 0x0057, - 0x00DB, 0x02C7, 0x005E, - 0x00D1, 0x02CA, 0x0065, - 0x00C7, 0x02CC, 0x006D, - 0x00BD, 0x02CE, 0x0075, - 0x00B3, 0x02D0, 0x007D, - 0x00A9, 0x02D2, 0x0085, - 0x00A0, 0x02D2, 0x008E, - 0x0097, 0x02D2, 0x0097, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.7_p_10qb_ -// 3 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F3, 0x020D, 0x0000, - 0x01E5, 0x021B, 0x0000, - 0x01D8, 0x0228, 0x0000, - 0x01CB, 0x0235, 0x0000, - 0x01BD, 0x0243, 0x0000, - 0x01B0, 0x024F, 0x0001, - 0x01A2, 0x025C, 0x0002, - 0x0195, 0x0268, 0x0003, - 0x0187, 0x0275, 0x0004, - 0x017A, 0x0280, 0x0006, - 0x016D, 0x028C, 0x0007, - 0x015F, 0x0298, 0x0009, - 0x0152, 0x02A2, 0x000C, - 0x0145, 0x02AD, 0x000E, - 0x0138, 0x02B7, 0x0011, - 0x012B, 0x02C0, 0x0015, - 0x011E, 0x02CA, 0x0018, - 0x0111, 0x02D3, 0x001C, - 0x0105, 0x02DB, 0x0020, - 0x00F8, 0x02E3, 0x0025, - 0x00EC, 0x02EA, 0x002A, - 0x00E0, 0x02F1, 0x002F, - 0x00D5, 0x02F6, 0x0035, - 0x00C9, 0x02FC, 0x003B, - 0x00BE, 0x0301, 0x0041, - 0x00B3, 0x0305, 0x0048, - 0x00A8, 0x0309, 0x004F, - 0x009E, 0x030C, 0x0056, - 0x0094, 0x030E, 0x005E, - 0x008A, 0x0310, 0x0066, - 0x0081, 0x0310, 0x006F, - 0x0077, 0x0312, 0x0077, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.8_p_10qb_ -// 3 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { - 0x0200, 0x0200, 0x0000, - 0x01F1, 0x0210, 0x0FFF, - 0x01E2, 0x0220, 0x0FFE, - 0x01D2, 0x0232, 0x0FFC, - 0x01C3, 0x0241, 0x0FFC, - 0x01B4, 0x0251, 0x0FFB, - 0x01A4, 0x0262, 0x0FFA, - 0x0195, 0x0271, 0x0FFA, - 0x0186, 0x0281, 0x0FF9, - 0x0176, 0x0291, 0x0FF9, - 0x0167, 0x02A0, 0x0FF9, - 0x0158, 0x02AE, 0x0FFA, - 0x0149, 0x02BD, 0x0FFA, - 0x013A, 0x02CB, 0x0FFB, - 0x012C, 0x02D7, 0x0FFD, - 0x011D, 0x02E5, 0x0FFE, - 0x010F, 0x02F1, 0x0000, - 0x0101, 0x02FD, 0x0002, - 0x00F3, 0x0308, 0x0005, - 0x00E5, 0x0313, 0x0008, - 0x00D8, 0x031D, 0x000B, - 0x00CB, 0x0326, 0x000F, - 0x00BE, 0x032F, 0x0013, - 0x00B2, 0x0337, 0x0017, - 0x00A6, 0x033E, 0x001C, - 0x009A, 0x0345, 0x0021, - 0x008F, 0x034A, 0x0027, - 0x0084, 0x034F, 0x002D, - 0x0079, 0x0353, 0x0034, - 0x006F, 0x0356, 0x003B, - 0x0065, 0x0358, 0x0043, - 0x005C, 0x0359, 0x004B, - 0x0053, 0x035A, 0x0053, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_0.9_p_10qb_ -// 3 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { - 0x0200, 0x0200, 0x0000, - 0x01EE, 0x0214, 0x0FFE, - 0x01DC, 0x0228, 0x0FFC, - 0x01CA, 0x023C, 0x0FFA, - 0x01B9, 0x024F, 0x0FF8, - 0x01A7, 0x0262, 0x0FF7, - 0x0195, 0x0276, 0x0FF5, - 0x0183, 0x028A, 0x0FF3, - 0x0172, 0x029C, 0x0FF2, - 0x0160, 0x02AF, 0x0FF1, - 0x014F, 0x02C2, 0x0FEF, - 0x013E, 0x02D4, 0x0FEE, - 0x012D, 0x02E5, 0x0FEE, - 0x011C, 0x02F7, 0x0FED, - 0x010C, 0x0307, 0x0FED, - 0x00FB, 0x0318, 0x0FED, - 0x00EC, 0x0327, 0x0FED, - 0x00DC, 0x0336, 0x0FEE, - 0x00CD, 0x0344, 0x0FEF, - 0x00BE, 0x0352, 0x0FF0, - 0x00B0, 0x035E, 0x0FF2, - 0x00A2, 0x036A, 0x0FF4, - 0x0095, 0x0375, 0x0FF6, - 0x0088, 0x037F, 0x0FF9, - 0x007B, 0x0388, 0x0FFD, - 0x006F, 0x0391, 0x0000, - 0x0064, 0x0397, 0x0005, - 0x0059, 0x039D, 0x000A, - 0x004E, 0x03A3, 0x000F, - 0x0045, 0x03A6, 0x0015, - 0x003B, 0x03A9, 0x001C, - 0x0033, 0x03AA, 0x0023, - 0x002A, 0x03AC, 0x002A, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 3t_64p_LanczosEd_p_1_p_10qb_ -// 3 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { - 0x0200, 0x0200, 0x0000, - 0x01EB, 0x0217, 0x0FFE, - 0x01D5, 0x022F, 0x0FFC, - 0x01C0, 0x0247, 0x0FF9, - 0x01AB, 0x025E, 0x0FF7, - 0x0196, 0x0276, 0x0FF4, - 0x0181, 0x028D, 0x0FF2, - 0x016C, 0x02A5, 0x0FEF, - 0x0158, 0x02BB, 0x0FED, - 0x0144, 0x02D1, 0x0FEB, - 0x0130, 0x02E8, 0x0FE8, - 0x011C, 0x02FE, 0x0FE6, - 0x0109, 0x0313, 0x0FE4, - 0x00F6, 0x0328, 0x0FE2, - 0x00E4, 0x033C, 0x0FE0, - 0x00D2, 0x034F, 0x0FDF, - 0x00C0, 0x0363, 0x0FDD, - 0x00B0, 0x0374, 0x0FDC, - 0x009F, 0x0385, 0x0FDC, - 0x0090, 0x0395, 0x0FDB, - 0x0081, 0x03A4, 0x0FDB, - 0x0072, 0x03B3, 0x0FDB, - 0x0064, 0x03C0, 0x0FDC, - 0x0057, 0x03CC, 0x0FDD, - 0x004B, 0x03D6, 0x0FDF, - 0x003F, 0x03E0, 0x0FE1, - 0x0034, 0x03E8, 0x0FE4, - 0x002A, 0x03EF, 0x0FE7, - 0x0020, 0x03F5, 0x0FEB, - 0x0017, 0x03FA, 0x0FEF, - 0x000F, 0x03FD, 0x0FF4, - 0x0007, 0x03FF, 0x0FFA, - 0x0000, 0x0400, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.3_p_10qb_ -// 4 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { - 0x0104, 0x01F8, 0x0104, 0x0000, - 0x00FE, 0x01F7, 0x010A, 0x0001, - 0x00F8, 0x01F6, 0x010F, 0x0003, - 0x00F2, 0x01F5, 0x0114, 0x0005, - 0x00EB, 0x01F4, 0x011B, 0x0006, - 0x00E5, 0x01F3, 0x0120, 0x0008, - 0x00DF, 0x01F2, 0x0125, 0x000A, - 0x00DA, 0x01F0, 0x012A, 0x000C, - 0x00D4, 0x01EE, 0x0130, 0x000E, - 0x00CE, 0x01ED, 0x0135, 0x0010, - 0x00C8, 0x01EB, 0x013A, 0x0013, - 0x00C2, 0x01E9, 0x0140, 0x0015, - 0x00BD, 0x01E7, 0x0145, 0x0017, - 0x00B7, 0x01E5, 0x014A, 0x001A, - 0x00B1, 0x01E2, 0x0151, 0x001C, - 0x00AC, 0x01E0, 0x0155, 0x001F, - 0x00A7, 0x01DD, 0x015A, 0x0022, - 0x00A1, 0x01DB, 0x015F, 0x0025, - 0x009C, 0x01D8, 0x0165, 0x0027, - 0x0097, 0x01D5, 0x016A, 0x002A, - 0x0092, 0x01D2, 0x016E, 0x002E, - 0x008C, 0x01CF, 0x0174, 0x0031, - 0x0087, 0x01CC, 0x0179, 0x0034, - 0x0083, 0x01C9, 0x017D, 0x0037, - 0x007E, 0x01C5, 0x0182, 0x003B, - 0x0079, 0x01C2, 0x0187, 0x003E, - 0x0074, 0x01BE, 0x018C, 0x0042, - 0x0070, 0x01BA, 0x0190, 0x0046, - 0x006B, 0x01B7, 0x0195, 0x0049, - 0x0066, 0x01B3, 0x019A, 0x004D, - 0x0062, 0x01AF, 0x019E, 0x0051, - 0x005E, 0x01AB, 0x01A2, 0x0055, - 0x005A, 0x01A6, 0x01A6, 0x005A, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.4_p_10qb_ -// 4 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { - 0x00FB, 0x0209, 0x00FC, 0x0000, - 0x00F5, 0x0209, 0x0101, 0x0001, - 0x00EE, 0x0208, 0x0108, 0x0002, - 0x00E8, 0x0207, 0x010E, 0x0003, - 0x00E2, 0x0206, 0x0114, 0x0004, - 0x00DB, 0x0205, 0x011A, 0x0006, - 0x00D5, 0x0204, 0x0120, 0x0007, - 0x00CF, 0x0203, 0x0125, 0x0009, - 0x00C9, 0x0201, 0x012C, 0x000A, - 0x00C3, 0x01FF, 0x0132, 0x000C, - 0x00BD, 0x01FD, 0x0138, 0x000E, - 0x00B7, 0x01FB, 0x013E, 0x0010, - 0x00B1, 0x01F9, 0x0144, 0x0012, - 0x00AC, 0x01F7, 0x0149, 0x0014, - 0x00A6, 0x01F4, 0x0150, 0x0016, - 0x00A0, 0x01F2, 0x0156, 0x0018, - 0x009B, 0x01EF, 0x015C, 0x001A, - 0x0095, 0x01EC, 0x0162, 0x001D, - 0x0090, 0x01E9, 0x0168, 0x001F, - 0x008B, 0x01E6, 0x016D, 0x0022, - 0x0085, 0x01E3, 0x0173, 0x0025, - 0x0080, 0x01DF, 0x0179, 0x0028, - 0x007B, 0x01DC, 0x017E, 0x002B, - 0x0076, 0x01D8, 0x0184, 0x002E, - 0x0071, 0x01D4, 0x018A, 0x0031, - 0x006D, 0x01D1, 0x018E, 0x0034, - 0x0068, 0x01CD, 0x0193, 0x0038, - 0x0063, 0x01C8, 0x019A, 0x003B, - 0x005F, 0x01C4, 0x019E, 0x003F, - 0x005B, 0x01C0, 0x01A3, 0x0042, - 0x0056, 0x01BB, 0x01A9, 0x0046, - 0x0052, 0x01B7, 0x01AD, 0x004A, - 0x004E, 0x01B2, 0x01B2, 0x004E, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.5_p_10qb_ -// 4 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { - 0x00E5, 0x0236, 0x00E5, 0x0000, - 0x00DE, 0x0235, 0x00ED, 0x0000, - 0x00D7, 0x0235, 0x00F4, 0x0000, - 0x00D0, 0x0235, 0x00FB, 0x0000, - 0x00C9, 0x0234, 0x0102, 0x0001, - 0x00C2, 0x0233, 0x010A, 0x0001, - 0x00BC, 0x0232, 0x0111, 0x0001, - 0x00B5, 0x0230, 0x0119, 0x0002, - 0x00AE, 0x022F, 0x0121, 0x0002, - 0x00A8, 0x022D, 0x0128, 0x0003, - 0x00A2, 0x022B, 0x012F, 0x0004, - 0x009B, 0x0229, 0x0137, 0x0005, - 0x0095, 0x0226, 0x013F, 0x0006, - 0x008F, 0x0224, 0x0146, 0x0007, - 0x0089, 0x0221, 0x014E, 0x0008, - 0x0083, 0x021E, 0x0155, 0x000A, - 0x007E, 0x021B, 0x015C, 0x000B, - 0x0078, 0x0217, 0x0164, 0x000D, - 0x0072, 0x0213, 0x016D, 0x000E, - 0x006D, 0x0210, 0x0173, 0x0010, - 0x0068, 0x020C, 0x017A, 0x0012, - 0x0063, 0x0207, 0x0182, 0x0014, - 0x005E, 0x0203, 0x0189, 0x0016, - 0x0059, 0x01FE, 0x0191, 0x0018, - 0x0054, 0x01F9, 0x0198, 0x001B, - 0x0050, 0x01F4, 0x019F, 0x001D, - 0x004B, 0x01EF, 0x01A6, 0x0020, - 0x0047, 0x01EA, 0x01AC, 0x0023, - 0x0043, 0x01E4, 0x01B3, 0x0026, - 0x003F, 0x01DF, 0x01B9, 0x0029, - 0x003B, 0x01D9, 0x01C0, 0x002C, - 0x0037, 0x01D3, 0x01C6, 0x0030, - 0x0033, 0x01CD, 0x01CD, 0x0033, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.6_p_10qb_ -// 4 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { - 0x00C8, 0x026F, 0x00C9, 0x0000, - 0x00C0, 0x0270, 0x00D1, 0x0FFF, - 0x00B8, 0x0270, 0x00D9, 0x0FFF, - 0x00B1, 0x0270, 0x00E1, 0x0FFE, - 0x00A9, 0x026F, 0x00EB, 0x0FFD, - 0x00A2, 0x026E, 0x00F3, 0x0FFD, - 0x009A, 0x026D, 0x00FD, 0x0FFC, - 0x0093, 0x026C, 0x0105, 0x0FFC, - 0x008C, 0x026A, 0x010F, 0x0FFB, - 0x0085, 0x0268, 0x0118, 0x0FFB, - 0x007E, 0x0265, 0x0122, 0x0FFB, - 0x0078, 0x0263, 0x012A, 0x0FFB, - 0x0071, 0x0260, 0x0134, 0x0FFB, - 0x006B, 0x025C, 0x013E, 0x0FFB, - 0x0065, 0x0259, 0x0147, 0x0FFB, - 0x005F, 0x0255, 0x0151, 0x0FFB, - 0x0059, 0x0251, 0x015A, 0x0FFC, - 0x0054, 0x024D, 0x0163, 0x0FFC, - 0x004E, 0x0248, 0x016D, 0x0FFD, - 0x0049, 0x0243, 0x0176, 0x0FFE, - 0x0044, 0x023E, 0x017F, 0x0FFF, - 0x003F, 0x0238, 0x0189, 0x0000, - 0x003A, 0x0232, 0x0193, 0x0001, - 0x0036, 0x022C, 0x019C, 0x0002, - 0x0031, 0x0226, 0x01A5, 0x0004, - 0x002D, 0x021F, 0x01AF, 0x0005, - 0x0029, 0x0218, 0x01B8, 0x0007, - 0x0025, 0x0211, 0x01C1, 0x0009, - 0x0022, 0x020A, 0x01C9, 0x000B, - 0x001E, 0x0203, 0x01D2, 0x000D, - 0x001B, 0x01FB, 0x01DA, 0x0010, - 0x0018, 0x01F3, 0x01E3, 0x0012, - 0x0015, 0x01EB, 0x01EB, 0x0015, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.7_p_10qb_ -// 4 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { - 0x00A3, 0x02B9, 0x00A4, 0x0000, - 0x009A, 0x02BA, 0x00AD, 0x0FFF, - 0x0092, 0x02BA, 0x00B6, 0x0FFE, - 0x0089, 0x02BA, 0x00C1, 0x0FFC, - 0x0081, 0x02B9, 0x00CB, 0x0FFB, - 0x0079, 0x02B8, 0x00D5, 0x0FFA, - 0x0071, 0x02B7, 0x00DF, 0x0FF9, - 0x0069, 0x02B5, 0x00EA, 0x0FF8, - 0x0062, 0x02B3, 0x00F4, 0x0FF7, - 0x005B, 0x02B0, 0x00FF, 0x0FF6, - 0x0054, 0x02AD, 0x010B, 0x0FF4, - 0x004D, 0x02A9, 0x0117, 0x0FF3, - 0x0046, 0x02A5, 0x0123, 0x0FF2, - 0x0040, 0x02A1, 0x012D, 0x0FF2, - 0x003A, 0x029C, 0x0139, 0x0FF1, - 0x0034, 0x0297, 0x0145, 0x0FF0, - 0x002F, 0x0292, 0x0150, 0x0FEF, - 0x0029, 0x028C, 0x015C, 0x0FEF, - 0x0024, 0x0285, 0x0169, 0x0FEE, - 0x001F, 0x027F, 0x0174, 0x0FEE, - 0x001B, 0x0278, 0x017F, 0x0FEE, - 0x0016, 0x0270, 0x018D, 0x0FED, - 0x0012, 0x0268, 0x0199, 0x0FED, - 0x000E, 0x0260, 0x01A4, 0x0FEE, - 0x000B, 0x0258, 0x01AF, 0x0FEE, - 0x0007, 0x024F, 0x01BC, 0x0FEE, - 0x0004, 0x0246, 0x01C7, 0x0FEF, - 0x0001, 0x023D, 0x01D3, 0x0FEF, - 0x0FFE, 0x0233, 0x01DF, 0x0FF0, - 0x0FFC, 0x0229, 0x01EA, 0x0FF1, - 0x0FFA, 0x021F, 0x01F4, 0x0FF3, - 0x0FF8, 0x0215, 0x01FF, 0x0FF4, - 0x0FF6, 0x020A, 0x020A, 0x0FF6, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.8_p_10qb_ -// 4 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { - 0x0075, 0x0315, 0x0076, 0x0000, - 0x006C, 0x0316, 0x007F, 0x0FFF, - 0x0062, 0x0316, 0x008A, 0x0FFE, - 0x0059, 0x0315, 0x0096, 0x0FFC, - 0x0050, 0x0314, 0x00A1, 0x0FFB, - 0x0048, 0x0312, 0x00AD, 0x0FF9, - 0x0040, 0x0310, 0x00B8, 0x0FF8, - 0x0038, 0x030D, 0x00C5, 0x0FF6, - 0x0030, 0x030A, 0x00D1, 0x0FF5, - 0x0029, 0x0306, 0x00DE, 0x0FF3, - 0x0022, 0x0301, 0x00EB, 0x0FF2, - 0x001C, 0x02FC, 0x00F8, 0x0FF0, - 0x0015, 0x02F7, 0x0106, 0x0FEE, - 0x0010, 0x02F1, 0x0112, 0x0FED, - 0x000A, 0x02EA, 0x0121, 0x0FEB, - 0x0005, 0x02E3, 0x012F, 0x0FE9, - 0x0000, 0x02DB, 0x013D, 0x0FE8, - 0x0FFB, 0x02D3, 0x014C, 0x0FE6, - 0x0FF7, 0x02CA, 0x015A, 0x0FE5, - 0x0FF3, 0x02C1, 0x0169, 0x0FE3, - 0x0FF0, 0x02B7, 0x0177, 0x0FE2, - 0x0FEC, 0x02AD, 0x0186, 0x0FE1, - 0x0FE9, 0x02A2, 0x0196, 0x0FDF, - 0x0FE7, 0x0297, 0x01A4, 0x0FDE, - 0x0FE4, 0x028C, 0x01B3, 0x0FDD, - 0x0FE2, 0x0280, 0x01C2, 0x0FDC, - 0x0FE0, 0x0274, 0x01D0, 0x0FDC, - 0x0FDF, 0x0268, 0x01DE, 0x0FDB, - 0x0FDD, 0x025B, 0x01EE, 0x0FDA, - 0x0FDC, 0x024E, 0x01FC, 0x0FDA, - 0x0FDB, 0x0241, 0x020A, 0x0FDA, - 0x0FDB, 0x0233, 0x0218, 0x0FDA, - 0x0FDA, 0x0226, 0x0226, 0x0FDA, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_0.9_p_10qb_ -// 4 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { - 0x003F, 0x0383, 0x003E, 0x0000, - 0x0034, 0x0383, 0x004A, 0x0FFF, - 0x002B, 0x0383, 0x0054, 0x0FFE, - 0x0021, 0x0381, 0x0061, 0x0FFD, - 0x0019, 0x037F, 0x006C, 0x0FFC, - 0x0010, 0x037C, 0x0079, 0x0FFB, - 0x0008, 0x0378, 0x0086, 0x0FFA, - 0x0001, 0x0374, 0x0093, 0x0FF8, - 0x0FFA, 0x036E, 0x00A1, 0x0FF7, - 0x0FF3, 0x0368, 0x00B0, 0x0FF5, - 0x0FED, 0x0361, 0x00BF, 0x0FF3, - 0x0FE8, 0x035A, 0x00CD, 0x0FF1, - 0x0FE2, 0x0352, 0x00DC, 0x0FF0, - 0x0FDE, 0x0349, 0x00EB, 0x0FEE, - 0x0FD9, 0x033F, 0x00FC, 0x0FEC, - 0x0FD5, 0x0335, 0x010D, 0x0FE9, - 0x0FD2, 0x032A, 0x011D, 0x0FE7, - 0x0FCF, 0x031E, 0x012E, 0x0FE5, - 0x0FCC, 0x0312, 0x013F, 0x0FE3, - 0x0FCA, 0x0305, 0x0150, 0x0FE1, - 0x0FC8, 0x02F8, 0x0162, 0x0FDE, - 0x0FC6, 0x02EA, 0x0174, 0x0FDC, - 0x0FC5, 0x02DC, 0x0185, 0x0FDA, - 0x0FC4, 0x02CD, 0x0197, 0x0FD8, - 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, - 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, - 0x0FC3, 0x029F, 0x01CD, 0x0FD1, - 0x0FC3, 0x028E, 0x01E0, 0x0FCF, - 0x0FC3, 0x027E, 0x01F2, 0x0FCD, - 0x0FC4, 0x026D, 0x0203, 0x0FCC, - 0x0FC5, 0x025C, 0x0215, 0x0FCA, - 0x0FC6, 0x024B, 0x0227, 0x0FC8, - 0x0FC7, 0x0239, 0x0239, 0x0FC7, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 03-Apr-2024 -// 4t_64p_LanczosEd_p_1_p_10qb_ -// 4 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { - 0x0000, 0x0400, 0x0000, 0x0000, - 0x0FF6, 0x03FF, 0x000B, 0x0000, - 0x0FED, 0x03FE, 0x0015, 0x0000, - 0x0FE4, 0x03FB, 0x0022, 0x0FFF, - 0x0FDC, 0x03F7, 0x002E, 0x0FFF, - 0x0FD5, 0x03F2, 0x003B, 0x0FFE, - 0x0FCE, 0x03EC, 0x0048, 0x0FFE, - 0x0FC8, 0x03E5, 0x0056, 0x0FFD, - 0x0FC3, 0x03DC, 0x0065, 0x0FFC, - 0x0FBE, 0x03D3, 0x0075, 0x0FFA, - 0x0FB9, 0x03C9, 0x0085, 0x0FF9, - 0x0FB6, 0x03BE, 0x0094, 0x0FF8, - 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, - 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, - 0x0FAD, 0x0397, 0x00CA, 0x0FF2, - 0x0FAB, 0x0389, 0x00DC, 0x0FF0, - 0x0FAA, 0x0379, 0x00EF, 0x0FEE, - 0x0FA9, 0x0369, 0x0102, 0x0FEC, - 0x0FA9, 0x0359, 0x0115, 0x0FE9, - 0x0FA9, 0x0348, 0x0129, 0x0FE6, - 0x0FA9, 0x0336, 0x013D, 0x0FE4, - 0x0FA9, 0x0323, 0x0153, 0x0FE1, - 0x0FAA, 0x0310, 0x0168, 0x0FDE, - 0x0FAC, 0x02FD, 0x017C, 0x0FDB, - 0x0FAD, 0x02E9, 0x0192, 0x0FD8, - 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, - 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, - 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, - 0x0FB5, 0x0296, 0x01E9, 0x0FCC, - 0x0FB8, 0x0281, 0x01FE, 0x0FC9, - 0x0FBA, 0x026C, 0x0214, 0x0FC6, - 0x0FBD, 0x0256, 0x022A, 0x0FC3, - 0x0FC0, 0x0240, 0x0240, 0x0FC0, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.3_p_10qb_ -// 6 -// 64 -// input/output = 0.300000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { - 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, - 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, - 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, - 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, - 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, - 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, - 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, - 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, - 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, - 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, - 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, - 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, - 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, - 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, - 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, - 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, - 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, - 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, - 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, - 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, - 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, - 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, - 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, - 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, - 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, - 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, - 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, - 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, - 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, - 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, - 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, - 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, - 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.4_p_10qb_ -// 6 -// 64 -// input/output = 0.400000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { - 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, - 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, - 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, - 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, - 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, - 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, - 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, - 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, - 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, - 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, - 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, - 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, - 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, - 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, - 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, - 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, - 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, - 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, - 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, - 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, - 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, - 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, - 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, - 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, - 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, - 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, - 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, - 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, - 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, - 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, - 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, - 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, - 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.5_p_10qb_ -// 6 -// 64 -// input/output = 0.500000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { - 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, - 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, - 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, - 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, - 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, - 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, - 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, - 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, - 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, - 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, - 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, - 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, - 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, - 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, - 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, - 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, - 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, - 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, - 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, - 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, - 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, - 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, - 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, - 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2, - 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, - 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, - 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, - 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, - 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, - 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, - 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, - 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, - 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.6_p_10qb_ -// 6 -// 64 -// input/output = 0.600000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { - 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, - 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, - 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, - 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, - 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, - 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, - 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, - 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, - 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, - 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, - 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, - 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, - 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, - 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, - 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, - 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, - 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, - 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, - 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, - 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, - 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, - 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, - 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, - 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, - 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, - 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, - 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, - 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, - 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, - 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, - 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, - 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, - 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.7_p_10qb_ -// 6 -// 64 -// input/output = 0.700000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { - 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, - 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, - 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, - 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, - 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, - 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, - 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, - 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, - 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, - 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, - 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, - 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, - 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, - 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF, - 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, - 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, - 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE, - 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, - 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, - 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, - 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, - 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, - 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, - 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, - 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, - 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, - 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, - 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, - 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, - 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, - 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, - 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, - 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.8_p_10qb_ -// 6 -// 64 -// input/output = 0.800000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { - 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, - 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, - 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, - 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, - 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, - 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, - 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, - 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, - 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, - 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, - 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, - 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, - 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, - 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, - 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, - 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, - 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, - 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, - 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, - 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, - 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, - 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, - 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, - 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, - 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, - 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, - 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, - 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, - 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, - 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, - 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, - 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, - 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_0.9_p_10qb_ -// 6 -// 64 -// input/output = 0.900000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { - 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, - 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, - 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, - 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, - 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, - 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, - 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003, - 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, - 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, - 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, - 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, - 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, - 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, - 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, - 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, - 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, - 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B, - 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, - 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, - 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, - 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, - 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, - 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, - 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, - 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, - 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, - 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, - 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, - 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, - 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, - 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, - 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, - 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, -}; - -//======================================================== -// gen_scaler_coeffs_cnf_file.m -// make_test_script.m -// 02-Apr-2024 -// 6t_64p_LanczosEd_p_1_p_10qb_ -// 6 -// 64 -// input/output = 1.000000000000 -// LanczosEd -// S1.10 -//======================================================== -static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { - 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, - 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, - 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, - 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, - 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, - 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, - 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, - 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, - 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, - 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, - 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, - 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, - 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, - 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, - 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, - 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, - 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, - 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, - 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, - 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, - 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, - 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, - 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, - 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, - 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, - 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, - 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, - 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, - 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, - 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, - 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, - 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, - 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, -}; - -/* Converted scaler coeff tables from S1.10 to S1.12 */ -static uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; -static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; -static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; -static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; -static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; -static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; - -struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0002}, -}; - -struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0002}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { - {3, 10, 0x4100}, - {4, 10, 0x4100}, - {5, 10, 0x4100}, - {6, 10, 0x4100}, - {7, 10, 0x4100}, - {8, 10, 0x4100}, - {9, 10, 0x4100}, - {1, 1, 0x4100}, - {-1, -1, 0x4100}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { - {3, 10, 0x4000}, - {4, 10, 0x4000}, - {5, 10, 0x4000}, - {6, 10, 0x4000}, - {7, 10, 0x4000}, - {8, 10, 0x4000}, - {9, 10, 0x4000}, - {1, 1, 0x4000}, - {-1, -1, 0x4000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x251F}, - {5, 10, 0x291F}, - {6, 10, 0xA51F}, - {7, 10, 0xA51F}, - {8, 10, 0xAA66}, - {9, 10, 0xA51F}, - {1, 1, 0xA640}, - {-1, -1, 0xA640}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x9600}, - {5, 10, 0xA460}, - {6, 10, 0xA8E0}, - {7, 10, 0xAC00}, - {8, 10, 0xAD20}, - {9, 10, 0xAFC0}, - {1, 1, 0xB058}, - {-1, -1, 0xB058}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { - {3, 10, 0x4100}, - {4, 10, 0x4100}, - {5, 10, 0x4100}, - {6, 10, 0x4100}, - {7, 10, 0x4100}, - {8, 10, 0x4100}, - {9, 10, 0x4100}, - {1, 1, 0x4100}, - {-1, -1, 0x4100}, -}; - -struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { - {3, 10, 0x4000}, - {4, 10, 0x4000}, - {5, 10, 0x4000}, - {6, 10, 0x4000}, - {7, 10, 0x4000}, - {8, 10, 0x4000}, - {9, 10, 0x4000}, - {1, 1, 0x4000}, - {-1, -1, 0x4000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x0000}, - {1, 1, 0x0000}, - {-1, -1, 0x0000}, -}; - -struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x9900}, - {7, 10, 0xA100}, - {8, 10, 0xA8C0}, - {9, 10, 0xAB20}, - {1, 1, 0xAC00}, - {-1, -1, 0xAC00}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x4100}, - {9, 10, 0x9F00}, - {1, 1, 0xA4C0}, - {-1, -1, 0xA8D8}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x4000}, - {9, 10, 0x24FE}, - {1, 1, 0x2D64}, - {-1, -1, 0x3ADB}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { - {3, 10, 0x3800}, - {4, 10, 0x3800}, - {5, 10, 0x3800}, - {6, 10, 0x3800}, - {7, 10, 0x3800}, - {8, 10, 0x3886}, - {9, 10, 0x3940}, - {1, 1, 0x3A4E}, - {-1, -1, 0x3B66}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { - {3, 10, 0x3800}, - {4, 10, 0x3800}, - {5, 10, 0x3800}, - {6, 10, 0x3800}, - {7, 10, 0x3800}, - {8, 10, 0x36F4}, - {9, 10, 0x359C}, - {1, 1, 0x3360}, - {-1, -1, 0x2F20}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x359C}, - {1, 1, 0x31F0}, - {-1, -1, 0x1F00}, -}; - -struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { - {3, 10, 0x0000}, - {4, 10, 0x0000}, - {5, 10, 0x0000}, - {6, 10, 0x0000}, - {7, 10, 0x0000}, - {8, 10, 0x0000}, - {9, 10, 0x9F00}, - {1, 1, 0xA400}, - {-1, -1, 0x9E00}, -}; - -void spl_init_easf_filter_coeffs(void) -{ - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, - easf_filter_3tap_64p_ratio_0_30_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, - easf_filter_3tap_64p_ratio_0_40_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, - easf_filter_3tap_64p_ratio_0_50_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, - easf_filter_3tap_64p_ratio_0_60_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, - easf_filter_3tap_64p_ratio_0_70_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, - easf_filter_3tap_64p_ratio_0_80_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, - easf_filter_3tap_64p_ratio_0_90_s1_12, 3); - convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, - easf_filter_3tap_64p_ratio_1_00_s1_12, 3); - - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, - easf_filter_4tap_64p_ratio_0_30_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, - easf_filter_4tap_64p_ratio_0_40_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, - easf_filter_4tap_64p_ratio_0_50_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, - easf_filter_4tap_64p_ratio_0_60_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, - easf_filter_4tap_64p_ratio_0_70_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, - easf_filter_4tap_64p_ratio_0_80_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, - easf_filter_4tap_64p_ratio_0_90_s1_12, 4); - convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, - easf_filter_4tap_64p_ratio_1_00_s1_12, 4); - - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, - easf_filter_6tap_64p_ratio_0_30_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, - easf_filter_6tap_64p_ratio_0_40_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, - easf_filter_6tap_64p_ratio_0_50_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, - easf_filter_6tap_64p_ratio_0_60_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, - easf_filter_6tap_64p_ratio_0_70_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, - easf_filter_6tap_64p_ratio_0_80_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, - easf_filter_6tap_64p_ratio_0_90_s1_12, 6); - convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, - easf_filter_6tap_64p_ratio_1_00_s1_12, 6); -} - -uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_3tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_3tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_3tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_3tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_3tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_3tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_3tap_64p_ratio_0_90_s1_12; - else - return easf_filter_3tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_4tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_4tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_4tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_4tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_4tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_4tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_4tap_64p_ratio_0_90_s1_12; - else - return easf_filter_4tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) -{ - if (ratio.value < spl_fixpt_from_fraction(3, 10).value) - return easf_filter_6tap_64p_ratio_0_30_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) - return easf_filter_6tap_64p_ratio_0_40_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) - return easf_filter_6tap_64p_ratio_0_50_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) - return easf_filter_6tap_64p_ratio_0_60_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) - return easf_filter_6tap_64p_ratio_0_70_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) - return easf_filter_6tap_64p_ratio_0_80_s1_12; - else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) - return easf_filter_6tap_64p_ratio_0_90_s1_12; - else - return easf_filter_6tap_64p_ratio_1_00_s1_12; -} - -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) -{ - if (taps == 6) - return spl_get_easf_filter_6tap_64p(ratio); - else if (taps == 4) - return spl_get_easf_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_easf_filter_3tap_64p(ratio); - else { - /* should never happen, bug */ - SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - -void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data, bool enable_easf_v, - bool enable_easf_h) -{ - /* - * Old coefficients calculated scaling ratio = input / output - * New coefficients are calculated based on = output / input - */ - if (enable_easf_h) { - dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.h_taps, data->recip_ratios.horz); - - dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.h_taps_c, data->recip_ratios.horz_c); - } else { - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - } - if (enable_easf_v) { - dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.v_taps, data->recip_ratios.vert); - - dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( - data->taps.v_taps_c, data->recip_ratios.vert_c); - } else { - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); - } -} - -static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, - struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, - unsigned int num_entries) -{ - unsigned int count = 0; - uint32_t value = 0; - struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; - - lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); - value = lookup_table_index_ptr->reg_value; - - while (count < num_entries) { - - lookup_table_index_ptr = (lookup_table_base_ptr + count); - if (lookup_table_index_ptr->numer < 0) - break; - - if (ratio.value < spl_fixpt_from_fraction( - lookup_table_index_ptr->numer, - lookup_table_index_ptr->denom).value) { - value = lookup_table_index_ptr->reg_value; - break; - } - - count++; - } - return value; -} -uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_v_bf3_mode_lookup, num_entries); - return value; -} -uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_h_bf3_mode_lookup, num_entries); - return value; -} -uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain6_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain6_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain4_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_reducer_gain4_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_gain_ring6_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring6_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_gain_ring6_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring6_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 4) { - num_entries = sizeof(easf_gain_ring4_4tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring4_4tap_lookup, num_entries); - } else if (taps == 6) { - num_entries = sizeof(easf_gain_ring4_6tap_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_gain_ring4_6tap_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_dntilt_uptilt_offset_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt_maxval_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_dntilt_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt1_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt1_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt2_slope_lookup, num_entries); - } else - value = 0; - return value; -} -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) -{ - uint32_t value; - unsigned int num_entries; - - if (taps == 3) { - num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / - sizeof(struct scale_ratio_to_reg_value_lookup); - value = spl_easf_get_scale_ratio_to_reg_value(ratio, - easf_3tap_uptilt2_offset_lookup, num_entries); - } else - value = 0; - return value; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h deleted file mode 100644 index 8bb2b8108e38..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -/* Copyright 2024 Advanced Micro Devices, Inc. */ - -#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ -#define __DC_SPL_SCL_EASF_FILTERS_H__ - -#include "dc_spl_types.h" - -struct scale_ratio_to_reg_value_lookup { - int numer; - int denom; - const uint32_t reg_value; -}; - -void spl_init_easf_filter_coeffs(void); -uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); -uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); -void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data, bool enable_easf_v, - bool enable_easf_h); - -uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); -uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); -uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); -uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); - -#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index b9a7b77a7167..e2baaf584139 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,8 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" -#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 @@ -1319,97 +1317,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) { - if (ratio.value < spl_fixpt_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1424,29 +1422,3 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } - -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - SPL_BREAK_TO_DEBUGGER(); - return NULL; - } -} - diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 48202bc4f81e..6d96aca53b24 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,16 +7,53 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p_upscale(void); +const uint16_t *spl_get_filter_3tap_16p_116(void); +const uint16_t *spl_get_filter_3tap_16p_149(void); +const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_get_filter_4tap_16p_upscale(void); +const uint16_t *spl_get_filter_4tap_16p_116(void); +const uint16_t *spl_get_filter_4tap_16p_149(void); +const uint16_t *spl_get_filter_4tap_16p_183(void); + +const uint16_t *spl_get_filter_3tap_64p_upscale(void); +const uint16_t *spl_get_filter_3tap_64p_116(void); +const uint16_t *spl_get_filter_3tap_64p_149(void); +const uint16_t *spl_get_filter_3tap_64p_183(void); + +const uint16_t *spl_get_filter_4tap_64p_upscale(void); +const uint16_t *spl_get_filter_4tap_64p_116(void); +const uint16_t *spl_get_filter_4tap_64p_149(void); +const uint16_t *spl_get_filter_4tap_64p_183(void); + +const uint16_t *spl_get_filter_5tap_64p_upscale(void); +const uint16_t *spl_get_filter_5tap_64p_116(void); +const uint16_t *spl_get_filter_5tap_64p_149(void); +const uint16_t *spl_get_filter_5tap_64p_183(void); + +const uint16_t *spl_get_filter_6tap_64p_upscale(void); +const uint16_t *spl_get_filter_6tap_64p_116(void); +const uint16_t *spl_get_filter_6tap_64p_149(void); +const uint16_t *spl_get_filter_6tap_64p_183(void); + +const uint16_t *spl_get_filter_7tap_64p_upscale(void); +const uint16_t *spl_get_filter_7tap_64p_116(void); +const uint16_t *spl_get_filter_7tap_64p_149(void); +const uint16_t *spl_get_filter_7tap_64p_183(void); + +const uint16_t *spl_get_filter_8tap_64p_upscale(void); +const uint16_t *spl_get_filter_8tap_64p_116(void); +const uint16_t *spl_get_filter_8tap_64p_149(void); +const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c new file mode 100644 index 000000000000..bb0e1b80ec3c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c @@ -0,0 +1,25 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 1438a86826a4..caaa9ced2ec4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,15 +2,14 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "os_types.h" // swap +#ifndef ASSERT +#define ASSERT(_bool) ((void *)0) +#endif +#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -#include "spl_os_types.h" // swap -#ifndef SPL_ASSERT -#define SPL_ASSERT(_bool) ((void *)0) -#endif -#include "spl_fixpt31_32.h" // fixed31_32 and related functions - enum lb_memory_config { /* Enable all 3 pieces of memory */ LB_MEMORY_CONFIG_0 = 0, @@ -39,16 +38,16 @@ struct spl_rect { }; struct spl_ratios { - struct spl_fixed31_32 horz; - struct spl_fixed31_32 vert; - struct spl_fixed31_32 horz_c; - struct spl_fixed31_32 vert_c; + struct fixed31_32 horz; + struct fixed31_32 vert; + struct fixed31_32 horz_c; + struct fixed31_32 vert_c; }; struct spl_inits { - struct spl_fixed31_32 h; - struct spl_fixed31_32 h_c; - struct spl_fixed31_32 v; - struct spl_fixed31_32 v_c; + struct fixed31_32 h; + struct fixed31_32 h_c; + struct fixed31_32 v; + struct fixed31_32 v_c; }; struct spl_taps { @@ -81,8 +80,6 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, - SPL_PIXEL_FORMAT_422BPP8, - SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -138,7 +135,6 @@ struct spl_scaler_data { struct spl_rect viewport_c; struct spl_rect recout; struct spl_ratios ratios; - struct spl_ratios recip_ratios; struct spl_inits inits; }; @@ -408,16 +404,11 @@ struct dscl_prog_data { const uint16_t *filter_blur_scale_h; }; -/* SPL input and output definitions */ -// SPL scratch struct -struct spl_scratch { - // Pack all SPL outputs in scl_data - struct spl_scaler_data scl_data; -}; - /* SPL input and output definitions */ // SPL outputs struct struct spl_out { + // Pack all SPL outputs in scl_data + struct spl_scaler_data scl_data; // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -500,10 +491,6 @@ struct spl_in { bool prefer_easf; bool disable_easf; struct spl_debug debug; - bool is_fullscreen; - bool is_hdr_on; - int h_active; - int v_active; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h deleted file mode 100644 index a36239ab8d1c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright 1997-2004 Advanced Micro Devices, Inc. All rights reserved. */ - -#ifndef SPL_DEBUG_H -#define SPL_DEBUG_H - -#ifdef SPL_ASSERT -#undef SPL_ASSERT -#endif -#define SPL_ASSERT(b) - -#define SPL_ASSERT_CRITICAL(expr) do {if (expr)/* Do nothing */; } while (0) - -#ifdef SPL_DALMSG -#undef SPL_DALMSG -#endif -#define SPL_DALMSG(b) - -#ifdef SPL_DAL_ASSERT_MSG -#undef SPL_DAL_ASSERT_MSG -#endif -#define SPL_DAL_ASSERT_MSG(b, m) - -#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c deleted file mode 100644 index 2bb1de88aef7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "spl_fixpt31_32.h" - -static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; -static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; -static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; - -static inline unsigned long long abs_i64( - long long arg) -{ - if (arg > 0) - return (unsigned long long)arg; - else - return (unsigned long long)(-arg); -} - -/* - * @brief - * result = dividend / divisor - * *remainder = dividend % divisor - */ -static inline unsigned long long complete_integer_division_u64( - unsigned long long dividend, - unsigned long long divisor, - unsigned long long *remainder) -{ - unsigned long long result; - - ASSERT(divisor); - - result = spl_div64_u64_rem(dividend, divisor, remainder); - - return result; -} - - -#define FRACTIONAL_PART_MASK \ - ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) - -#define GET_INTEGER_PART(x) \ - ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) - -#define GET_FRACTIONAL_PART(x) \ - (FRACTIONAL_PART_MASK & (x)) - -struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) -{ - struct spl_fixed31_32 res; - - bool arg1_negative = numerator < 0; - bool arg2_negative = denominator < 0; - - unsigned long long arg1_value = arg1_negative ? -numerator : numerator; - unsigned long long arg2_value = arg2_negative ? -denominator : denominator; - - unsigned long long remainder; - - /* determine integer part */ - - unsigned long long res_value = complete_integer_division_u64( - arg1_value, arg2_value, &remainder); - - ASSERT(res_value <= LONG_MAX); - - /* determine fractional part */ - { - unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; - - do { - remainder <<= 1; - - res_value <<= 1; - - if (remainder >= arg2_value) { - res_value |= 1; - remainder -= arg2_value; - } - } while (--i != 0); - } - - /* round up LSB */ - { - unsigned long long summand = (remainder << 1) >= arg2_value; - - ASSERT(res_value <= LLONG_MAX - summand); - - res_value += summand; - } - - res.value = (long long)res_value; - - if (arg1_negative ^ arg2_negative) - res.value = -res.value; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - bool arg1_negative = arg1.value < 0; - bool arg2_negative = arg2.value < 0; - - unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; - unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; - - unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); - unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); - - unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); - unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); - - unsigned long long tmp; - - res.value = arg1_int * arg2_int; - - ASSERT(res.value <= (long long)LONG_MAX); - - res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; - - tmp = arg1_int * arg2_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg2_int * arg1_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg1_fra * arg2_fra; - - tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (unsigned long long)spl_fixpt_half.value); - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - if (arg1_negative ^ arg2_negative) - res.value = -res.value; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res; - - unsigned long long arg_value = abs_i64(arg.value); - - unsigned long long arg_int = GET_INTEGER_PART(arg_value); - - unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); - - unsigned long long tmp; - - res.value = arg_int * arg_int; - - ASSERT(res.value <= (long long)LONG_MAX); - - res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; - - tmp = arg_int * arg_fra; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - tmp = arg_fra * arg_fra; - - tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (unsigned long long)spl_fixpt_half.value); - - ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); - - res.value += tmp; - - return res; -} - -struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) -{ - /* - * @note - * Good idea to use Newton's method - */ - - ASSERT(arg.value); - - return spl_fixpt_from_fraction( - spl_fixpt_one.value, - arg.value); -} - -struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 square; - - struct spl_fixed31_32 res = spl_fixpt_one; - - int n = 27; - - struct spl_fixed31_32 arg_norm = arg; - - if (spl_fixpt_le( - spl_fixpt_two_pi, - spl_fixpt_abs(arg))) { - arg_norm = spl_fixpt_sub( - arg_norm, - spl_fixpt_mul_int( - spl_fixpt_two_pi, - (int)spl_div64_s64( - arg_norm.value, - spl_fixpt_two_pi.value))); - } - - square = spl_fixpt_sqr(arg_norm); - - do { - res = spl_fixpt_sub( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - square, - res), - n * (n - 1))); - - n -= 2; - } while (n > 2); - - if (arg.value != arg_norm.value) - res = spl_fixpt_div( - spl_fixpt_mul(res, arg_norm), - arg); - - return res; -} - -struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) -{ - return spl_fixpt_mul( - arg, - spl_fixpt_sinc(arg)); -} - -struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) -{ - /* TODO implement argument normalization */ - - const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); - - struct spl_fixed31_32 res = spl_fixpt_one; - - int n = 26; - - do { - res = spl_fixpt_sub( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - square, - res), - n * (n - 1))); - - n -= 2; - } while (n != 0); - - return res; -} - -/* - * @brief - * result = exp(arg), - * where abs(arg) < 1 - * - * Calculated as Taylor series. - */ -static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) -{ - unsigned int n = 9; - - struct spl_fixed31_32 res = spl_fixpt_from_fraction( - n + 2, - n + 1); - /* TODO find correct res */ - - ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); - - do - res = spl_fixpt_add( - spl_fixpt_one, - spl_fixpt_div_int( - spl_fixpt_mul( - arg, - res), - n)); - while (--n != 1); - - return spl_fixpt_add( - spl_fixpt_one, - spl_fixpt_mul( - arg, - res)); -} - -struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) -{ - /* - * @brief - * Main equation is: - * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), - * where m = round(x / ln(2)), r = x - m * ln(2) - */ - - if (spl_fixpt_le( - spl_fixpt_ln2_div_2, - spl_fixpt_abs(arg))) { - int m = spl_fixpt_round( - spl_fixpt_div( - arg, - spl_fixpt_ln2)); - - struct spl_fixed31_32 r = spl_fixpt_sub( - arg, - spl_fixpt_mul_int( - spl_fixpt_ln2, - m)); - - ASSERT(m != 0); - - ASSERT(spl_fixpt_lt( - spl_fixpt_abs(r), - spl_fixpt_one)); - - if (m > 0) - return spl_fixpt_shl( - fixed31_32_exp_from_taylor_series(r), - (unsigned char)m); - else - return spl_fixpt_div_int( - fixed31_32_exp_from_taylor_series(r), - 1LL << -m); - } else if (arg.value != 0) - return fixed31_32_exp_from_taylor_series(arg); - else - return spl_fixpt_one; -} - -struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); - /* TODO improve 1st estimation */ - - struct spl_fixed31_32 error; - - ASSERT(arg.value > 0); - /* TODO if arg is negative, return NaN */ - /* TODO if arg is zero, return -INF */ - - do { - struct spl_fixed31_32 res1 = spl_fixpt_add( - spl_fixpt_sub( - res, - spl_fixpt_one), - spl_fixpt_div( - arg, - spl_fixpt_exp(res))); - - error = spl_fixpt_sub( - res, - res1); - - res = res1; - /* TODO determine max_allowed_error based on quality of exp() */ - } while (abs_i64(error.value) > 100ULL); - - return res; -} - - -/* this function is a generic helper to translate fixed point value to - * specified integer format that will consist of integer_bits integer part and - * fractional_bits fractional part. For example it is used in - * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional - * part in 32 bits. It is used in hw programming (scaler) - */ - -static inline unsigned int ux_dy( - long long value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - /* 1. create mask of integer part */ - unsigned int result = (1 << integer_bits) - 1; - /* 2. mask out fractional part */ - unsigned int fractional_part = FRACTIONAL_PART_MASK & value; - /* 3. shrink fixed point integer part to be of integer_bits width*/ - result &= GET_INTEGER_PART(value); - /* 4. make space for fractional part to be filled in after integer */ - result <<= fractional_bits; - /* 5. shrink fixed point fractional part to of fractional_bits width*/ - fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; - /* 6. merge the result */ - return result | fractional_part; -} - -static inline unsigned int clamp_ux_dy( - long long value, - unsigned int integer_bits, - unsigned int fractional_bits, - unsigned int min_clamp) -{ - unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); - - if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) - return (1 << (integer_bits + fractional_bits)) - 1; - else if (truncated_val > min_clamp) - return truncated_val; - else - return min_clamp; -} - -unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 4, 19); -} - -unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 3, 19); -} - -unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 2, 19); -} - -unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) -{ - return ux_dy(arg.value, 0, 19); -} - -unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) -{ - return clamp_ux_dy(arg.value, 0, 14, 1); -} - -unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) -{ - return clamp_ux_dy(arg.value, 0, 10, 1); -} - -int spl_fixpt_s4d19(struct spl_fixed31_32 arg) -{ - if (arg.value < 0) - return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); - else - return ux_dy(arg.value, 4, 19); -} - -struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; - struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; - long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; - - fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - frac_mask = frac_mask << fractional_bits; - fixpt_int_value.value = value & frac_mask; - fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - fixpt_value.value |= fixpt_int_value.value; - return fixpt_value; -} - -struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, - unsigned int frac_value, - unsigned int integer_bits, - unsigned int fractional_bits) -{ - struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); - - fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); - return fixpt_value; -} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h deleted file mode 100644 index 27ec6d416b7c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h +++ /dev/null @@ -1,546 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __SPL_FIXED31_32_H__ -#define __SPL_FIXED31_32_H__ - -#include "os_types.h" -#include "spl_os_types.h" // swap -#ifndef ASSERT -#define ASSERT(_bool) ((void *)0) -#endif - -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807ll -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1ll) -#endif - -#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 -#ifndef LLONG_MIN -#define LLONG_MIN (1LL<<63) -#endif -#ifndef LLONG_MAX -#define LLONG_MAX (-1LL>>1) -#endif - -/* - * @brief - * Arithmetic operations on real numbers - * represented as fixed-point numbers. - * There are: 1 bit for sign, - * 31 bit for integer part, - * 32 bits for fractional part. - * - * @note - * Currently, overflows and underflows are asserted; - * no special result returned. - */ - -struct spl_fixed31_32 { - long long value; -}; - - -/* - * @brief - * Useful constants - */ - -static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; -static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; -static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; -static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; - -/* - * @brief - * Initialization routines - */ - -/* - * @brief - * result = numerator / denominator - */ -struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); - -/* - * @brief - * result = arg - */ -static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) -{ - struct spl_fixed31_32 res; - - res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; - - return res; -} - -/* - * @brief - * Unary operators - */ - -/* - * @brief - * result = -arg - */ -static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) -{ - struct spl_fixed31_32 res; - - res.value = -arg.value; - - return res; -} - -/* - * @brief - * result = abs(arg) := (arg >= 0) ? arg : -arg - */ -static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) -{ - if (arg.value < 0) - return spl_fixpt_neg(arg); - else - return arg; -} - -/* - * @brief - * Binary relational operators - */ - -/* - * @brief - * result = arg1 < arg2 - */ -static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value < arg2.value; -} - -/* - * @brief - * result = arg1 <= arg2 - */ -static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value <= arg2.value; -} - -/* - * @brief - * result = arg1 == arg2 - */ -static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return arg1.value == arg2.value; -} - -/* - * @brief - * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value <= arg2.value) - return arg1; - else - return arg2; -} - -/* - * @brief - * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1 - */ -static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value <= arg2.value) - return arg2; - else - return arg1; -} - -/* - * @brief - * | min_value, when arg <= min_value - * result = | arg, when min_value < arg < max_value - * | max_value, when arg >= max_value - */ -static inline struct spl_fixed31_32 spl_fixpt_clamp( - struct spl_fixed31_32 arg, - struct spl_fixed31_32 min_value, - struct spl_fixed31_32 max_value) -{ - if (spl_fixpt_le(arg, min_value)) - return min_value; - else if (spl_fixpt_le(max_value, arg)) - return max_value; - else - return arg; -} - -/* - * @brief - * Binary shift operators - */ - -/* - * @brief - * result = arg << shift - */ -static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) -{ - ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || - ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); - - arg.value = arg.value << shift; - - return arg; -} - -/* - * @brief - * result = arg >> shift - */ -static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) -{ - bool negative = arg.value < 0; - - if (negative) - arg.value = -arg.value; - arg.value = arg.value >> shift; - if (negative) - arg.value = -arg.value; - return arg; -} - -/* - * @brief - * Binary additive operators - */ - -/* - * @brief - * result = arg1 + arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || - ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); - - res.value = arg1.value + arg2.value; - - return res; -} - -/* - * @brief - * result = arg1 + arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); -} - -/* - * @brief - * result = arg1 - arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - struct spl_fixed31_32 res; - - ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || - ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); - - res.value = arg1.value - arg2.value; - - return res; -} - -/* - * @brief - * result = arg1 - arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); -} - - -/* - * @brief - * Binary multiplicative operators - */ - -/* - * @brief - * result = arg1 * arg2 - */ -struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); - - -/* - * @brief - * result = arg1 * arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) -{ - return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); -} - -/* - * @brief - * result = square(arg) := arg * arg - */ -struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); - -/* - * @brief - * result = arg1 / arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) -{ - return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); -} - -/* - * @brief - * result = arg1 / arg2 - */ -static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - return spl_fixpt_from_fraction(arg1.value, arg2.value); -} - -/* - * @brief - * Reciprocal function - */ - -/* - * @brief - * result = reciprocal(arg) := 1 / arg - * - * @note - * No special actions taken in case argument is zero. - */ -struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); - -/* - * @brief - * Trigonometric functions - */ - -/* - * @brief - * result = sinc(arg) := sin(arg) / arg - * - * @note - * Argument specified in radians, - * internally it's normalized to [-2pi...2pi] range. - */ -struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); - -/* - * @brief - * result = sin(arg) - * - * @note - * Argument specified in radians, - * internally it's normalized to [-2pi...2pi] range. - */ -struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); - -/* - * @brief - * result = cos(arg) - * - * @note - * Argument specified in radians - * and should be in [-2pi...2pi] range - - * passing arguments outside that range - * will cause incorrect result! - */ -struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); - -/* - * @brief - * Transcendent functions - */ - -/* - * @brief - * result = exp(arg) - * - * @note - * Currently, function is verified for abs(arg) <= 1. - */ -struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); - -/* - * @brief - * result = log(arg) - * - * @note - * Currently, abs(arg) should be less than 1. - * No normalization is done. - * Currently, no special actions taken - * in case of invalid argument(s). Take care! - */ -struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); - -/* - * @brief - * Power function - */ - -/* - * @brief - * result = pow(arg1, arg2) - * - * @note - * Currently, abs(arg1) should be less than 1. Take care! - */ -static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) -{ - if (arg1.value == 0) - return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; - - return spl_fixpt_exp( - spl_fixpt_mul( - spl_fixpt_log(arg1), - arg2)); -} - -/* - * @brief - * Rounding functions - */ - -/* - * @brief - * result = floor(arg) := greatest integer lower than or equal to arg - */ -static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* - * @brief - * result = round(arg) := integer nearest to arg - */ -static inline int spl_fixpt_round(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; - - const long long summand = spl_fixpt_half.value; - - ASSERT(LLONG_MAX - (long long)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* - * @brief - * result = ceil(arg) := lowest integer greater than or equal to arg - */ -static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) -{ - unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; - - const long long summand = spl_fixpt_one.value - - spl_fixpt_epsilon.value; - - ASSERT(LLONG_MAX - (long long)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); - else - return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); -} - -/* the following two function are used in scaler hw programming to convert fixed - * point value to format 2 bits from integer part and 19 bits from fractional - * part. The same applies for u0d19, 0 bits from integer part and 19 bits from - * fractional - */ - -unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg); - -unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg); - -int spl_fixpt_s4d19(struct spl_fixed31_32 arg); - -static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits) -{ - bool negative = arg.value < 0; - - if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { - ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); - return arg; - } - - if (negative) - arg.value = -arg.value; - arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); - if (negative) - arg.value = -arg.value; - return arg; -} - -struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); -struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, - unsigned int frac_value, - unsigned int integer_bits, - unsigned int fractional_bits); - -#endif From 7fb363c57522b704e156fc4c5fbcb7ee133fe304 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 11 Jul 2024 14:31:27 -0400 Subject: [PATCH 0216/1523] drm/amd/display: Let drm_crtc_vblank_on/off manage interrupts [Why] We manage interrupts for CRTCs in two places: 1. In manage_dm_interrupts(), when CRTC get enabled or disabled 2. When drm_vblank_get/put() starts or kills the vblank counter, calling into amdgpu_dm_crtc_set_vblank() The interrupts managed by these twp places should be identical. [How] Since manage_dm_interrupts() already use drm_crtc_vblank_on/off(), just move all CRTC interrupt management into amdgpu_dm_crtc_set_vblank(). This has the added benefit of disabling all CRTC and HUBP interrupts when there are no vblank requestors. Note that there is a TODO item - unchanged from when it was first introduced - to properly identify the HUBP instance from the OTG instance, rather than just assume direct mapping. Signed-off-by: Leo Li Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 +------------- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 48 +++++++++++++++++-- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fa8d455de7f5..42ca9b52fe5e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8246,42 +8246,10 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, bool enable) { - /* - * We have no guarantee that the frontend index maps to the same - * backend index - some even map to more than one. - * - * TODO: Use a different interrupt or check DC itself for the mapping. - */ - int irq_type = - amdgpu_display_crtc_idx_to_irq_type( - adev, - acrtc->crtc_id); - - if (enable) { + if (enable) drm_crtc_vblank_on(&acrtc->base); - amdgpu_irq_get( - adev, - &adev->pageflip_irq, - irq_type); -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_get( - adev, - &adev->vline0_irq, - irq_type); -#endif - } else { -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - amdgpu_irq_put( - adev, - &adev->vline0_irq, - irq_type); -#endif - amdgpu_irq_put( - adev, - &adev->pageflip_irq, - irq_type); + else drm_crtc_vblank_off(&acrtc->base); - } } static void dm_update_pflip_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 99014339aaa3..1fe038616d9f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -286,11 +286,14 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct amdgpu_display_manager *dm = &adev->dm; struct vblank_control_work *work; + int irq_type; int rc = 0; if (acrtc->otg_inst == -1) goto skip; + irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); + if (enable) { /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) @@ -303,13 +306,52 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - rc = (enable) - ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) - : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); + /* crtc vblank or vstartup interrupt */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc); + } if (rc) return rc; + /* + * hubp surface flip interrupt + * + * We have no guarantee that the frontend index maps to the same + * backend index - some even map to more than one. + * + * TODO: Use a different interrupt or check DC itself for the mapping. + */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc); + } + + if (rc) + return rc; + +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* crtc vline0 interrupt, only available on DCN+ */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) { + if (enable) { + rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc); + } + + if (rc) + return rc; + } +#endif skip: if (amdgpu_in_reset(adev)) return 0; From 17e68f89132b9ee4b144358b49e5df404b314181 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 11 Jul 2024 14:38:11 -0400 Subject: [PATCH 0217/1523] drm/amd/display: Run idle optimizations at end of vblank handler [Why & How] 1. After allowing idle optimizations, hw programming is disallowed. 2. Before hw programming, we need to disallow idle optimizations. Otherwise, in scenario 1, we will immediately kick hw out of idle optimizations with register access. Scenario 2 is less of a concern, since any register access will kick hw out of idle optimizations. But we'll do it early for correctness. Signed-off-by: Leo Li Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1fe038616d9f..a2cf2c066a76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -251,9 +251,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) else if (dm->active_vblank_irq_count) dm->active_vblank_irq_count--; - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + if (dm->active_vblank_irq_count > 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): false\n"); + dc_allow_idle_optimizations(dm->dc, false); + } /* * Control PSR based on vblank requirements from OS @@ -272,6 +273,11 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->stream->link->replay_settings.replay_feature_enabled); } + if (dm->active_vblank_irq_count == 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): true\n"); + dc_allow_idle_optimizations(dm->dc, true); + } + mutex_unlock(&dm->dc_lock); dc_stream_release(vblank_work->stream); From 0e4b858285e633ea6bc7335fd66529955d023af9 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 12 Jul 2024 12:50:48 -0400 Subject: [PATCH 0218/1523] drm/amd/display: Add two dmmuy I2C entry for GPIO port mapping issue [Why] When only 4 I2C is declared, two dummies are required to correctly map GPIO port. [How] Add one more I2C dummy entry to match GPIO port. Signed-off-by: Chris Park Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/gpio/dcn401/hw_factory_dcn401.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c index 46415cab23ab..928abca18a18 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c @@ -86,7 +86,13 @@ static const struct ddc_registers ddc_data_regs_dcn[] = { ddc_data_regs_dcn2(2), ddc_data_regs_dcn2(3), ddc_data_regs_dcn2(4), -// ddc_data_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, @@ -107,7 +113,13 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = { ddc_clk_regs_dcn2(2), ddc_clk_regs_dcn2(3), ddc_clk_regs_dcn2(4), -// ddc_clk_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, From 076362d931d0d5ed01a3d1cd4d066f2e6e7f86f8 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 26 Jul 2024 15:09:59 +0530 Subject: [PATCH 0219/1523] drm/amdgpu: print VCN instance dump for valid instance VCN dump is dependent on power state of the ip. Dump is valid if VCN was powered up at the time of ip dump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 28 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 9e1cbeee10db..c2278cc49dd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2329,7 +2329,7 @@ static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, j; uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off; + uint32_t inst_off, is_powered; if (!adev->vcn.ip_dump) return; @@ -2342,11 +2342,17 @@ static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) } inst_off = i * reg_count; - drm_printf(p, "\nActive Instance:VCN%d\n", i); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } } } @@ -2354,7 +2360,7 @@ static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, j; - bool reg_safe; + bool is_powered; uint32_t inst_off; uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); @@ -2366,11 +2372,13 @@ static void vcn_v3_0_dump_ip_state(void *handle) continue; inst_off = i * reg_count; - reg_safe = (RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS) & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - if (reg_safe) - for (j = 0; j < reg_count; j++) + if (is_powered) + for (j = 1; j < reg_count; j++) adev->vcn.ip_dump[inst_off + j] = RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); } From fcb3a4fb8255149a73afeb3d8f2397eaac3a46b0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 15 Jul 2024 15:52:46 -0400 Subject: [PATCH 0220/1523] drm/amd/display: Request 0MHz dispclk for zero display case [Why] If we aren't entering RCG/IPS2 or CLKSTOP is not supported by PMFW then we should be requesting a dispclk value of 0MHz to PMFW. Currenly we run at max clock since there's an assumption in APU clock table formulation where we can run at any DISPCLK at any state so the real clock value ends up as 1200Mhz - the maximum. [How] Set to 0 instead of the minimum value in the state array. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index d5dcc8b77281..866b0abcff1b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -575,7 +575,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s unsigned int lowest_state_idx = 0; out_clks.p_state_supported = true; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; From 680caca6feee720d0a6cb00f892c0d2d46dc4075 Mon Sep 17 00:00:00 2001 From: Bhuvanachandra Pinninti Date: Tue, 16 Jul 2024 18:53:03 +0530 Subject: [PATCH 0221/1523] drm/amd/display: Refactor for dio Moved files to respective folders to improve DIO code. Signed-off-by: Bhuvanachandra Pinninti Reviewed-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn314/Makefile | 10 ---------- drivers/gpu/drm/amd/display/dc/dio/Makefile | 18 ++++++++++++++++++ .../{ => dio}/dcn301/dcn301_dio_link_encoder.c | 0 .../{ => dio}/dcn301/dcn301_dio_link_encoder.h | 0 .../dcn314/dcn314_dio_stream_encoder.c | 0 .../dcn314/dcn314_dio_stream_encoder.h | 0 .../dc/dio/dcn321/dcn321_dio_link_encoder.c | 1 - .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 1 - 10 files changed, 19 insertions(+), 14 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/Makefile rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn301/dcn301_dio_link_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn301/dcn301_dio_link_encoder.h (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn314/dcn314_dio_stream_encoder.c (100%) rename drivers/gpu/drm/amd/display/dc/{ => dio}/dcn314/dcn314_dio_stream_encoder.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 80069651def3..8992e697759f 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -35,7 +35,6 @@ DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 DC_LIBS += dcn31 -DC_LIBS += dcn314 DC_LIBS += dml DC_LIBS += dml2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index dc37dbf870df..fb4814ab3f05 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -3,7 +3,7 @@ # # Makefile for dcn30. -DCN301 = dcn301_dio_link_encoder.o dcn301_panel_cntl.o +DCN301 = dcn301_panel_cntl.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile deleted file mode 100644 index 15fdcf7c6466..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Makefile for dcn314. - -DCN314 = dcn314_dio_stream_encoder.o - -AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile index 67840e474d7a..0dfd480976f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile @@ -51,6 +51,15 @@ AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30) +############################################################################### +# DCN301 +############################################################################### +DIO_DCN301 = dcn301_dio_link_encoder.o + +AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301) + ############################################################################### # DCN31 ############################################################################### @@ -60,6 +69,15 @@ AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31) +############################################################################### +# DCN314 +############################################################################### +DIO_DCN314 = dcn314_dio_stream_encoder.o + +AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314) + ############################################################################### # DCN32 ############################################################################### diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h rename to drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c index 05783daa62ac..2ed382a8e79c 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c @@ -23,7 +23,6 @@ * */ - #include "reg_helper.h" #include "core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index fcc88ef83e6a..19e50fbf908d 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -22,7 +22,6 @@ * */ - #include "dc_bios_types.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn314/dcn314_dio_stream_encoder.h" From d5022deb4a526009fdc20e4d62528b25b05112dc Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 15 Jul 2024 13:54:18 -0400 Subject: [PATCH 0222/1523] drm/amd/display: Add option to disable unbounded req in DML21 Use debug option for disabling unbounded req in DML21 Signed-off-by: Alvin Lee Reviewed-by: Austin Zheng Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0f34688e4058..65776602648d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -959,6 +959,11 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->minimize_det_reallocation = true; dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); From 3f96f545f877ac59d0c967f52d760b4b2b3b9a47 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 7 Jun 2024 22:09:53 -0600 Subject: [PATCH 0223/1523] drm/amd/display: Fix possible overflow in integer multiplication [WHAT & HOW] Integer multiplies integer may overflow in context that expects an expression of unsigned long long (64 bits). This can be fixed by casting integer to unsigned long long to force 64 bits results. This fixes 2 OVERFLOW_BEFORE_WIDEN issues reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/resource/dcn32/dcn32_resource_helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index 47c8a9fbe754..f5a4e97c40ce 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -663,7 +663,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } @@ -724,7 +724,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } From 722e96c99f1d7532fdfbb557f50a399f6cc57d82 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 10:24:58 -0600 Subject: [PATCH 0224/1523] drm/amd/display: Check null pointers before using them [WHAT & HOW] dc_link is null checked previously in the same function, indicating it might be null as reported by Coverity. This fixes 1 FORWARD_NULL issue reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 42ca9b52fe5e..b76579815bdf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12083,7 +12083,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } } - as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); + if (amdgpu_dm_connector->dc_link) + as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); From 58a8ee96f84d2c21abb85ad8c22d2bbdf59bd7a9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 10:37:28 -0600 Subject: [PATCH 0225/1523] drm/amd/display: Check stream_status before it is used [WHAT & HOW] dc_state_get_stream_status can return null, and therefore null must be checked before stream_status is used. This fixes 1 NULL_RETURNS issue reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 95d6e29d5e47..4c9e420742a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3748,7 +3748,7 @@ static void commit_planes_for_stream_fast(struct dc *dc, surface_count, stream, context); - } else { + } else if (stream_status) { build_dmub_cmd_list(dc, srf_updates, surface_count, From 739d0f3e1f36738d4cd84166784a8f7a58d69612 Mon Sep 17 00:00:00 2001 From: Michael Chen Date: Tue, 23 Jul 2024 17:45:23 -0400 Subject: [PATCH 0226/1523] drm/amdgpu: increase mes log buffer size for gfx12 MES firmware requires larger log buffer for gfx12. Allocate proper buffer respectively for gfx11 and gfx12. Signed-off-by: Michael Chen Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6 +++--- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 8 ++++++-- drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 +++ drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 +++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b..dac88d2dd70d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -1573,7 +1573,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71..2d659c612f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1..f9343642ae7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1163,6 +1163,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad59..0713bc3eb263 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -551,8 +551,10 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } return mes_v12_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), @@ -1237,6 +1239,8 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d362251..21ceafce1f9b 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3..4cf2c9f30b3d 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ From 9724b8494d3e85cb01c377f201bfaf25fd7d38ff Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jul 2024 13:47:43 -0400 Subject: [PATCH 0227/1523] drm/amd/display: restore immediate_disable_crtc for w/a [why] immediate_disable_crtc does not reset ODM. if switching to disable_crtc which will disable ODM as well. i.e. need to restore ODM mem cfg at reenable it at end of w/a. Signed-off-by: Charlene Liu Reviewed-by: Xi (Alex) Liu Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 4 ++-- drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 2d5bd5c7ab94..e075b2720f96 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -149,8 +149,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { /* This w/a should not trigger when we have a dig active */ if (disable) { - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) - pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c index 6bbbf313b2bb..4b6446ed4ce4 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c @@ -149,7 +149,9 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } - +/* + * Immediate_Disable_Crtc - this is to temp disable Timing generator without reset ODM. + */ bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -162,10 +164,12 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc) VTG0_ENABLE, 0); /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, + if (optc->ctx->dce_environment != DCE_ENV_DIAG) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + /* clear the false state */ optc1_clear_optc_underflow(optc); From 093b79d034c59a3b66f6312863502378a422496d Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jul 2024 15:58:35 -0400 Subject: [PATCH 0228/1523] drm/amd/display: sync dmub output event type. [why] dmubfw added a new event type, update amdgpu to avoid "notify type 6 invalid" Signed-off-by: Charlene Liu Reviewed-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b76579815bdf..5b1e313a48e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -877,6 +877,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) "HPD_IRQ", "SET_CONFIGC_REPLY", "DPIA_NOTIFICATION", + "HPD_SENSE_NOTIFY", }; do { From 4981bd4749fa769b3d6acbc82fe8059c030d0920 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 12 Jul 2024 18:02:30 +0800 Subject: [PATCH 0229/1523] drm/amd/display: Add a missing PSR state [Why & How] Add a missing PSR state to make the dmub_psr_get_state() return a correct PSR state. Signed-off-by: Tom Chung Reviewed-by: Sun peng Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index c550e8997033..97279b080f3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -590,6 +590,7 @@ enum dc_psr_state { PSR_STATE5c, PSR_STATE_HWLOCK_MGR, PSR_STATE_POLLVUPDATE, + PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME, PSR_STATE_INVALID = 0xFF }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 0f3d15126a1e..cae18f8c1c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -94,6 +94,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) state = PSR_STATE_HWLOCK_MGR; else if (raw_state == 0x61) state = PSR_STATE_POLLVUPDATE; + else if (raw_state == 0x62) + state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME; else state = PSR_STATE_INVALID; From df18a4de9e77ad92c472fd1eb0fb1255d52dd4cd Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 12 Jul 2024 17:29:07 +0800 Subject: [PATCH 0230/1523] drm/amd/display: Reset VRR config during resume [Why] After resume the system, the new_crtc_state->vrr_infopacket does not synchronize with the current state. It will affect the update_freesync_state_on_stream() does not update the state correctly. The previous patch causes a PSR SU regression that cannot let panel go into self-refresh mode. [How] Reset the VRR config during resume to force update the VRR config later. Fixes: eb6dfbb7a9c6 ("drm/amd/display: Reset freesync config before update new state") Signed-off-by: Tom Chung Reviewed-by: Sun peng Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5b1e313a48e8..b4fbccbf2f29 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -176,6 +176,7 @@ MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB); static int amdgpu_dm_init(struct amdgpu_device *adev); static void amdgpu_dm_fini(struct amdgpu_device *adev); static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); +static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) { @@ -3213,8 +3214,11 @@ static int dm_resume(void *handle) drm_connector_list_iter_end(&iter); /* Force mode set in atomic commit */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { new_crtc_state->active_changed = true; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + reset_freesync_config_for_crtc(dm_new_crtc_state); + } /* * atomic_check is expected to create the dc states. We need to release From bd870cfd21489d28195fda157710ebd4cecaa8ca Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 16 Jul 2024 17:41:54 -0400 Subject: [PATCH 0231/1523] drm/amd/display: Add seamless boot support for more DIG operation modes [Why] When pre-OS firmware enables display support for displays that operate the DIG in 2 pixels per cycle processing modes the inferred pixel rate from get_pixel_clk_frequency_100hz does not account for the true pixel rate since we're outputting 2 per cycle past the stream encoder. This causes seamless boot validation to abort early. [How] Add a new stream encoder function for getting pixels per cycle from the stream encoder. If the pixels per cycle is greater than 1 and the driver policy is to enable 2 pixels per cycle for post-OS then allow seamless boot to continue. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 ++++++++++-- .../dc/dio/dcn35/dcn35_dio_stream_encoder.c | 19 +++++++++++++++++++ .../amd/display/dc/inc/hw/stream_encoder.h | 1 + 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4c9e420742a2..b1253e4c81a8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1823,10 +1823,18 @@ bool dc_validate_boot_timing(const struct dc *dc, tg->funcs->get_optc_source(tg, &numOdmPipes, &id_src[0], &id_src[1]); - if (numOdmPipes == 2) + if (numOdmPipes == 2) { pix_clk_100hz *= 2; - if (numOdmPipes == 4) + } else if (numOdmPipes == 4) { pix_clk_100hz *= 4; + } else if (se && se->funcs->get_pixels_per_cycle) { + uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se); + + if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) + return false; + + pix_clk_100hz *= pixels_per_cycle; + } // Note: In rare cases, HW pixclk may differ from crtc's pixclk // slightly due to rounding issues in 10 kHz units. diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 19e50fbf908d..6ab2a218b769 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -422,6 +422,24 @@ void enc35_enable_fifo(struct stream_encoder *enc) REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); } +static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &value); + + switch (value) { + case 0: + return 1; + case 1: + return 2; + default: + ASSERT_CRITICAL(false); + return 1; + } +} + static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .dp_set_odm_combine = enc314_dp_set_odm_combine, @@ -474,6 +492,7 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .disable_fifo = enc35_disable_fifo, .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, + .get_pixels_per_cycle = enc35_get_pixels_per_cycle, }; void dcn35_dio_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 6fe42120738d..fe7f3137f228 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -273,6 +273,7 @@ struct stream_encoder_funcs { void (*disable_fifo)(struct stream_encoder *enc); bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); + uint32_t (*get_pixels_per_cycle)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { From cead9ac8b0992ae2659b637e86a0da7cfeb5e267 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 16 Jul 2024 13:39:10 -0400 Subject: [PATCH 0232/1523] drm/amd/display: Use correct cm_helper function Need to use cm3_helper function with DCN401 to avoid cases where high RGB component values can get set to zero if using the TF curve generated by cm_helper. Signed-off-by: Ilya Bakoulin Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 0fa610590245..8e1ca709d304 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -497,6 +497,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; bool is_17x17x17 = true; + bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); @@ -506,11 +507,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.lut1d_func, &dpp_base->regamma_params, false); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -527,11 +527,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = &mcm_luts.shaper->pwl; else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { ASSERT(false); - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.shaper, &dpp_base->regamma_params, true); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -669,6 +668,7 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; bool result = true; const struct pwl_params *lut_params = NULL; + bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; @@ -677,10 +677,9 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, if (plane_state->blend_tf.type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf.pwl; else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->blend_tf, + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, &dpp_base->regamma_params, false); - lut_params = &dpp_base->regamma_params; + lut_params = rval ? &dpp_base->regamma_params : NULL; } result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); lut_params = NULL; @@ -693,10 +692,9 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->in_shaper_func, + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, &dpp_base->shaper_params, true); - lut_params = &dpp_base->shaper_params; + lut_params = rval ? &dpp_base->shaper_params : NULL; } result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); From a15268787b79fd183dd526cc16bec9af4f4e49a1 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 17 Jul 2024 09:17:56 -0600 Subject: [PATCH 0233/1523] drm/amd/display: Avoid overflow assignment in link_dp_cts sampling_rate is an uint8_t but is assigned an unsigned int, and thus it can overflow. As a result, sampling_rate is changed to uint32_t. Similarly, LINK_QUAL_PATTERN_SET has a size of 2 bits, and it should only be assigned to a value less or equal than 4. This fixes 2 INTEGER_OVERFLOW issues reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 2 +- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 ++- drivers/gpu/drm/amd/display/include/dpcd_defs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 519c3df78ee5..95c275bf649b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -727,7 +727,7 @@ struct dp_audio_test_data_flags { struct dp_audio_test_data { struct dp_audio_test_data_flags flags; - uint8_t sampling_rate; + uint32_t sampling_rate; uint8_t channel_count; uint8_t pattern_type; uint8_t pattern_period[8]; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index df3781081da7..32d5a4b14333 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -775,7 +775,8 @@ bool dp_set_test_pattern( core_link_read_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); - training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; + if (pattern <= PHY_TEST_PATTERN_END_DP11) + training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index aee5170f5fb2..c246235e4afe 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -76,6 +76,7 @@ enum dpcd_phy_test_patterns { PHY_TEST_PATTERN_D10_2, PHY_TEST_PATTERN_SYMBOL_ERROR, PHY_TEST_PATTERN_PRBS7, + PHY_TEST_PATTERN_END_DP11 = PHY_TEST_PATTERN_PRBS7, PHY_TEST_PATTERN_80BIT_CUSTOM,/* For DP1.2 only */ PHY_TEST_PATTERN_CP2520_1, PHY_TEST_PATTERN_CP2520_2, From 13d8850a3387635c051c5ed1c8a8b6c1e9bd1341 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 26 Jul 2024 18:07:41 +0530 Subject: [PATCH 0234/1523] drm/amdgpu: trigger ip dump before suspend of IP's Problem: IP dump right now is done post suspend of all IP's which for some IP's could change power state and software state too which we do not want to reflect in the dump as it might not be same at the time of hang. Solution: IP should be dumped as close to the HW state when the GPU was in hung state without trying to reinitialize any resource. Acked-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 60 +++++++++++----------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 730dae77570c..a2a1a3da17e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5277,11 +5277,29 @@ mode1_reset_failed: return ret; } +static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) +{ + int i; + + lockdep_assert_held(&adev->reset_domain->sem); + + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); + + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); + } + + return 0; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { int i, r = 0; struct amdgpu_job *job = NULL; + struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -5340,6 +5358,18 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } } + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + amdgpu_reset_reg_dumps(tmp_adev); + + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } + if (need_full_reset) r = amdgpu_device_ip_suspend(adev); if (need_full_reset) @@ -5352,47 +5382,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - - dev_info(tmp_adev->dev, "Dumping IP State\n"); - /* Trigger ip dump before we reset the asic */ - for (i = 0; i < tmp_adev->num_ip_blocks; i++) - if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) - tmp_adev->ip_blocks[i].version->funcs - ->dump_ip_state((void *)tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); - } - reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ From 608d886c978cd5f3d8650630568d96c231845227 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Jul 2024 17:30:37 -0400 Subject: [PATCH 0235/1523] drm/amdgpu: Fix APU handling in amdgpu_pm_load_smu_firmware() We only need to skip this on modern APUs. It's required on older APUs as it's where start_smu gets called from. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3502 Fixes: 064d92436b69 ("drm/amd/pm: avoid to load smu firmware for APUs") Reviewed-by: Tim Huang Signed-off-by: Alex Deucher Cc: Tim Huang --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf..8b7d6ed7e2ed 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); From aeb81b62c7fe4782198e9dd79c7d6cdf04d92586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 26 Jul 2024 15:40:15 +0200 Subject: [PATCH 0236/1523] drm/amdgpu: convert bios_hardcoded_edid to drm_edid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of manually passing around 'struct edid *' and its size, use 'struct drm_edid', which encapsulates a validated combination of both. As the drm_edid_ can handle NULL gracefully, the explicit checks can be dropped. Also save a few characters by transforming '&array[0]' to the equivalent 'array' and using 'max_t(int, ...)' instead of manual casts. Signed-off-by: Thomas Weißschuh Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6 +----- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2 +- drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 17 ++++++----------- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 8 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index bd0fbdc5f55d..344e0a9ee08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -249,11 +249,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { - if (adev->mode_info.bios_hardcoded_edid) { - return kmemdup((unsigned char *)adev->mode_info.bios_hardcoded_edid, - adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid)); } static void amdgpu_connector_get_edid(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d002b845d8ac..5e3faefc5510 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -51,6 +51,7 @@ struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; struct edid; +struct drm_edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -326,8 +327,7 @@ struct amdgpu_mode_info { /* FMT dithering */ struct drm_property *dither_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u32 firmware_flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6415d0d039e1..e5f508d34ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -549,7 +549,7 @@ static int amdgpu_vkms_sw_fini(void *handle) adev->mode_info.mode_config_initialized = false; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index ebf83fee43bb..8defca3705d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2064,23 +2064,18 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; + const struct drm_edid *edid; int edid_size; if (fake_edid_record->ucFakeEDIDLength == 128) edid_size = fake_edid_record->ucFakeEDIDLength; else edid_size = fake_edid_record->ucFakeEDIDLength * 128; - edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], - edid_size, GFP_KERNEL); - if (edid) { - if (drm_edid_is_valid(edid)) { - adev->mode_info.bios_hardcoded_edid = edid; - adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else { - kfree(edid); - } - } + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + adev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); record += struct_size(fake_edid_record, ucFakeEDIDString, edid_size); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index dddb5fe16f2c..742adbc460c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2846,7 +2846,7 @@ static int dce_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 11780e4d7e9f..8d46ebadfa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2973,7 +2973,7 @@ static int dce_v11_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 05c0df97f01d..f08dc6a3886f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2745,7 +2745,7 @@ static int dce_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index dc73e301d937..a6a3adf2ae13 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2766,7 +2766,7 @@ static int dce_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); From c6bb3acf1cdeed5d01ebde70b769a8e25993965d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 26 Jul 2024 15:40:16 +0200 Subject: [PATCH 0237/1523] drm/radeon: convert bios_hardcoded_edid to drm_edid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of manually passing around 'struct edid *' and its size, use 'struct drm_edid', which encapsulates a validated combination of both. As the drm_edid_ can handle NULL gracefully, the explicit checks can be dropped. Also save a few characters by transforming '&array[0]' to the equivalent 'array' and using 'max_t(int, ...)' instead of manual casts. Signed-off-by: Thomas Weißschuh Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_atombios.c | 17 +++++--------- drivers/gpu/drm/radeon/radeon_combios.c | 26 +++++----------------- drivers/gpu/drm/radeon/radeon_connectors.c | 4 ++-- drivers/gpu/drm/radeon/radeon_display.c | 2 +- drivers/gpu/drm/radeon/radeon_mode.h | 4 ++-- 5 files changed, 16 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 168f3f94003b..81a0a91921b9 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1716,23 +1716,18 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; + const struct drm_edid *edid; int edid_size; if (fake_edid_record->ucFakeEDIDLength == 128) edid_size = fake_edid_record->ucFakeEDIDLength; else edid_size = fake_edid_record->ucFakeEDIDLength * 128; - edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], - edid_size, GFP_KERNEL); - if (edid) { - if (drm_edid_is_valid(edid)) { - rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else { - kfree(edid); - } - } + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + rdev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); record += struct_size(fake_edid_record, ucFakeEDIDString, edid_size); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 41ddc576f8f8..df8d7f56b028 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -370,7 +370,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) { int edid_info, size; - struct edid *edid; + const struct drm_edid *edid; unsigned char *raw; edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) @@ -378,19 +378,14 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) raw = rdev->bios + edid_info; size = EDID_LENGTH * (raw[0x7e] + 1); - edid = kmalloc(size, GFP_KERNEL); - if (edid == NULL) - return false; + edid = drm_edid_alloc(raw, size); - memcpy((unsigned char *)edid, raw, size); - - if (!drm_edid_is_valid(edid)) { - kfree(edid); + if (!drm_edid_valid(edid)) { + drm_edid_free(edid); return false; } rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = size; return true; } @@ -398,18 +393,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) struct edid * radeon_bios_get_hardcoded_edid(struct radeon_device *rdev) { - struct edid *edid; - - if (rdev->mode_info.bios_hardcoded_edid) { - edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - if (edid) { - memcpy((unsigned char *)edid, - (unsigned char *)rdev->mode_info.bios_hardcoded_edid, - rdev->mode_info.bios_hardcoded_edid_size); - return edid; - } - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(rdev->mode_info.bios_hardcoded_edid)); } static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 880edabfc9e3..528a8f3677c2 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1059,7 +1059,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force) */ if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { ret = connector_status_connected; } @@ -1392,7 +1392,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) out: if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { radeon_connector->use_digital = true; ret = connector_status_connected; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 10fd58f400bc..8f5f8abcb1b4 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1658,7 +1658,7 @@ void radeon_modeset_fini(struct radeon_device *rdev) rdev->mode_info.mode_config_initialized = false; } - kfree(rdev->mode_info.bios_hardcoded_edid); + drm_edid_free(rdev->mode_info.bios_hardcoded_edid); /* free i2c buses */ radeon_i2c_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e0a5af180801..421c83fc70dc 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -39,6 +39,7 @@ #include struct edid; +struct drm_edid; struct radeon_bo; struct radeon_device; @@ -262,8 +263,7 @@ struct radeon_mode_info { /* Output CSC */ struct drm_property *output_csc_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u16 firmware_flags; From b5126ba85beadfa1f3cfdc9e7a8533ad444ca210 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Wed, 17 Jul 2024 18:29:04 -0400 Subject: [PATCH 0238/1523] drm/amd/display: Add new enable and disable functions Add new enable and disable functions based on DCCG spec. Signed-off-by: Hansen Dsouza Reviewed-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b698b773338a..7d88b0ae241c 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -732,6 +732,206 @@ static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_so } } +static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst) +{ + uint32_t enable = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, &enable); + break; + case 1: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, &enable); + break; + case 2: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, &enable); + break; + case 3: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, &enable); + break; + case 4: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, &enable); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + return enable; +} + +static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) +{ + uint32_t disable_l1 = 0; + uint32_t disable_l2 = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, &disable_l2); + break; + case 1: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, &disable_l2); + break; + case 2: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, &disable_l2); + break; + case 3: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, &disable_l2); + break; + default: + BREAK_TO_DEBUGGER(); + return 0; + } + + /* return true if either block level or DCCG level gating is active */ + return (disable_l1 | disable_l2); +} + +static void dccg35_enable_symclk_fe_new( + struct dccg *dccg, + int inst, + enum physymclk_fe_source src) +{ + dccg35_set_physymclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_src_new(dccg, src, inst); +} + +static void dccg35_disable_symclk_fe_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk_fe_src_new(dccg, PHYSYMCLK_FE_REFCLK, inst); + dccg35_set_physymclk_fe_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk_be_new( + struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk_be_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_physymclk_src_new(dccg, inst, PHYSYMCLK_REFCLK); + + /* Check if any other SE connected LE and disable them */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_fe_rcg(dccg, i) == 0) { + if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst)) + dccg35_disable_symclk_fe_new(dccg, i); + } + } + /* Safe to RCG SYMCLK*/ + dccg35_set_physymclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_se_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src) +{ + dccg35_set_symclk32_se_rcg(dccg, inst, false); + dccg35_set_symclk32_se_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_se_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst); + dccg35_set_symclk32_se_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_le_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + dccg35_set_symclk32_le_rcg(dccg, inst, false); + dccg35_set_symclk32_le_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_le_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK); + + /* Check if any SE are connected and disable SE as well */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_symclk32_se_rcg(dccg, i) == 0) { + /* Disable and SE connected to this LE before RCG */ + if (dccg35_is_symclk32_se_src_functional_le_new(dccg, i, inst)) + dccg35_disable_symclk32_se_new(dccg, i); + } + } + /* Safe to RCG SYM32_LE*/ + dccg35_set_symclk32_le_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpp_new( + struct dccg *dccg, + int inst, + enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dccg35_set_dppclk_rcg(dccg, inst, false); + dcn35_set_dppclk_src_new(dccg, inst, src); + /* Switch DPP clock to DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0xFF, + DPPCLK0_DTO_MODULO, 0xFF); +} + +static void dccg35_disable_dpp_new( + struct dccg *dccg, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK); + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0, + DPPCLK0_DTO_MODULO, 1); + dccg35_set_dppclk_rcg(dccg, inst, true); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1753,6 +1953,18 @@ struct dccg *dccg35_create( (void)&dccg35_set_physymclk_src_new; (void)&dccg35_is_symclk_fe_src_functional_be; (void)&dccg35_set_symclk_fe_src_new; + (void)&dccg35_is_fe_rcg; + (void)&dccg35_is_symclk32_se_rcg; + (void)&dccg35_enable_symclk_fe_new; + (void)&dccg35_disable_symclk_fe_new; + (void)&dccg35_enable_symclk_be_new; + (void)&dccg35_disable_symclk_be_new; + (void)&dccg35_enable_symclk32_se_new; + (void)&dccg35_disable_symclk32_se_new; + (void)&dccg35_enable_symclk32_le_new; + (void)&dccg35_disable_symclk32_le_new; + (void)&dccg35_enable_dpp_new; + (void)&dccg35_disable_dpp_new; base = &dccg_dcn->base; base->ctx = ctx; From c3f15273721f2ee60d32fc7d4f2c233a1eff47a8 Mon Sep 17 00:00:00 2001 From: Cruise Date: Thu, 18 Jul 2024 12:55:02 +0800 Subject: [PATCH 0239/1523] drm/amd/display: Add logs for debugging outbox The DP tunnel AUX reply is received through Outbox1. Print the Outbox1 status if an issue occurs. Signed-off-by: Cruise Reviewed-by: Nicholas Kazlauskas Reviewed-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 +++ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 3 +++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 4 ++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c | 4 ++++ 6 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index fb3391854eed..41270fade5f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -979,6 +979,9 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr); DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr); DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size); + DC_LOG_DEBUG(" outbox1_rptr : %08x", diag_data.outbox1_rptr); + DC_LOG_DEBUG(" outbox1_wptr : %08x", diag_data.outbox1_wptr); + DC_LOG_DEBUG(" outbox1_size : %08x", diag_data.outbox1_size); DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled); DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset); DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 6589bb9aea6b..cd70453aeae0 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -330,6 +330,9 @@ struct dmub_diagnostic_data { uint32_t inbox0_rptr; uint32_t inbox0_wptr; uint32_t inbox0_size; + uint32_t outbox1_rptr; + uint32_t outbox1_wptr; + uint32_t outbox1_size; uint32_t gpint_datain0; struct dmub_srv_debug timeout_info; uint8_t is_dmcub_enabled : 1; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 662c34e9495c..d9f31b191c69 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -449,6 +449,10 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e1da270502cc..9600b7f858b0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -459,6 +459,10 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 916ed022e96b..746696b6f09a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -502,6 +502,10 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index cf139e9cc20e..39a8cb6d7523 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -444,6 +444,10 @@ void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnost diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; From 6eb1fe53ee94cffd7187844d08d46ba8659d667a Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 16 Jul 2024 14:58:06 -0600 Subject: [PATCH 0240/1523] drm/amd/display: Check null values from functions Functions get_per_method_common_meta and get_expanded_strategy_list can return null and thus it is necessary to check their returned values before dereferencing. This fixes 3 NULL_RETURNS issues reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index dddb21818f8a..9331a8fe77c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1199,12 +1199,17 @@ static bool is_timing_group_schedulable( /* init allow start and end lines for timing group */ stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); + if (!stream_method_fams2_meta) + return false; + group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); + if (!stream_method_fams2_meta) + return false; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1768,6 +1773,9 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp build_synchronized_timing_groups(pmo, display_config); strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + if (!strategy_list) + return false; + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); if (strategy_list_size == 0) From 4067f4fa0423a89fb19a30b57231b384d77d2610 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 15 Jul 2024 09:57:01 -0600 Subject: [PATCH 0241/1523] drm/amd/display: Initialize get_bytes_per_element's default to 1 Variables, used as denominators and maybe not assigned to other values, should not be 0. bytes_per_element_y & bytes_per_element_c are initialized by get_bytes_per_element() which should never return 0. This fixes 10 DIVIDE_BY_ZERO issues reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 2 +- .../gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 3d95bfa5aca2..ae5251041728 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 98502a4f0567..9e1c18b90805 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -53,7 +53,7 @@ static void calculate_ttu_cursor( static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) From 31663521ede2edb622ee1b397ae3ac666d6351c5 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 18 Jul 2024 11:53:31 -0400 Subject: [PATCH 0242/1523] drm/amd/display: Use gpuvm_min_page_size_kbytes for DML2 surfaces [Why] It's currently hard coded to 256 when it should be using the SOC provided values. This can result in corruption with linear surfaces where we prefetch more PTE than the buffer can hold. [How] Update the min page size correctly for the plane. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_translation_helper.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 25d4ef040173..7e39873832bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -954,7 +954,9 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); } -static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) { dml_uint_t width, height; @@ -971,7 +973,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = width; out->ViewportHeight[location] = height; @@ -1008,7 +1010,9 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_plane_state *in, struct dc_state *context, + const struct soc_bounding_box_st *soc) { struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); if (!scaler_data) @@ -1019,7 +1023,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = scaler_data->viewport.width; out->ViewportHeight[location] = scaler_data->viewport.height; @@ -1332,7 +1336,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat disp_cfg_plane_location = dml_dispcfg->num_surfaces++; populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]); - populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, context->streams[i]); + populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, + context->streams[i], &dml2->v20.dml_core_ctx.soc); dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; @@ -1348,7 +1353,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); - populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context); + populate_dml_plane_cfg_from_plane_state( + &dml_dispcfg->plane, disp_cfg_plane_location, + context->stream_status[i].plane_states[j], context, + &dml2->v20.dml_core_ctx.soc); if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; From c9bfc37f085aa180b3c49b9c95756b9ef032243e Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 18 Jul 2024 22:42:06 -0400 Subject: [PATCH 0243/1523] drm/amd/display: Add new enable and disable functions for DCN35 Add new enable and disable functions based on DCCG spec. Signed-off-by: Hansen Dsouza Reviewed-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 7d88b0ae241c..bd3757de51c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -932,6 +932,53 @@ static void dccg35_disable_dpp_new( dccg35_set_dppclk_rcg(dccg, inst, true); } +static void dccg35_disable_dscclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK); + dccg35_set_dsc_clk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dscclk_new(struct dccg *dccg, + int inst, + enum dsc_clk_source src) +{ + dccg35_set_dsc_clk_rcg(dccg, inst, false); + dccg35_set_dsc_clk_src_new(dccg, inst, src); +} + +static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + +static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); + dccg35_set_dtbclk_p_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); + dccg35_set_dpstreamclk_rcg(dccg, inst, true); +} + +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dpstreamclk_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -1965,7 +2012,12 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk32_le_new; (void)&dccg35_enable_dpp_new; (void)&dccg35_disable_dpp_new; - + (void)&dccg35_disable_dscclk_new; + (void)&dccg35_enable_dscclk_new; + (void)&dccg35_enable_dtbclk_p_new; + (void)&dccg35_disable_dtbclk_p_new; + (void)&dccg35_enable_dpstreamclk_new; + (void)&dccg35_disable_dpstreamclk_new; base = &dccg_dcn->base; base->ctx = ctx; base->funcs = &dccg35_funcs; From 5ce86c6c861352c9346ebb5c96ed70cb67414aa3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 27 Jul 2024 23:02:59 +0900 Subject: [PATCH 0244/1523] rust: suppress error messages from CONFIG_{RUSTC,BINDGEN}_VERSION_TEXT While this is a somewhat unusual case, I encountered odd error messages when I ran Kconfig in a foreign architecture chroot. $ make allmodconfig sh: 1: rustc: not found sh: 1: bindgen: not found # # configuration written to .config # The successful execution of 'command -v rustc' does not necessarily mean that 'rustc --version' will succeed. $ sh -c 'command -v rustc' /home/masahiro/.cargo/bin/rustc $ sh -c 'rustc --version' sh: 1: rustc: not found Here, 'rustc' is built for x86, and I ran it in an arm64 system. The current code: command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n can be turned into: command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version 2>/dev/null || echo n However, I did not understand the necessity of 'command -v $(RUSTC)'. I simplified it to: $(RUSTC) --version 2>/dev/null || echo n Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240727140302.1806011-1-masahiroy@kernel.org [ Rebased on top of v6.11-rc1. - Miguel ] Signed-off-by: Miguel Ojeda --- init/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index a465ea9525bd..cddeec9fcb72 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1919,7 +1919,7 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default $(shell,$(RUSTC) --version 2>/dev/null || echo n) config BINDGEN_VERSION_TEXT string @@ -1927,7 +1927,7 @@ config BINDGEN_VERSION_TEXT # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when # the minimum version is upgraded past that (0.69.1 already fixed the issue). - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version workaround-for-0.69.0 || echo n) + default $(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null || echo n) # # Place an empty function call at each tracepoint site. Can be From aacf93e87f0d808ef46e621aa56caea336b4433c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 27 Jul 2024 23:03:00 +0900 Subject: [PATCH 0245/1523] rust: fix the default format for CONFIG_{RUSTC,BINDGEN}_VERSION_TEXT Another oddity in these config entries is their default value can fall back to 'n', which is a value for bool or tristate symbols. The '|| echo n' is an incorrect workaround to avoid the syntax error. This is not a big deal, as the entry is hidden by 'depends on RUST' in situations where '$(RUSTC) --version' or '$(BINDGEN) --version' fails. Anyway, it looks odd. The default of a string type symbol should be a double-quoted string literal. Turn it into an empty string when the version command fails. Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240727140302.1806011-2-masahiroy@kernel.org [ Rebased on top of v6.11-rc1. - Miguel ] Signed-off-by: Miguel Ojeda --- init/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index cddeec9fcb72..3ada33b1d681 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1919,7 +1919,7 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,$(RUSTC) --version 2>/dev/null || echo n) + default "$(shell,$(RUSTC) --version 2>/dev/null)" config BINDGEN_VERSION_TEXT string @@ -1927,7 +1927,7 @@ config BINDGEN_VERSION_TEXT # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when # the minimum version is upgraded past that (0.69.1 already fixed the issue). - default $(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null || echo n) + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be From 37c526f00bc1c4f847fc800085f8f009d2e11be6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 10 Jan 2022 09:28:56 -0800 Subject: [PATCH 0246/1523] i2c: smbus: Improve handling of stuck alerts The following messages were observed while testing alert functionality on systems with multiple I2C devices on a single bus if alert was active on more than one chip. smbus_alert 3-000c: SMBALERT# from dev 0x0c, flag 0 smbus_alert 3-000c: no driver alert()! and: smbus_alert 3-000c: SMBALERT# from dev 0x28, flag 0 Once it starts, this message repeats forever at high rate. There is no device at any of the reported addresses. Analysis shows that this is seen if multiple devices have the alert pin active. Apparently some devices do not support SMBus arbitration correctly. They keep sending address bits after detecting an address collision and handle the collision not at all or too late. Specifically, address 0x0c is seen with ADT7461A at address 0x4c and ADM1021 at address 0x18 if alert is active on both chips. Address 0x28 is seen with ADT7483 at address 0x2a and ADT7461 at address 0x4c if alert is active on both chips. Once the system is in bad state (alert is set by more than one chip), it often only recovers by power cycling. To reduce the impact of this problem, abort the endless loop in smbus_alert() if the same address is read more than once and not handled by a driver. Fixes: b5527a7766f0 ("i2c: Add SMBus alert support") Signed-off-by: Guenter Roeck [wsa: it also fixed an interrupt storm in one of my experiments] Tested-by: Wolfram Sang [wsa: rebased, moved a comment as well, improved the 'invalid' value] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-smbus.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 7e4203df83ed..836c247e7684 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -34,6 +34,7 @@ static int smbus_do_alert(struct device *dev, void *addrp) struct i2c_client *client = i2c_verify_client(dev); struct alert_data *data = addrp; struct i2c_driver *driver; + int ret; if (!client || client->addr != data->addr) return 0; @@ -47,16 +48,21 @@ static int smbus_do_alert(struct device *dev, void *addrp) device_lock(dev); if (client->dev.driver) { driver = to_i2c_driver(client->dev.driver); - if (driver->alert) + if (driver->alert) { + /* Stop iterating after we find the device */ driver->alert(client, data->type, data->data); - else + ret = -EBUSY; + } else { dev_warn(&client->dev, "no driver alert()!\n"); - } else + ret = -EOPNOTSUPP; + } + } else { dev_dbg(&client->dev, "alert with no driver\n"); + ret = -ENODEV; + } device_unlock(dev); - /* Stop iterating after we find the device */ - return -EBUSY; + return ret; } /* @@ -67,6 +73,7 @@ static irqreturn_t smbus_alert(int irq, void *d) { struct i2c_smbus_alert *alert = d; struct i2c_client *ara; + unsigned short prev_addr = I2C_CLIENT_END; /* Not a valid address */ ara = alert->ara; @@ -94,8 +101,19 @@ static irqreturn_t smbus_alert(int irq, void *d) data.addr, data.data); /* Notify driver for the device which issued the alert */ - device_for_each_child(&ara->adapter->dev, &data, - smbus_do_alert); + status = device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert); + /* + * If we read the same address more than once, and the alert + * was not handled by a driver, it won't do any good to repeat + * the loop because it will never terminate. + * Bail out in this case. + * Note: This assumes that a driver with alert handler handles + * the alert properly and clears it if necessary. + */ + if (data.addr == prev_addr && status != -EBUSY) + break; + prev_addr = data.addr; } return IRQ_HANDLED; From 0ba521d6948ecb4acf1276494dfed127fe096ca6 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 25 Jul 2024 20:46:44 +0200 Subject: [PATCH 0247/1523] rust: macros: indent list item in `module!`'s docs Like commit e516211f615f ("rust: macros: indent list item in `paste!`'s docs"), but for `module!`. Reviewed-by: Trevor Gross Link: https://lore.kernel.org/r/20240725184644.135185-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/macros/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 159e75292970..5be0cb9db3ee 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -94,7 +94,7 @@ use proc_macro::TokenStream; /// - `license`: ASCII string literal of the license of the kernel module (required). /// - `alias`: array of ASCII string literals of the alias names of the kernel module. /// - `firmware`: array of ASCII string literals of the firmware files of -/// the kernel module. +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) From 4ddd51ccff911a2e9e961307692532a325f6c78a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 25 Jul 2024 16:54:53 +0800 Subject: [PATCH 0248/1523] ASoC: fsl_micfil: Expand the range of FIFO watermark mask On the i.MX9x platforms, the mask of FIFO watermark is 0x1F, on i.MX8x platforms, the mask of FIFO watermark is 0X7. So use the mask 0x1F for all platforms to make them compatible. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/1721897694-6088-2-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 2 +- sound/soc/fsl/fsl_micfil.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 0d37edb70261..96a6b88d0d67 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -831,7 +831,7 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { {REG_MICFIL_CTRL1, 0x00000000}, {REG_MICFIL_CTRL2, 0x00000000}, {REG_MICFIL_STAT, 0x00000000}, - {REG_MICFIL_FIFO_CTRL, 0x00000007}, + {REG_MICFIL_FIFO_CTRL, 0x0000001F}, {REG_MICFIL_FIFO_STAT, 0x00000000}, {REG_MICFIL_DATACH0, 0x00000000}, {REG_MICFIL_DATACH1, 0x00000000}, diff --git a/sound/soc/fsl/fsl_micfil.h b/sound/soc/fsl/fsl_micfil.h index c6b902ba0a53..b7798a7cbf2a 100644 --- a/sound/soc/fsl/fsl_micfil.h +++ b/sound/soc/fsl/fsl_micfil.h @@ -72,7 +72,7 @@ #define MICFIL_STAT_CHXF(ch) BIT(ch) /* MICFIL FIFO Control Register -- REG_MICFIL_FIFO_CTRL 0x10 */ -#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(2, 0) +#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(4, 0) /* MICFIL FIFO Status Register -- REG_MICFIL_FIFO_STAT 0x14 */ #define MICFIL_FIFO_STAT_FIFOX_OVER(ch) BIT(ch) From aa4f76ef09a993efa9b5fab6ddf5d6d324baaea3 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 25 Jul 2024 16:54:54 +0800 Subject: [PATCH 0249/1523] ASoC: fsl_micfil: Differentiate register access permission for platforms On i.MX9x platforms, the REG_MICFIL_FSYNC_CTRL, REG_MICFIL_VERID, REG_MICFIL_PARAM are added, but they are not existed on i.MX8x platforms. Use the existed micfil->soc->use_verid to distinguish the access permission for these platforms. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/1721897694-6088-3-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 96a6b88d0d67..22b240a70ad4 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -855,6 +855,8 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -872,9 +874,6 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: - case REG_MICFIL_FSYNC_CTRL: - case REG_MICFIL_VERID: - case REG_MICFIL_PARAM: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: @@ -883,6 +882,12 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NDATA: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + case REG_MICFIL_VERID: + case REG_MICFIL_PARAM: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } @@ -890,6 +895,8 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -899,7 +906,6 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: /* Write 1 to Clear */ - case REG_MICFIL_FSYNC_CTRL: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: /* Write 1 to Clear */ @@ -907,6 +913,10 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NCONFIG: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } From aebb1813c279ce8f3a2dfa3f86def0c0ec1cbb8d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:41 +0200 Subject: [PATCH 0250/1523] ASoC: codecs: wcd937x-sdw: Correct Soundwire ports mask Device has up to WCD937X_MAX_TX_SWR_PORTS (or WCD937X_MAX_SWR_PORTS for sink) number of ports and the array assigned to prop.src_dpn_prop and prop.sink_dpn_prop has 0..WCD937X_MAX_TX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WCD937X_MAX_TX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: c99a515ff153 ("ASoC: codecs: wcd937x-sdw: add SoundWire driver") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-1-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd937x-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd937x-sdw.c b/sound/soc/codecs/wcd937x-sdw.c index 3abc8041406a..0c33f7f3dc25 100644 --- a/sound/soc/codecs/wcd937x-sdw.c +++ b/sound/soc/codecs/wcd937x-sdw.c @@ -1049,7 +1049,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; @@ -1062,7 +1062,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, /* Start in cache-only until device is enumerated */ regcache_cache_only(wcd->regmap, true); } else { - pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_rx_ch_info[0]; } From 3f6fb03dae9c7dfba7670858d29e03c8faaa89fe Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:42 +0200 Subject: [PATCH 0251/1523] ASoC: codecs: wcd938x-sdw: Correct Soundwire ports mask Device has up to WCD938X_MAX_SWR_PORTS number of ports and the array assigned to prop.src_dpn_prop and prop.sink_dpn_prop has 0..WCD938X_MAX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WCD938X_MAX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-2-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index c995bcc59ead..7da8a10bd0a9 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1252,12 +1252,12 @@ static int wcd9380_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_rx_ch_info[0]; } From 74a79977c4e1d09eced33e6e22f875a5bb3fad29 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:43 +0200 Subject: [PATCH 0252/1523] ASoC: codecs: wcd939x-sdw: Correct Soundwire ports mask Device has up to WCD939X_MAX_TX_SWR_PORTS (or WCD939X_MAX_RX_SWR_PORTS for sink) number of ports and the array assigned to prop.src_dpn_prop and prop.sink_dpn_prop has 0..WCD939X_MAX_TX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WCD939X_MAX_TX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: be2af391cea0 ("ASoC: codecs: Add WCD939x Soundwire devices driver") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-3-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd939x-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd939x-sdw.c b/sound/soc/codecs/wcd939x-sdw.c index 94b1e99a3ca0..fca95777a75a 100644 --- a/sound/soc/codecs/wcd939x-sdw.c +++ b/sound/soc/codecs/wcd939x-sdw.c @@ -1453,12 +1453,12 @@ static int wcd9390_probe(struct sdw_slave *pdev, const struct sdw_device_id *id) pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd939x_tx_dpn_prop; wcd->ch_info = &wcd939x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd939x_rx_dpn_prop; wcd->ch_info = &wcd939x_sdw_rx_ch_info[0]; } From eb11c3bb64ad0a05aeacdb01039863aa2aa3614b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:44 +0200 Subject: [PATCH 0253/1523] ASoC: codecs: wsa881x: Correct Soundwire ports mask Device has up to WSA881X_MAX_SWR_PORTS number of ports and the array assigned to prop.sink_dpn_prop has 0..WSA881X_MAX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WSA881X_MAX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: a0aab9e1404a ("ASoC: codecs: add wsa881x amplifier support") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-4-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa881x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 0478599d0f35..fb9e92f08d98 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1152,7 +1152,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, wsa881x->sconfig.frame_rate = 48000; wsa881x->sconfig.direction = SDW_DATA_DIR_RX; wsa881x->sconfig.type = SDW_STREAM_PDM; - pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; pdev->prop.clk_stop_mode1 = true; From 6801ac36f25690e14955f7f9eace1eaa29edbdd0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:45 +0200 Subject: [PATCH 0254/1523] ASoC: codecs: wsa883x: Correct Soundwire ports mask Device has up to WSA883X_MAX_SWR_PORTS number of ports and the array assigned to prop.sink_dpn_prop has 0..WSA883X_MAX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WSA883X_MAX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: 43b8c7dc85a1 ("ASoC: codecs: add wsa883x amplifier support") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-5-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa883x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index d0ab4e2290b6..3e4fdaa3f44f 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1406,7 +1406,7 @@ static int wsa883x_probe(struct sdw_slave *pdev, WSA883X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; From dcb6631d05152930e2ea70fd2abfd811b0e970b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jul 2024 16:10:46 +0200 Subject: [PATCH 0255/1523] ASoC: codecs: wsa884x: Correct Soundwire ports mask Device has up to WSA884X_MAX_SWR_PORTS number of ports and the array assigned to prop.sink_dpn_prop has 0..WSA884X_MAX_SWR_PORTS-1 elements. On the other hand, GENMASK(high, low) creates an inclusive mask between , so we need the mask from 0 up to WSA884X_MAX_SWR_PORTS-1. Theoretically, too wide mask could cause an out of bounds read in sdw_get_slave_dpn_prop() in stream.c, however only in the case of buggy driver, e.g. adding incorrect number of ports via sdw_stream_add_slave(). Fixes: aa21a7d4f68a ("ASoC: codecs: wsa884x: Add WSA884x family of speakers") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240726-asoc-wcd-wsa-swr-ports-genmask-v1-6-d4d7a8b56f05@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa884x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index d17ae17b2938..89eb5e03a617 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1895,7 +1895,7 @@ static int wsa884x_probe(struct sdw_slave *pdev, WSA884X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa884x_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; From 6b99068d5ea0aa295f15f30afc98db74d056ec7b Mon Sep 17 00:00:00 2001 From: Jerome Audu Date: Sat, 27 Jul 2024 15:40:15 +0200 Subject: [PATCH 0256/1523] ASoC: sti: add missing probe entry for player and reader This patch addresses a regression in the ASoC STI drivers that was introduced in Linux version 6.6.y. The issue originated from a series of patches (see https://lore.kernel.org/all/87wmy5b0wt.wl-kuninori.morimoto.gx@renesas.com/) that unintentionally omitted necessary probe functions for the player and reader components. Probe function in `sound/soc/sti/sti_uniperif.c:415` is being replaced by another probe function located at `sound/soc/sti/sti_uniperif.c:453`, which should instead be derived from the player and reader components. This patch correctly reinserts the missing probe entries, restoring the intended functionality. Fixes: 9f625f5e6cf9 ("ASoC: sti: merge DAI call back functions into ops") Signed-off-by: Jerome Audu Link: https://patch.msgid.link/20240727-sti-audio-fix-v2-1-208bde546c3f@free.fr Signed-off-by: Mark Brown --- sound/soc/sti/sti_uniperif.c | 2 +- sound/soc/sti/uniperif.h | 1 + sound/soc/sti/uniperif_player.c | 1 + sound/soc/sti/uniperif_reader.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/sti/sti_uniperif.c b/sound/soc/sti/sti_uniperif.c index ba824f14a39c..a7956e5a4ee5 100644 --- a/sound/soc/sti/sti_uniperif.c +++ b/sound/soc/sti/sti_uniperif.c @@ -352,7 +352,7 @@ static int sti_uniperiph_resume(struct snd_soc_component *component) return ret; } -static int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct sti_uniperiph_dai *dai_data = &priv->dai_data; diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h index 2a5de328501c..74e51f0ff85c 100644 --- a/sound/soc/sti/uniperif.h +++ b/sound/soc/sti/uniperif.h @@ -1380,6 +1380,7 @@ int uni_reader_init(struct platform_device *pdev, struct uniperif *reader); /* common */ +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai); int sti_uniperiph_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index dd9013c47664..6d1ce030963c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -1038,6 +1038,7 @@ static const struct snd_soc_dai_ops uni_player_dai_ops = { .startup = uni_player_startup, .shutdown = uni_player_shutdown, .prepare = uni_player_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_player_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 065c5f0d1f5f..05ea2b794eb9 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -401,6 +401,7 @@ static const struct snd_soc_dai_ops uni_reader_dai_ops = { .startup = uni_reader_startup, .shutdown = uni_reader_shutdown, .prepare = uni_reader_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_reader_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, From c118478665f467e57d06b2354de65974b246b82b Mon Sep 17 00:00:00 2001 From: Bruno Ancona Date: Sun, 28 Jul 2024 22:50:32 -0600 Subject: [PATCH 0257/1523] ASoC: amd: yc: Support mic on HP 14-em0002la Add support for the internal microphone for HP 14-em0002la laptop using a quirk entry. Signed-off-by: Bruno Ancona Link: https://patch.msgid.link/20240729045032.223230-1-brunoanconasala@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1769e07e83dc..f4bbfffe9fcb 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -423,6 +423,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A3E"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8B27"), + } + }, { .driver_data = &acp6x_card, .matches = { From 45d763fe503e6e0f180f873b750aea307e73fdcf Mon Sep 17 00:00:00 2001 From: Paul Handrigan Date: Fri, 26 Jul 2024 10:11:11 -0500 Subject: [PATCH 0258/1523] ASoC: cs530x: Change IN HPF Select kcontrol name Change to the IN HPF Select kcontrol to the correct name IN DEC Filter Select. Signed-off-by: Paul Handrigan Link: https://patch.msgid.link/20240726151111.3247774-1-paulha@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs530x.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index 25a86a32e936..da52afe56c3c 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -129,16 +129,16 @@ volsw_err: static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1270, 50, 0); -static const char * const cs530x_in_hpf_text[] = { +static const char * const cs530x_in_filter_text[] = { "Min Phase Slow Roll-off", "Min Phase Fast Roll-off", "Linear Phase Slow Roll-off", "Linear Phase Fast Roll-off", }; -static SOC_ENUM_SINGLE_DECL(cs530x_in_hpf_enum, CS530X_IN_FILTER, +static SOC_ENUM_SINGLE_DECL(cs530x_in_filter_enum, CS530X_IN_FILTER, CS530X_IN_FILTER_SHIFT, - cs530x_in_hpf_text); + cs530x_in_filter_text); static const char * const cs530x_in_4ch_sum_text[] = { "None", @@ -189,7 +189,7 @@ SOC_SINGLE_EXT_TLV("IN1 Volume", CS530X_IN_VOL_CTRL1_0, 0, 255, 1, SOC_SINGLE_EXT_TLV("IN2 Volume", CS530X_IN_VOL_CTRL1_1, 0, 255, 1, snd_soc_get_volsw, cs530x_put_volsw_vu, in_vol_tlv), -SOC_ENUM("IN HPF Select", cs530x_in_hpf_enum), +SOC_ENUM("IN DEC Filter Select", cs530x_in_filter_enum), SOC_ENUM("Input Ramp Up", cs530x_ramp_inc_enum), SOC_ENUM("Input Ramp Down", cs530x_ramp_dec_enum), From 7ec5bd247a0d6fb23ab7da2bedd9c3f1f9333c3b Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Mon, 29 Jul 2024 12:01:33 +0530 Subject: [PATCH 0259/1523] nvme: remove unused parameter First parameter of nvme_init_integrity() is unused. Remove it, and modify the callers. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 053d5b4909cd..e8afb5a0f3a3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1757,7 +1757,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, +static bool nvme_init_integrity(struct nvme_ns_head *head, struct queue_limits *lim) { struct blk_integrity *bi = &lim->integrity; @@ -2176,7 +2176,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head, &lim)) + if (!nvme_init_integrity(ns->head, &lim)) capacity = 0; ret = queue_limits_commit_update(ns->disk->queue, &lim); @@ -2280,7 +2280,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else - nvme_init_integrity(ns->head->disk, ns->head, &lim); + nvme_init_integrity(ns->head, &lim); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); From 6f4e43a2f771b737d991142ec4f6d4b7ff31fbb4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 24 Jul 2024 14:53:09 -0700 Subject: [PATCH 0260/1523] drm/xe: Fix opregion leak Being part o the display, ideally the setup and cleanup would be done by display itself. However this is a bigger refactor that needs to be done on both i915 and xe. For now, just fix the leak: unreferenced object 0xffff8881a0300008 (size 192): comm "modprobe", pid 4354, jiffies 4295647021 hex dump (first 32 bytes): 00 00 87 27 81 88 ff ff 18 80 9b 00 00 c9 ff ff ...'............ 18 81 9b 00 00 c9 ff ff 00 00 00 00 00 00 00 00 ................ backtrace (crc 99260e31): [] kmemleak_alloc+0x4b/0x80 [] kmalloc_trace_noprof+0x312/0x3d0 [] intel_opregion_setup+0x89/0x700 [xe] [] xe_display_init_noirq+0x2f/0x90 [xe] [] xe_device_probe+0x7a3/0xbf0 [xe] [] xe_pci_probe+0x333/0x5b0 [xe] [] local_pci_probe+0x48/0xb0 [] pci_device_probe+0xc8/0x280 [] really_probe+0xf8/0x390 [] __driver_probe_device+0x8a/0x170 [] driver_probe_device+0x23/0xb0 [] __driver_attach+0xc7/0x190 [] bus_for_each_dev+0x7d/0xd0 [] driver_attach+0x1e/0x30 [] bus_add_driver+0x117/0x250 Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240724215309.644423-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..ca4468c82078 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(xe); } int xe_display_init_noirq(struct xe_device *xe) @@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(xe); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } From 950aeefb34923fe3c28ade35fe05f24e2c5b1d55 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 17 Jul 2024 22:01:30 -0700 Subject: [PATCH 0261/1523] iommufd/device: Fix hwpt at err_unresv in iommufd_device_do_replace() The rewind routine should remove the reserved iovas added to the new hwpt. Fixes: 89db31635c87 ("iommufd: Derive iommufd_hwpt_paging from iommufd_hw_pagetable") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20240718050130.1956804-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61..3214a4c17c6b 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); From 0710c3d304f67f9b68f5082214e311ec8f82bd82 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 26 Jul 2024 13:18:25 +0200 Subject: [PATCH 0262/1523] dt-bindings: Batch-update Konrad Dybcio's email Use my @kernel.org address everywhere. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240726-topic-konrad_email-v1-3-f94665da2919@kernel.org Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml | 2 +- .../devicetree/bindings/clock/qcom,sm8350-videocc.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml | 2 +- .../devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml | 2 +- .../bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml | 2 +- .../devicetree/bindings/display/panel/sony,td4353-jdi.yaml | 2 +- .../devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml | 2 +- .../devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml | 2 +- .../devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml | 2 +- Documentation/devicetree/bindings/iommu/qcom,iommu.yaml | 2 +- .../devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml | 2 +- .../devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml index a584b4953e68..46403b98411f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml index 6b9c1d198b14..10afe984e2fb 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on MSM8994 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml index a5a29dc75ae1..1fe68e07a2b2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml index 2280b859b2ad..78e232fa95dc 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml index cf19f44af774..4ff17a91344b 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6115 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml index 374a1844a159..10a9c96a97b6 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks and power domains on diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml index fd6658cb793d..c03b30f64f35 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Camera Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm camera clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml index 183b1c75dbdf..3cd422a645fd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml index 147b75a21508..de4e9066eeb8 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml index cf4cad76f6c9..d9dd479c17bd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml index 46d1d91e3a01..5c2ecec0624e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM8350 Video Clock & Reset Controller maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm video clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml index 3c2cac14e6c3..d10bb002906e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM8450 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 8e8a288d318c..e22b4c433fd0 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM6375 Display MDSS maintainers: - - Konrad Dybcio + - Konrad Dybcio description: SM6375 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 2399cabf044c..dd614e077bbf 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: |+ This panel seems to only be found in the Asus Z00T diff --git a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml index 191b692125e1..032a989184ff 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony TD4353 JDI 5 / 5.7" 2160x1080 MIPI-DSI Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Sony TD4353 JDI is a 5 (XZ2c) / 5.7 (XZ2) inch 2160x1080 diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml index 9fce7203bd42..78210791496f 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC7280 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml index 6c2da03f0cd2..100c68636909 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC8280XP maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml index 3cff7e662255..300640a533dd 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SM8450 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index 571e5746d177..f8cebc9e8cd9 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies legacy IOMMU implementations maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm "B" family devices which are not compatible with arm-smmu have diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index bd3cbb44c99a..e75393b3d196 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. MDM9607 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm MDM9607 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml index a4771f87d936..b262af6be97d 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6350 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6350 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml index 047f82863f9b..c11af09c3f5b 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6375 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6375 SoC. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml index 7afafde17a38..61cf4fe19ca5 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml @@ -8,7 +8,7 @@ title: Qualcomm Resource Power Manager (RPM) Processor/Subsystem maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio - Stephan Gerhold description: | diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml index 9410404f87f1..ad2dcc39a5f5 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. (QTI) RPM Master Stats maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Qualcomm RPM (Resource Power Manager) architecture includes a concept From eb53e5b933b9ff315087305b3dc931af3067d19c Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Tue, 30 Jul 2024 09:39:40 +0530 Subject: [PATCH 0263/1523] drm/i915/display/dp: Compute AS SDP when vrr is also enabled AS SDP should be computed when VRR timing generator is also enabled. Correct the compute condition to compute params of Adaptive sync SDP when VRR timing genrator is enabled along with sink support indication. --v2: Modify if condition (Jani). Fixes: b2013783c445 ("drm/i915/display: Cache adpative sync caps to use it later") Cc: Mitul Golani Cc: Arun R Murthy Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: intel-xe@lists.freedesktop.org Signed-off-by: Mitul Golani Reviewed-by: Ankit Nautiyal Signed-off-by: Ankit Nautiyal (added prefix drm in subject) Link: https://patchwork.freedesktop.org/patch/msgid/20240730040941.396862-1-mitulkumar.ajitkumar.golani@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5d6568c8e186..86412ae7b48f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2617,7 +2617,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!crtc_state->vrr.enable || intel_dp->as_sdp_supported) + if (!crtc_state->vrr.enable || !intel_dp->as_sdp_supported) return; crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); From 73d7cd542bbd0a7c6881ea0df5255f190a1e7236 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Tue, 30 Jul 2024 09:25:05 +0530 Subject: [PATCH 0264/1523] drm/i915/hdcp: Fix HDCP2_STREAM_STATUS macro Fix HDCP2_STREAM_STATUS macro, it called pipe instead of port never threw a compile error as no one used it. --v2 -Add Fixes [Jani] Fixes: d631b984cc90 ("drm/i915/hdcp: Add HDCP 2.2 stream register") Signed-off-by: Suraj Kandpal Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240730035505.3759899-1-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index a568a457e532..f590d7f48ba7 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -251,7 +251,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 From a371d558e6f3aed977a8a7346350557de5d25190 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 29 Jul 2024 14:19:28 -0400 Subject: [PATCH 0265/1523] mm, slub: do not call do_slab_free for kfence object In 782f8906f805 the freeing of kfence objects was moved from deep inside do_slab_free to the wrapper functions outside. This is a nice change, but unfortunately it missed one spot in __kmem_cache_free_bulk. This results in a crash like this: BUG skbuff_head_cache (Tainted: G S B E ): Padding overwritten. 0xffff88907fea0f00-0xffff88907fea0fff @offset=3840 slab_err (mm/slub.c:1129) free_to_partial_list (mm/slub.c:? mm/slub.c:4036) slab_pad_check (mm/slub.c:864 mm/slub.c:1290) check_slab (mm/slub.c:?) free_to_partial_list (mm/slub.c:3171 mm/slub.c:4036) kmem_cache_alloc_bulk (mm/slub.c:? mm/slub.c:4495 mm/slub.c:4586 mm/slub.c:4635) napi_build_skb (net/core/skbuff.c:348 net/core/skbuff.c:527 net/core/skbuff.c:549) All the other callers to do_slab_free appear to be ok. Add a kfence_free check in __kmem_cache_free_bulk to avoid the crash. Reported-by: Chris Mason Fixes: 782f8906f805 ("mm/slub: free KFENCE objects in slab_free_hook()") Cc: stable@kernel.org Signed-off-by: Rik van Riel Signed-off-by: Vlastimil Babka --- mm/slub.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 3520acaf9afa..c9d8a2497fd6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4690,6 +4690,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.slab) continue; + if (kfence_free(df.freelist)) + continue; + do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); From 833cf12846aa19adf9b76bc79c40747726f3c0c1 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 29 Jul 2024 10:40:35 -0700 Subject: [PATCH 0266/1523] drm/i915: Fix possible int overflow in skl_ddi_calculate_wrpll() On the off chance that clock value ends up being too high (by means of skl_ddi_calculate_wrpll() having been called with big enough value of crtc_state->port_clock * 1000), one possible consequence may be that the result will not be able to fit into signed int. Fix this issue by moving conversion of clock parameter from kHz to Hz into the body of skl_ddi_calculate_wrpll(), as well as casting the same parameter to u64 type while calculating the value for AFE clock. This both mitigates the overflow problem and avoids possible erroneous integer promotion mishaps. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 82d354370189 ("drm/i915/skl: Implementation of SKL DPLL programming") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240729174035.25727-1-n.zhandarovich@fintech.ru --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 90998b037349..292d163036b1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) struct skl_wrpll_params wrpll_params = {}; int ret; - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; From c3c4f22b7c814a6ee485ce294065836f8ede30fa Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 30 Jul 2024 11:20:39 +0800 Subject: [PATCH 0267/1523] spi: hisi-kunpeng: Add validation for the minimum value of speed_hz The speed specified by the user is used to calculate the clk_div based on the max_speed_hz in hisi_calc_effective_speed. A very low speed value can lead to a clk_div larger than the variable range. Avoid this by setting the min_speed_hz so that such a small speed value is rejected. __spi_validate() in spi.c will return -EINVAL for the specified speed_hz lower than min_speed_hz. Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Link: https://patch.msgid.link/20240730032040.3156393-2-liudingyuan@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-hisi-kunpeng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 77e9738e42f6..6910b4d4c427 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -495,6 +495,7 @@ static int hisi_spi_probe(struct platform_device *pdev) host->transfer_one = hisi_spi_transfer_one; host->handle_err = hisi_spi_handle_err; host->dev.fwnode = dev->fwnode; + host->min_speed_hz = DIV_ROUND_UP(host->max_speed_hz, CLK_DIV_MAX); hisi_spi_hw_init(hs); From 5127c42c77de18651aa9e8e0a3ced190103b449c Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 30 Jul 2024 11:20:40 +0800 Subject: [PATCH 0268/1523] spi: hisi-kunpeng: Add verification for the max_frequency provided by the firmware If the value of max_speed_hz is 0, it may cause a division by zero error in hisi_calc_effective_speed(). The value of max_speed_hz is provided by firmware. Firmware is generally considered as a trusted domain. However, as division by zero errors can cause system failure, for defense measure, the value of max_speed is validated here. So 0 is regarded as invalid and an error code is returned. Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Link: https://patch.msgid.link/20240730032040.3156393-3-liudingyuan@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-hisi-kunpeng.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 6910b4d4c427..16054695bdb0 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -481,6 +481,9 @@ static int hisi_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (host->max_speed_hz == 0) + return dev_err_probe(dev, -EINVAL, "spi-max-frequency can't be 0\n"); + ret = device_property_read_u16(dev, "num-cs", &host->num_chipselect); if (ret) From e075c3b13a0a142dcd3151b25d29a24f31b7b640 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 29 Jul 2024 14:04:43 +0200 Subject: [PATCH 0269/1523] platform/x86: intel-vbtn: Protect ACPI notify handler against recursion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs") ACPI notify handlers like the intel-vbtn notify_handler() may run on multiple CPU cores racing with themselves. This race gets hit on Dell Venue 7140 tablets when undocking from the keyboard, causing the handler to try and register priv->switches_dev twice, as can be seen from the dev_info() message getting logged twice: [ 83.861800] intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving a switch event [ 83.861858] input: Intel Virtual Switches as /devices/pci0000:00/0000:00:1f.0/PNP0C09:00/INT33D6:00/input/input17 [ 83.861865] intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving a switch event After which things go seriously wrong: [ 83.861872] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:1f.0/PNP0C09:00/INT33D6:00/input/input17' ... [ 83.861967] kobject: kobject_add_internal failed for input17 with -EEXIST, don't try to register things with the same name in the same directory. [ 83.877338] BUG: kernel NULL pointer dereference, address: 0000000000000018 ... Protect intel-vbtn notify_handler() from racing with itself with a mutex to fix this. Fixes: e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs") Reported-by: En-Wei Wu Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2073001 Tested-by: Kostadin Stoilov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240729120443.14779-1-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vbtn.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 9b7ce03ba085..a353e830b65f 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -7,11 +7,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include "../dual_accel_detect.h" @@ -66,6 +68,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { }; struct intel_vbtn_priv { + struct mutex mutex; /* Avoid notify_handler() racing with itself */ struct input_dev *buttons_dev; struct input_dev *switches_dev; bool dual_accel; @@ -155,6 +158,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) bool autorelease; int ret; + guard(mutex)(&priv->mutex); + if ((ke = sparse_keymap_entry_from_scancode(priv->buttons_dev, event))) { if (!priv->has_buttons) { dev_warn(&device->dev, "Warning: received 0x%02x button event on a device without buttons, please report this.\n", @@ -290,6 +295,10 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + err = devm_mutex_init(&device->dev, &priv->mutex); + if (err) + return err; + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; From 4c83ee4bf32ea8e57ae2321906c067d69ad7c41b Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 29 Jul 2024 14:08:31 +1200 Subject: [PATCH 0270/1523] platform/x86/amd: pmf: Add quirk for ROG Ally X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS ROG Ally X has the same issue as the G14 where it advertises SPS support but doesn't use it. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20240729020831.28117-1-luke@ljones.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/pmf-quirks.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c index 0b2eb0ae85fe..460444cda1b2 100644 --- a/drivers/platform/x86/amd/pmf/pmf-quirks.c +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -29,6 +29,14 @@ static const struct dmi_system_id fwbug_list[] = { }, .driver_data = &quirk_no_sps_bug, }, + { + .ident = "ROG Ally X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "RC72LA"), + }, + .driver_data = &quirk_no_sps_bug, + }, {} }; @@ -48,4 +56,3 @@ void amd_pmf_quirks_init(struct amd_pmf_dev *dev) dmi_id->ident); } } - From 426463d94d45d37c233e480231a40b9b35f10e49 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 18 Jul 2024 20:31:19 +0530 Subject: [PATCH 0271/1523] platform/x86/amd/pmc: Send OS_HINT command for new AMD platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To initiate the HW deep state transition, the OS_HINT command has to be sent to the PMFW. Add this support to the platforms that belong to family 1Ah model 60h series. Signed-off-by: Shyam Sundar S K Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240718150119.3427190-1-Shyam-sundar.S-k@amd.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 2 ++ drivers/platform/x86/amd/pmc/pmc.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a3d881f6e5d9..c3e51f0a5c33 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -764,6 +764,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return MSG_OS_HINT_RN; } return -EINVAL; @@ -967,6 +968,7 @@ static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index 9e32d3128c3a..f1166d15c856 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -67,6 +67,7 @@ void amd_mp2_stb_deinit(struct amd_pmc_dev *dev); #define AMD_CPU_ID_PS 0x14E8 #define AMD_CPU_ID_SP 0x14A4 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PCI_DEVICE_ID_AMD_MP2_STB 0x172c #endif /* PMC_H */ From 942810c0e89277d738b7f1b6f379d0a5877999f6 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 23 Jul 2024 18:54:50 +0530 Subject: [PATCH 0272/1523] platform/x86/amd/pmf: Add new ACPI ID AMDI0107 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new ACPI ID AMDI0107 used by upcoming AMD platform to the PMF supported list of devices. Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20240723132451.3488326-1-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 2d6e2558863c..8f1f719befa3 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -41,6 +41,7 @@ #define AMD_CPU_ID_RMB 0x14b5 #define AMD_CPU_ID_PS 0x14e8 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PMF_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -249,6 +250,7 @@ static const struct pci_device_id pmf_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RMB) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; @@ -382,6 +384,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = { {"AMDI0102", 0}, {"AMDI0103", 0}, {"AMDI0105", 0}, + {"AMDI0107", 0}, { } }; MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids); From e4c4638b6a10427d30e29d22351c375886025f47 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Jul 2024 15:35:47 +0200 Subject: [PATCH 0273/1523] spi: spidev: Add missing spi_device_id for bh2228fv When the of_device_id entry for "rohm,bh2228fv" was added, the corresponding spi_device_id was forgotten, causing a warning message during boot-up: SPI driver spidev has no spi_device_id for rohm,bh2228fv Fix module autoloading and shut up the warning by adding the missing entry. Fixes: fc28d1c1fe3b3e2f ("spi: spidev: add correct compatible for Rohm BH2228FV") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/cb571d4128f41175f31319cd9febc829417ea167.1722346539.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 05e6d007f9a7..5304728c68c2 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -700,6 +700,7 @@ static const struct class spidev_class = { }; static const struct spi_device_id spidev_spi_ids[] = { + { .name = "bh2228fv" }, { .name = "dh2228fv" }, { .name = "ltc2488" }, { .name = "sx1301" }, From e61dd678601eac53d501dda1eb1bcffec7b11bd6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 18:21:32 -0400 Subject: [PATCH 0274/1523] bcachefs: Fix double free of ca->buckets_nouse Reported-by: Dan Carpenter Fixes: ffcbec6076 ("bcachefs: Kill opts.buckets_nouse") Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec..e7fa2de35014 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); From 7f7a2da3bf8bc0e0f6c239af495b7050056e889c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 26 Jul 2024 18:22:16 -0700 Subject: [PATCH 0275/1523] drm/xe: Use dma_fence_chain_free in chain fence unused as a sync A chain fence is uninitialized if not installed in a drm sync obj. Thus if xe_sync_entry_cleanup is called and sync->chain_fence is non-NULL the proper cleanup is dma_fence_chain_free rather than a dma-fence put. Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2411 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2261 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240727012216.2118276-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 533246f42256..3aa6270e5dd7 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -259,7 +259,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } From 6f20fc09936e786a4ba18f5514fe185e0451ada9 Mon Sep 17 00:00:00 2001 From: Dominik Grzegorzek Date: Mon, 29 Jul 2024 16:01:52 +0300 Subject: [PATCH 0276/1523] drm/xe: Move and export xe_hw_engine lookup. Move and export xe_hw_engine lookup. This is in preparation to use this in eudebug code where we want to find active engine. v2: s/tile/gt due to uapi changes (Mika) Signed-off-by: Dominik Grzegorzek Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240729130152.100130-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 37 ++++-------------------------- drivers/gpu/drm/xe/xe_hw_engine.c | 31 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine.h | 7 ++++++ 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 69867a7b7c77..956dc15b432a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -413,34 +413,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -462,8 +434,7 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, if (xe_hw_engine_is_reserved(hwe)) continue; - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) + if (hwe->class == XE_ENGINE_CLASS_COPY) logical_mask |= BIT(hwe->logical_instance); } @@ -492,7 +463,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -571,7 +542,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -608,7 +579,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 07ed9fd28f19..00ace5fcc284 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -5,7 +5,10 @@ #include "xe_hw_engine.h" +#include + #include +#include #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -1135,3 +1138,31 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) { return engine_infos[hwe->engine_id].domain; } + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + unsigned int idx; + + if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 900c8c991430..d227ffe557eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -9,6 +9,8 @@ #include "xe_hw_engine_types.h" struct drm_printer; +struct drm_xe_engine_class_instance; +struct xe_device; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci); + static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) { return hwe->name; From 16d731890db94e23d5483402494ef378f2271ba1 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 31 Jul 2024 09:19:50 +0200 Subject: [PATCH 0277/1523] dt-bindings: usb: microchip,usb2514: Add USB2517 compatible USB2517 is a 7-port variant of this USB hub. Add an USB compatible based on USB vendor & product ID. Signed-off-by: Alexander Stein Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240731071950.989113-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/microchip,usb2514.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index 783c27591e56..245e8c3ce669 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -18,6 +18,7 @@ properties: - usb424,2412 - usb424,2417 - usb424,2514 + - usb424,2517 reg: true From 228a953e61d6d608a3facc1c3a27b9fb03c99de7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 17 Jul 2024 11:50:53 +0200 Subject: [PATCH 0278/1523] usb: gadget: midi2: Fix the response for FB info with block 0xff When the block number 0xff is given to Function Block Discovery message, the device should return the information of all Function Blocks, but currently the gadget driver treats it as an error. Implement the proper behavior for the block 0xff instead. Fixes: 8b645922b223 ("usb: gadget: Add support for USB MIDI 2.0 function driver") Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240717095102.10493-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi2.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 38e8ed3144f0..3f63253ad3e0 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -642,12 +642,21 @@ static void process_ump_stream_msg(struct f_midi2_ep *ep, const u32 *data) if (format) return; // invalid blk = (*data >> 8) & 0xff; - if (blk >= ep->num_blks) - return; - if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) - reply_ump_stream_fb_info(ep, blk); - if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) - reply_ump_stream_fb_name(ep, blk); + if (blk == 0xff) { + /* inquiry for all blocks */ + for (blk = 0; blk < ep->num_blks; blk++) { + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } + } else if (blk < ep->num_blks) { + /* only the specified block */ + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } return; } } From 76a7bfc445b8e9893c091e24ccfd4f51dfdc0a70 Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Sun, 21 Jul 2024 15:23:15 -0400 Subject: [PATCH 0279/1523] usb: gadget: u_audio: Check return codes from usb_ep_enable and config_ep_by_speed. These functions can fail if descriptors are malformed, or missing, for the selected USB speed. Fixes: eb9fecb9e69b ("usb: gadget: f_uac2: split out audio core") Fixes: 24f779dac8f3 ("usb: gadget: f_uac2/u_audio: add feedback endpoint support") Cc: stable@vger.kernel.org Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240721192314.3532697-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 42 ++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 89af0feb7512..24299576972f 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -592,16 +592,25 @@ int u_audio_start_capture(struct g_audio *audio_dev) struct usb_ep *ep, *ep_fback; struct uac_rtd_params *prm; struct uac_params *params = &audio_dev->params; - int req_len, i; + int req_len, i, ret; prm = &uac->c_prm; dev_dbg(dev, "start capture with rate %d\n", prm->srate); ep = audio_dev->out_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for out_ep failed (%d)\n", ret); + return ret; + } + req_len = ep->maxpacket; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for out_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { @@ -629,9 +638,18 @@ int u_audio_start_capture(struct g_audio *audio_dev) return 0; /* Setup feedback endpoint */ - config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed in_ep_fback failed (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } + prm->fb_ep_enabled = true; - usb_ep_enable(ep_fback); + ret = usb_ep_enable(ep_fback); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep_fback (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } req_len = ep_fback->maxpacket; req_fback = usb_ep_alloc_request(ep_fback, GFP_ATOMIC); @@ -687,13 +705,17 @@ int u_audio_start_playback(struct g_audio *audio_dev) struct uac_params *params = &audio_dev->params; unsigned int factor; const struct usb_endpoint_descriptor *ep_desc; - int req_len, i; + int req_len, i, ret; unsigned int p_pktsize; prm = &uac->p_prm; dev_dbg(dev, "start playback with rate %d\n", prm->srate); ep = audio_dev->in_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for in_ep failed (%d)\n", ret); + return ret; + } ep_desc = ep->desc; /* @@ -720,7 +742,11 @@ int u_audio_start_playback(struct g_audio *audio_dev) uac->p_residue_mil = 0; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { From afdcfd3d6fcdeca2735ca8d994c5f2d24a368f0a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 9 Jul 2024 13:38:41 +0200 Subject: [PATCH 0280/1523] usb: vhci-hcd: Do not drop references before new references are gained At a few places the driver carries stale pointers to references that can still be used. Make sure that does not happen. This strictly speaking closes ZDI-CAN-22273, though there may be similar races in the driver. Signed-off-by: Oliver Neukum Cc: stable Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20240709113851.14691-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 82650c11e451..302a89aeb258 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -745,6 +745,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag * */ if (usb_pipedevice(urb->pipe) == 0) { + struct usb_device *old; __u8 type = usb_pipetype(urb->pipe); struct usb_ctrlrequest *ctrlreq = (struct usb_ctrlrequest *) urb->setup_packet; @@ -755,14 +756,15 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto no_need_xmit; } + old = vdev->udev; switch (ctrlreq->bRequest) { case USB_REQ_SET_ADDRESS: /* set_address may come when a device is reset */ dev_info(dev, "SetAddress Request (%d) to port %d\n", ctrlreq->wValue, vdev->rhport); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); spin_lock(&vdev->ud.lock); vdev->ud.status = VDEV_ST_USED; @@ -781,8 +783,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag usbip_dbg_vhci_hc( "Not yet?:Get_Descriptor to device 0 (get max pipe size)\n"); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); goto out; default: @@ -1067,6 +1069,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) static void vhci_device_reset(struct usbip_device *ud) { struct vhci_device *vdev = container_of(ud, struct vhci_device, ud); + struct usb_device *old = vdev->udev; unsigned long flags; spin_lock_irqsave(&ud->lock, flags); @@ -1074,8 +1077,8 @@ static void vhci_device_reset(struct usbip_device *ud) vdev->speed = 0; vdev->devid = 0; - usb_put_dev(vdev->udev); vdev->udev = NULL; + usb_put_dev(old); if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); From 973a57891608a98e894db2887f278777f564de18 Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Wed, 24 Jul 2024 21:04:20 -0400 Subject: [PATCH 0281/1523] usb: gadget: core: Check for unset descriptor Make sure the descriptor has been set before looking at maxpacket. This fixes a null pointer panic in this case. This may happen if the gadget doesn't properly set up the endpoint for the current speed, or the gadget descriptors are malformed and the descriptor for the speed/endpoint are not found. No current gadget driver is known to have this problem, but this may cause a hard-to-find bug during development of new gadgets. Fixes: 54f83b8c8ea9 ("USB: gadget: Reject endpoints with 0 maxpacket value") Cc: stable@vger.kernel.org Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240725010419.314430-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index b0a613758414..cf6478f97f4a 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -118,12 +118,10 @@ int usb_ep_enable(struct usb_ep *ep) goto out; /* UDC drivers can't handle endpoints with maxpacket size 0 */ - if (usb_endpoint_maxp(ep->desc) == 0) { - /* - * We should log an error message here, but we can't call - * dev_err() because there's no way to find the gadget - * given only ep. - */ + if (!ep->desc || usb_endpoint_maxp(ep->desc) == 0) { + WARN_ONCE(1, "%s: ep%d (%s) has %s\n", __func__, ep->address, ep->name, + (!ep->desc) ? "NULL descriptor" : "maxpacket 0"); + ret = -EINVAL; goto out; } From e885f5f1f2b43575aa8e4e31404132d77d6663d1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 29 Jul 2024 10:42:58 +0200 Subject: [PATCH 0282/1523] usb: typec: fsa4480: Check if the chip is really there Currently, the driver will happily register the switch/mux devices, and so long as the i2c master doesn't complain, the user would never know there's something wrong. Add a device id check (based on [1]) and return -ENODEV if the read fails or returns nonsense. Checking the value on a Qualcomm SM6115P-based Lenovo Tab P11 tablet, the ID mentioned in the datasheet does indeed show up: fsa4480 1-0042: Found FSA4480 v1.1 (Vendor ID = 0) [1] https://www.onsemi.com/pdf/datasheet/fsa4480-d.pdf Fixes: 1dc246320c6b ("usb: typec: mux: Add On Semi fsa4480 driver") Cc: stable Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240729-topic-fs4480_check-v3-1-f5bf732d3424@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/fsa4480.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/typec/mux/fsa4480.c b/drivers/usb/typec/mux/fsa4480.c index cb7cdf90cb0a..cd235339834b 100644 --- a/drivers/usb/typec/mux/fsa4480.c +++ b/drivers/usb/typec/mux/fsa4480.c @@ -13,6 +13,10 @@ #include #include +#define FSA4480_DEVICE_ID 0x00 + #define FSA4480_DEVICE_ID_VENDOR_ID GENMASK(7, 6) + #define FSA4480_DEVICE_ID_VERSION_ID GENMASK(5, 3) + #define FSA4480_DEVICE_ID_REV_ID GENMASK(2, 0) #define FSA4480_SWITCH_ENABLE 0x04 #define FSA4480_SWITCH_SELECT 0x05 #define FSA4480_SWITCH_STATUS1 0x07 @@ -251,6 +255,7 @@ static int fsa4480_probe(struct i2c_client *client) struct typec_switch_desc sw_desc = { }; struct typec_mux_desc mux_desc = { }; struct fsa4480 *fsa; + int val = 0; int ret; fsa = devm_kzalloc(dev, sizeof(*fsa), GFP_KERNEL); @@ -268,6 +273,15 @@ static int fsa4480_probe(struct i2c_client *client) if (IS_ERR(fsa->regmap)) return dev_err_probe(dev, PTR_ERR(fsa->regmap), "failed to initialize regmap\n"); + ret = regmap_read(fsa->regmap, FSA4480_DEVICE_ID, &val); + if (ret || !val) + return dev_err_probe(dev, -ENODEV, "FSA4480 not found\n"); + + dev_dbg(dev, "Found FSA4480 v%lu.%lu (Vendor ID = %lu)\n", + FIELD_GET(FSA4480_DEVICE_ID_VERSION_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_REV_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_VENDOR_ID, val)); + /* Safe mode */ fsa->cur_enable = FSA4480_ENABLE_DEVICE | FSA4480_ENABLE_USB; fsa->mode = TYPEC_STATE_SAFE; From 3c526089a663e25ac78b6a61d84a52a83680d0c3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jul 2024 09:05:50 -0500 Subject: [PATCH 0283/1523] usb: typec: tcpci: Fix error code in tcpci_check_std_output_cap() The tcpci_check_std_output_cap() function is supposed to return negative error codes but it's declared as type bool so the error handling doesn't work. Declare it as an int instead. Fixes: 62ce9ef14797 ("usb: typec: tcpci: add support to set connector orientation") Signed-off-by: Dan Carpenter Reviewed-by: Heikki Krogerus Reviewed-by: Marco Felsch Link: https://lore.kernel.org/r/b0880888-6719-4614-91fc-8ee63b71d304@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index b862fdf3fe1d..3e3dcb983dde 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -67,7 +67,7 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) return regmap_raw_write(tcpci->regmap, reg, &val, sizeof(u16)); } -static bool tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) +static int tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) { unsigned int reg; int ret; From 5a444bea37e2759549ef72bfe83d1c8712e76b3d Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 30 Jul 2024 18:27:54 +0530 Subject: [PATCH 0284/1523] usb: gadget: u_serial: Set start_delayed during suspend Upstream commit aba3a8d01d62 ("usb: gadget: u_serial: add suspend resume callbacks") added started_delayed flag, so that new ports which are opened after USB suspend can start IO while resuming. But if the port was already opened, and gadget suspend kicks in afterwards, start_delayed will never be set. This causes resume to bail out before calling gs_start_io(). Fix this by setting start_delayed during suspend. Fixes: aba3a8d01d62 ("usb: gadget: u_serial: add suspend resume callbacks") Cc: stable@vger.kernel.org Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20240730125754.576326-1-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index eec7f7a2e40f..b394105e55d6 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1441,6 +1441,7 @@ void gserial_suspend(struct gserial *gser) spin_lock(&port->port_lock); spin_unlock(&serial_port_lock); port->suspended = true; + port->start_delayed = true; spin_unlock_irqrestore(&port->port_lock, flags); } EXPORT_SYMBOL_GPL(gserial_suspend); From 8290b567621ba4e3ccf45ec9d67e0507196c5ddc Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 24 Jul 2024 09:23:50 -0700 Subject: [PATCH 0285/1523] usb: typec: tipd: Fix dereferencing freeing memory in tps6598x_apply_patch() release_firmware() already frees fw, fix this my moving release_firmware after the dereference. Fixes: 916b8e5fa73d ("usb: typec: tipd: add error log to provide firmware name and size") Signed-off-by: Harshit Mogalapalli Reviewed-by: Heikki Krogerus Reviewed-by: Javier Carrasco Link: https://lore.kernel.org/r/20240724162356.992763-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index ea768b19a7f1..eb5596e3406a 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1191,11 +1191,11 @@ static int tps6598x_apply_patch(struct tps6598x *tps) dev_info(tps->dev, "Firmware update succeeded\n"); release_fw: - release_firmware(fw); if (ret) { dev_err(tps->dev, "Failed to write patch %s of %zu bytes\n", firmware_name, fw->size); } + release_firmware(fw); return ret; }; From b1dad2f091382b0049c72dab8153779248fa8016 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 24 Jul 2024 09:23:51 -0700 Subject: [PATCH 0286/1523] usb: typec: tipd: Delete extra semi-colon There shouldn't be a ; at the end of the function, delete it. Signed-off-by: Harshit Mogalapalli Reviewed-by: Javier Carrasco Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240724162356.992763-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index eb5596e3406a..dd51a25480bf 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1198,7 +1198,7 @@ release_fw: release_firmware(fw); return ret; -}; +} static int cd321x_init(struct tps6598x *tps) { From 4c288f56030f380acb9572d88396ac15edfd30ed Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 29 Jul 2024 20:33:20 +0300 Subject: [PATCH 0287/1523] drm/i915/bios: remove stale and useless comments The comments do not add any value. Remove. Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20240729173320.1053791-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 02443462bec3..460e83f0d5a5 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1692,14 +1692,6 @@ parse_mipi_config(struct drm_i915_private *i915, /* Initialize this to undefined indicating no generic MIPI support */ panel->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID; - /* Block #40 is already parsed and panel_fixed_mode is - * stored in i915->lfp_vbt_mode - * resuse this when needed - */ - - /* Parse #52 for panel index used from panel_type already - * parsed - */ start = bdb_find_section(i915, BDB_MIPI_CONFIG); if (!start) { drm_dbg_kms(&i915->drm, "No MIPI config BDB found"); From 6e73c490445ae77c52f62fcf9a49193d17c6f79a Mon Sep 17 00:00:00 2001 From: Luis Felipe Hernandez Date: Tue, 30 Jul 2024 20:15:59 -0400 Subject: [PATCH 0288/1523] platform/x86: msi-wmi-platform: Fix spelling mistakes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were a few instances of typos that lead could to confusion when reading. The following words have been corrected: Binay -> Binary singe -> single chaged -> changed Signed-off-by: Luis Felipe Hernandez Reviewed-by: Armin Wolf Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20240731001602.259338-1-luis.hernandez093@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- Documentation/wmi/devices/msi-wmi-platform.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 29b1b2e6d42c..31a136942892 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -130,12 +130,12 @@ data using the `bmfdec `_ utility: Due to a peculiarity in how Windows handles the ``CreateByteField()`` ACPI operator (errors only happen when a invalid byte field is ultimately accessed), all methods require a 32 byte input -buffer, even if the Binay MOF says otherwise. +buffer, even if the Binary MOF says otherwise. The input buffer contains a single byte to select the subfeature to be accessed and 31 bytes of input data, the meaning of which depends on the subfeature being accessed. -The output buffer contains a singe byte which signals success or failure (``0x00`` on failure) +The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. WMI method Get_EC() @@ -147,7 +147,7 @@ data contains a flag byte and a 28 byte controller firmware version string. The first 4 bits of the flag byte contain the minor version of the embedded controller interface, with the next 2 bits containing the major version of the embedded controller interface. -The 7th bit signals if the embedded controller page chaged (exact meaning is unknown), and the +The 7th bit signals if the embedded controller page changed (exact meaning is unknown), and the last bit signals if the platform is a Tigerlake platform. The MSI software seems to only use this interface when the last bit is set. From 3114f77e9453daa292ec0906f313a715c69b5943 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 30 Jul 2024 15:59:30 +0000 Subject: [PATCH 0289/1523] platform/x86/intel/ifs: Initialize union ifs_status to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the IFS scan test exits prematurely due to a timeout before completing a single run, the union ifs_status remains uninitialized, leading to incorrect test status reporting. To prevent this, always initialize the union ifs_status to zero. Fixes: 2b40e654b73a ("platform/x86/intel/ifs: Add scan test support") Suggested-by: Ilpo Järvinen Reviewed-by: Jithu Joseph Reviewed-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20240730155930.1754744-1-sathyanarayanan.kuppuswamy@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/ifs/runtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 282e4bfe30da..be3d51ed0e47 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -221,8 +221,8 @@ static int doscan(void *data) */ static void ifs_test_core(int cpu, struct device *dev) { + union ifs_status status = {}; union ifs_scan activate; - union ifs_status status; unsigned long timeout; struct ifs_data *ifsd; int to_start, to_stop; From 6eabce6608d6f3440f4c03aa3d3ef50a47a3d193 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Wed, 17 Jul 2024 07:24:38 -0500 Subject: [PATCH 0290/1523] serial: core: check uartclk for zero to avoid divide by zero Calling ioctl TIOCSSERIAL with an invalid baud_base can result in uartclk being zero, which will result in a divide by zero error in uart_get_divisor(). The check for uartclk being zero in uart_set_info() needs to be done before other settings are made as subsequent calls to ioctl TIOCSSERIAL for the same port would be impacted if the uartclk check was done where uartclk gets set. Oops: divide error: 0000 PREEMPT SMP KASAN PTI RIP: 0010:uart_get_divisor (drivers/tty/serial/serial_core.c:580) Call Trace: serial8250_get_divisor (drivers/tty/serial/8250/8250_port.c:2576 drivers/tty/serial/8250/8250_port.c:2589) serial8250_do_set_termios (drivers/tty/serial/8250/8250_port.c:502 drivers/tty/serial/8250/8250_port.c:2741) serial8250_set_termios (drivers/tty/serial/8250/8250_port.c:2862) uart_change_line_settings (./include/linux/spinlock.h:376 ./include/linux/serial_core.h:608 drivers/tty/serial/serial_core.c:222) uart_port_startup (drivers/tty/serial/serial_core.c:342) uart_startup (drivers/tty/serial/serial_core.c:368) uart_set_info (drivers/tty/serial/serial_core.c:1034) uart_set_info_user (drivers/tty/serial/serial_core.c:1059) tty_set_serial (drivers/tty/tty_io.c:2637) tty_ioctl (drivers/tty/tty_io.c:2647 drivers/tty/tty_io.c:2791) __x64_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:907 fs/ioctl.c:893 fs/ioctl.c:893) do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Reported-by: syzkaller Cc: stable@vger.kernel.org Signed-off-by: George Kennedy Rule: add Link: https://lore.kernel.org/stable/1721148848-9784-1-git-send-email-george.kennedy%40oracle.com Link: https://lore.kernel.org/r/1721219078-3209-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9a18d0b95a41..5bea3af46abc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -881,6 +881,14 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, new_flags = (__force upf_t)new_info->flags; old_custom_divisor = uport->custom_divisor; + if (!(uport->flags & UPF_FIXED_PORT)) { + unsigned int uartclk = new_info->baud_base * 16; + /* check needs to be done here before other settings made */ + if (uartclk == 0) { + retval = -EINVAL; + goto exit; + } + } if (!capable(CAP_SYS_ADMIN)) { retval = -EPERM; if (change_irq || change_port || From 133f4c00b8b2bfcacead9b81e7e8edfceb4b06c4 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 23 Jul 2024 08:53:00 -0400 Subject: [PATCH 0291/1523] serial: sc16is7xx: fix TX fifo corruption Sometimes, when a packet is received on channel A at almost the same time as a packet is about to be transmitted on channel B, we observe with a logic analyzer that the received packet on channel A is transmitted on channel B. In other words, the Tx buffer data on channel B is corrupted with data from channel A. The problem appeared since commit 4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels"), which changed the EFR locking to operate on each channel instead of chip-wise. This commit has introduced a regression, because the EFR lock is used not only to protect the EFR registers access, but also, in a very obscure and undocumented way, to protect access to the data buffer, which is shared by the Tx and Rx handlers, but also by each channel of the IC. Fix this regression first by switching to kfifo_out_linear_ptr() in sc16is7xx_handle_tx() to eliminate the need for a shared Rx/Tx buffer. Secondly, replace the chip-wise Rx buffer with a separate Rx buffer for each channel. Fixes: 4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240723125302.1305372-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index c79dcd7c8d1a..58696e05492c 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -327,6 +327,7 @@ struct sc16is7xx_one { struct kthread_work reg_work; struct kthread_delayed_work ms_work; struct sc16is7xx_one_config config; + unsigned char buf[SC16IS7XX_FIFO_SIZE]; /* Rx buffer. */ unsigned int old_mctrl; u8 old_lcr; /* Value before EFR access. */ bool irda_mode; @@ -340,7 +341,6 @@ struct sc16is7xx_port { unsigned long gpio_valid_mask; #endif u8 mctrl_mask; - unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; struct sc16is7xx_one p[]; @@ -612,18 +612,18 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, unsigned int iir) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lsr = 0, bytes_read, i; bool read_lsr = (iir == SC16IS7XX_IIR_RLSE_SRC) ? true : false; u8 ch, flag; - if (unlikely(rxlen >= sizeof(s->buf))) { + if (unlikely(rxlen >= sizeof(one->buf))) { dev_warn_ratelimited(port->dev, "ttySC%i: Possible RX FIFO overrun: %d\n", port->line, rxlen); port->icount.buf_overrun++; /* Ensure sanity of RX level */ - rxlen = sizeof(s->buf); + rxlen = sizeof(one->buf); } while (rxlen) { @@ -636,10 +636,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, lsr = 0; if (read_lsr) { - s->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); + one->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); bytes_read = 1; } else { - sc16is7xx_fifo_read(port, s->buf, rxlen); + sc16is7xx_fifo_read(port, one->buf, rxlen); bytes_read = rxlen; } @@ -672,7 +672,7 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, } for (i = 0; i < bytes_read; ++i) { - ch = s->buf[i]; + ch = one->buf[i]; if (uart_handle_sysrq_char(port, ch)) continue; @@ -690,10 +690,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, static void sc16is7xx_handle_tx(struct uart_port *port) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct tty_port *tport = &port->state->port; unsigned long flags; unsigned int txlen; + unsigned char *tail; if (unlikely(port->x_char)) { sc16is7xx_port_write(port, SC16IS7XX_THR_REG, port->x_char); @@ -718,8 +718,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) txlen = 0; } - txlen = uart_fifo_out(port, s->buf, txlen); - sc16is7xx_fifo_write(port, s->buf, txlen); + txlen = kfifo_out_linear_ptr(&tport->xmit_fifo, &tail, txlen); + sc16is7xx_fifo_write(port, tail, txlen); + uart_xmit_advance(port, txlen); uart_port_lock_irqsave(port, &flags); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) From 7d3b793faaab1305994ce568b59d61927235f57b Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 23 Jul 2024 08:53:01 -0400 Subject: [PATCH 0292/1523] serial: sc16is7xx: fix invalid FIFO access with special register set When enabling access to the special register set, Receiver time-out and RHR interrupts can happen. In this case, the IRQ handler will try to read from the FIFO thru the RHR register at address 0x00, but address 0x00 is mapped to DLL register, resulting in erroneous FIFO reading. Call graph example: sc16is7xx_startup(): entry sc16is7xx_ms_proc(): entry sc16is7xx_set_termios(): entry sc16is7xx_set_baud(): DLH/DLL = $009C --> access special register set sc16is7xx_port_irq() entry --> IIR is 0x0C sc16is7xx_handle_rx() entry sc16is7xx_fifo_read(): --> unable to access FIFO (RHR) because it is mapped to DLL (LCR=LCR_CONF_MODE_A) sc16is7xx_set_baud(): exit --> Restore access to general register set Fix the problem by claiming the efr_lock mutex when accessing the Special register set. Fixes: dfeae619d781 ("serial: sc16is7xx") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240723125302.1305372-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 58696e05492c..b4c1798a1df2 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -592,6 +592,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_MCR_CLKSEL_BIT, prescaler == 1 ? 0 : SC16IS7XX_MCR_CLKSEL_BIT); + mutex_lock(&one->efr_lock); + /* Backup LCR and access special register set (DLL/DLH) */ lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, @@ -606,6 +608,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Restore LCR and access to general register set */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&one->efr_lock); + return DIV_ROUND_CLOSEST((clk / prescaler) / 16, div); } From 6e20753da6bc651e02378a0cdb78f16c42098c88 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Thu, 25 Jul 2024 15:20:45 +0200 Subject: [PATCH 0293/1523] tty: vt: conmakehash: cope with abs_srctree no longer in env conmakehash uses getenv("abs_srctree") from the environment to strip the absolute path from the generated sources. However since commit e2bad142bb3d ("kbuild: unexport abs_srctree and abs_objtree") this environment variable no longer gets set. Instead use basename() to indicate the used file in a comment of the generated source file. Fixes: 3bd85c6c97b2 ("tty: vt: conmakehash: Don't mention the full path of the input in output") Cc: stable Signed-off-by: Max Krummenacher Link: https://lore.kernel.org/stable/20240725132056.9151-1-max.oss.09%40gmail.com Link: https://lore.kernel.org/r/20240725132056.9151-1-max.oss.09@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/conmakehash.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index dc2177fec715..82d9db68b2ce 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -11,6 +11,8 @@ * Copyright (C) 1995-1997 H. Peter Anvin */ +#include +#include #include #include #include @@ -76,8 +78,8 @@ static void addpair(int fp, int un) int main(int argc, char *argv[]) { FILE *ctbl; - const char *tblname, *rel_tblname; - const char *abs_srctree; + const char *tblname; + char base_tblname[PATH_MAX]; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -102,16 +104,6 @@ int main(int argc, char *argv[]) } } - abs_srctree = getenv("abs_srctree"); - if (abs_srctree && !strncmp(abs_srctree, tblname, strlen(abs_srctree))) - { - rel_tblname = tblname + strlen(abs_srctree); - while (*rel_tblname == '/') - ++rel_tblname; - } - else - rel_tblname = tblname; - /* For now we assume the default font is always 256 characters. */ fontlen = 256; @@ -253,6 +245,8 @@ int main(int argc, char *argv[]) for ( i = 0 ; i < fontlen ; i++ ) nuni += unicount[i]; + strncpy(base_tblname, tblname, PATH_MAX); + base_tblname[PATH_MAX - 1] = 0; printf("\ /*\n\ * Do not edit this file; it was automatically generated by\n\ @@ -264,7 +258,7 @@ int main(int argc, char *argv[]) #include \n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", rel_tblname, fontlen); +{\n\t", basename(base_tblname), fontlen); for ( i = 0 ; i < fontlen ; i++ ) { From cd04d50979502a1a965869dcd246d44db1bf0153 Mon Sep 17 00:00:00 2001 From: Andrew Ballance Date: Mon, 8 Jul 2024 19:44:26 -0500 Subject: [PATCH 0294/1523] rust: firmware: fix invalid rustdoc link remove an extra quote from the doc comment so that rustdoc no longer genertes a link to a nonexistent file. Signed-off-by: Andrew Ballance Reviewed-by: Danilo Krummrich Acked-by: Miguel Ojeda Fixes: de6582833db0 ("rust: add firmware abstractions") Link: https://lore.kernel.org/r/20240709004426.44854-1-andrewjballance@gmail.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/firmware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2ba03af9f036..dee5b4b18aec 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -2,7 +2,7 @@ //! Firmware abstraction //! -//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") +//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; use core::ptr::NonNull; From a2e4bdca2c361260609d47dff6c0e36ef2b41d4c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 30 Jul 2024 18:09:31 +0200 Subject: [PATCH 0295/1523] Documentation: embargoed-hardware-issues.rst: minor cleanups and fixes The embargoed-hardware-issues.rst file needed a bunch of minor grammar, punctuation, and syntax cleanups based on feedback we have gotten over the past few years. The main change here is the term "silicon" being used over "hardware" to differentiate between companies that make a chip (i.e. a CPU) and those that take the chip and put it into their system. No process changes are made here at all, only clarification for the way the current process works. All of these changes have been approved by a review from a large number of different open source legal members, representing the companies involved in this process. Acked-by: Jonathan Corbet Link: https://lore.kernel.org/r/2024073032-outsource-sniff-e8ea@gregkh Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Michael Dolan Signed-off-by: Michael Dolan Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- .../process/embargoed-hardware-issues.rst | 122 ++++++++++-------- 1 file changed, 65 insertions(+), 57 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6e9a4597bf2c..2b34bb6b7cda 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -13,9 +13,9 @@ kernel. Hardware issues like Meltdown, Spectre, L1TF etc. must be treated differently because they usually affect all Operating Systems ("OS") and therefore need coordination across different OS vendors, distributions, -hardware vendors and other parties. For some of the issues, software -mitigations can depend on microcode or firmware updates, which need further -coordination. +silicon vendors, hardware integrators, and other parties. For some of the +issues, software mitigations can depend on microcode or firmware updates, +which need further coordination. .. _Contact: @@ -32,8 +32,8 @@ Linux kernel security team (:ref:`Documentation/admin-guide/ `) instead. The team can be contacted by email at . This -is a private list of security officers who will help you to coordinate a -fix according to our documented process. +is a private list of security officers who will help you coordinate a fix +according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -43,7 +43,7 @@ the following URLs: - PGP: https://www.kernel.org/static/files/hardware-security.asc - S/MIME: https://www.kernel.org/static/files/hardware-security.crt -While hardware security issues are often handled by the affected hardware +While hardware security issues are often handled by the affected silicon vendor, we welcome contact from researchers or individuals who have identified a potential hardware flaw. @@ -65,7 +65,7 @@ of Linux Foundation's IT operations personnel technically have the ability to access the embargoed information, but are obliged to confidentiality by their employment contract. Linux Foundation IT personnel are also responsible for operating and managing the rest of -kernel.org infrastructure. +kernel.org's infrastructure. The Linux Foundation's current director of IT Project infrastructure is Konstantin Ryabitsev. @@ -85,7 +85,7 @@ Memorandum of Understanding The Linux kernel community has a deep understanding of the requirement to keep hardware security issues under embargo for coordination between -different OS vendors, distributors, hardware vendors and other parties. +different OS vendors, distributors, silicon vendors, and other parties. The Linux kernel community has successfully handled hardware security issues in the past and has the necessary mechanisms in place to allow @@ -103,11 +103,11 @@ the issue in the best technical way. All involved developers pledge to adhere to the embargo rules and to keep the received information confidential. Violation of the pledge will lead to immediate exclusion from the current issue and removal from all related -mailing-lists. In addition, the hardware security team will also exclude +mailing lists. In addition, the hardware security team will also exclude the offender from future issues. The impact of this consequence is a highly effective deterrent in our community. In case a violation happens the hardware security team will inform the involved parties immediately. If you -or anyone becomes aware of a potential violation, please report it +or anyone else becomes aware of a potential violation, please report it immediately to the Hardware security officers. @@ -124,14 +124,16 @@ method for these types of issues. Start of Disclosure """"""""""""""""""" -Disclosure starts by contacting the Linux kernel hardware security team by -email. This initial contact should contain a description of the problem and -a list of any known affected hardware. If your organization builds or -distributes the affected hardware, we encourage you to also consider what -other hardware could be affected. +Disclosure starts by emailing the Linux kernel hardware security team per +the Contact section above. This initial contact should contain a +description of the problem and a list of any known affected silicon. If +your organization builds or distributes the affected hardware, we encourage +you to also consider what other hardware could be affected. The disclosing +party is responsible for contacting the affected silicon vendors in a +timely manner. The hardware security team will provide an incident-specific encrypted -mailing-list which will be used for initial discussion with the reporter, +mailing list which will be used for initial discussion with the reporter, further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of @@ -158,8 +160,8 @@ This serves several purposes: - The disclosed entities can be contacted to name experts who should participate in the mitigation development. - - If an expert which is required to handle an issue is employed by an - listed entity or member of an listed entity, then the response teams can + - If an expert who is required to handle an issue is employed by a listed + entity or member of an listed entity, then the response teams can request the disclosure of that expert from that entity. This ensures that the expert is also part of the entity's response team. @@ -169,8 +171,8 @@ Disclosure The disclosing party provides detailed information to the initial response team via the specific encrypted mailing-list. -From our experience the technical documentation of these issues is usually -a sufficient starting point and further technical clarification is best +From our experience, the technical documentation of these issues is usually +a sufficient starting point, and further technical clarification is best done via email. Mitigation development @@ -179,35 +181,39 @@ Mitigation development The initial response team sets up an encrypted mailing-list or repurposes an existing one if appropriate. -Using a mailing-list is close to the normal Linux development process and -has been successfully used in developing mitigations for various hardware +Using a mailing list is close to the normal Linux development process and +has been successfully used to develop mitigations for various hardware security issues in the past. -The mailing-list operates in the same way as normal Linux development. -Patches are posted, discussed and reviewed and if agreed on applied to a -non-public git repository which is only accessible to the participating +The mailing list operates in the same way as normal Linux development. +Patches are posted, discussed, and reviewed and if agreed upon, applied to +a non-public git repository which is only accessible to the participating developers via a secure connection. The repository contains the main development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed. Bringing in experts can happen at any -time of the development process and needs to be handled in a timely manner. +kernel developer community as needed. Any involved party can suggest +further experts to be included, each of which will be subject to the same +requirements outlined above. -If an expert is employed by or member of an entity on the disclosure list +Bringing in experts can happen at any time in the development process and +needs to be handled in a timely manner. + +If an expert is employed by or a member of an entity on the disclosure list provided by the disclosing party, then participation will be requested from the relevant entity. -If not, then the disclosing party will be informed about the experts +If not, then the disclosing party will be informed about the experts' participation. The experts are covered by the Memorandum of Understanding -and the disclosing party is requested to acknowledge the participation. In -case that the disclosing party has a compelling reason to object, then this -objection has to be raised within five work days and resolved with the -incident team immediately. If the disclosing party does not react within -five work days this is taken as silent acknowledgement. +and the disclosing party is requested to acknowledge their participation. +In the case where the disclosing party has a compelling reason to object, +any objection must to be raised within five working days and resolved with +the incident team immediately. If the disclosing party does not react +within five working days this is taken as silent acknowledgment. -After acknowledgement or resolution of an objection the expert is disclosed -by the incident team and brought into the development process. +After the incident team acknowledges or resolves an objection, the expert +is disclosed and brought into the development process. List participants may not communicate about the issue outside of the private mailing list. List participants may not use any shared resources @@ -217,19 +223,20 @@ private mailing list. List participants may not use any shared resources Coordinated release """"""""""""""""""" -The involved parties will negotiate the date and time where the embargo -ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. There is no pre-notification process: -fixes are published in public and available to everyone at the same time. +The involved parties will negotiate the date and time when the embargo +ends. At that point, the prepared mitigations are published into the +relevant kernel trees. There is no pre-notification process: the +mitigations are published in public and available to everyone at the same +time. While we understand that hardware security issues need coordinated embargo -time, the embargo time should be constrained to the minimum time which is -required for all involved parties to develop, test and prepare the +time, the embargo time should be constrained to the minimum time that is +required for all involved parties to develop, test, and prepare their mitigations. Extending embargo time artificially to meet conference talk -dates or other non-technical reasons is creating more work and burden for -the involved developers and response teams as the patches need to be kept -up to date in order to follow the ongoing upstream kernel development, -which might create conflicting changes. +dates or other non-technical reasons creates more work and burden for the +involved developers and response teams as the patches need to be kept up to +date in order to follow the ongoing upstream kernel development, which +might create conflicting changes. CVE assignment """""""""""""" @@ -275,34 +282,35 @@ an involved disclosed party. The current ambassadors list: If you want your organization to be added to the ambassadors list, please contact the hardware security team. The nominated ambassador has to -understand and support our process fully and is ideally well connected in +understand and support our process fully and is ideally well-connected in the Linux kernel community. Encrypted mailing-lists ----------------------- -We use encrypted mailing-lists for communication. The operating principle +We use encrypted mailing lists for communication. The operating principle of these lists is that email sent to the list is encrypted either with the -list's PGP key or with the list's S/MIME certificate. The mailing-list +list's PGP key or with the list's S/MIME certificate. The mailing list software decrypts the email and re-encrypts it individually for each subscriber with the subscriber's PGP key or S/MIME certificate. Details -about the mailing-list software and the setup which is used to ensure the +about the mailing list software and the setup that is used to ensure the security of the lists and protection of the data can be found here: https://korg.wiki.kernel.org/userdoc/remail. List keys ^^^^^^^^^ -For initial contact see :ref:`Contact`. For incident specific mailing-lists -the key and S/MIME certificate are conveyed to the subscribers by email -sent from the specific list. +For initial contact see the :ref:`Contact` section above. For incident +specific mailing lists, the key and S/MIME certificate are conveyed to the +subscribers by email sent from the specific list. -Subscription to incident specific lists +Subscription to incident-specific lists ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Subscription is handled by the response teams. Disclosed parties who want -to participate in the communication send a list of potential subscribers to -the response team so the response team can validate subscription requests. +Subscription to incident-specific lists is handled by the response teams. +Disclosed parties who want to participate in the communication send a list +of potential experts to the response team so the response team can validate +subscription requests. Each subscriber needs to send a subscription request to the response team by email. The email must be signed with the subscriber's PGP key or S/MIME From 86fee2877f3427df5876159a182aa70d10964cdf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 30 Jul 2024 18:09:32 +0200 Subject: [PATCH 0296/1523] Documentation: embargoed-hardware-issues.rst: add a section documenting the "early access" process Over the past years there have been many "misunderstandings" and "confusion" as to who is, and is not, allowed early access to the changes created by the members of the embargoed hardware issue teams working on a specific problem. The current process, while it does work, is "difficult" for many companies to understand and agree with. Because of this, there has been numerous attempts by many companies to work around the process by lies, subterfuge, and other side channels sometimes involving unsuspecting lawyers. Cut all of that out, and put the responsibility of distributing code on the silicon vendor affected, as they already have legal agreements in place that cover this type of distribution. When this distribution happens, the developers involved MUST be notified of this happening, to be kept aware of the situation at all times. The wording here has been hashed out by many different companies and lawyers involved in the process, as well as community members and everyone now agrees that the proposed change here should work better than what is currently happening. This change has been approved by a review from a large number of different open source legal members, representing the companies involved in this process. Link: https://lore.kernel.org/r/2024073035-bagel-vertigo-e0dd@gregkh Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Michael Dolan Signed-off-by: Michael Dolan Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- .../process/embargoed-hardware-issues.rst | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 2b34bb6b7cda..daebce49cfdf 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -219,6 +219,37 @@ List participants may not communicate about the issue outside of the private mailing list. List participants may not use any shared resources (e.g. employer build farms, CI systems, etc) when working on patches. +Early access +"""""""""""" + +The patches discussed and developed on the list can neither be distributed +to any individual who is not a member of the response team nor to any other +organization. + +To allow the affected silicon vendors to work with their internal teams and +industry partners on testing, validation, and logistics, the following +exception is provided: + + Designated representatives of the affected silicon vendors are + allowed to hand over the patches at any time to the silicon + vendor’s response team. The representative must notify the kernel + response team about the handover. The affected silicon vendor must + have and maintain their own documented security process for any + patches shared with their response team that is consistent with + this policy. + + The silicon vendor’s response team can distribute these patches to + their industry partners and to their internal teams under the + silicon vendor’s documented security process. Feedback from the + industry partners goes back to the silicon vendor and is + communicated by the silicon vendor to the kernel response team. + + The handover to the silicon vendor’s response team removes any + responsibility or liability from the kernel response team regarding + premature disclosure, which happens due to the involvement of the + silicon vendor’s internal teams or industry partners. The silicon + vendor guarantees this release of liability by agreeing to this + process. Coordinated release """"""""""""""""""" From be62f1289df01b7083f9ee5daf2a27d81355d666 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 30 Jul 2024 07:43:21 -0700 Subject: [PATCH 0297/1523] fsi: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-core.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-master-hub.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-master-aspeed.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-master-gpio.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-master-ast-cf.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fsi/fsi-scom.o Add the missing invocations of the MODULE_DESCRIPTION() macro, and fix the copy/paste of the module description comment in fsi-master-ast-cf.c. Reviewed-by: Eddie James Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240730-module_description_orphans-v1-4-7094088076c8@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/fsi-core.c | 1 + drivers/fsi/fsi-master-aspeed.c | 1 + drivers/fsi/fsi-master-ast-cf.c | 3 ++- drivers/fsi/fsi-master-gpio.c | 1 + drivers/fsi/fsi-master-hub.c | 1 + drivers/fsi/fsi-scom.c | 1 + 6 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 46ac5a8beab7..e2e1e9df6115 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -1444,5 +1444,6 @@ static void fsi_exit(void) } module_exit(fsi_exit); module_param(discard_errors, int, 0664); +MODULE_DESCRIPTION("FSI core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(discard_errors, "Don't invoke error handling on bus accesses"); diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index b0b624c3717b..6f5e1bdf7e40 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -670,4 +670,5 @@ static struct platform_driver fsi_master_aspeed_driver = { }; module_platform_driver(fsi_master_aspeed_driver); +MODULE_DESCRIPTION("FSI master driver for AST2600"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c index f8c776ce1b56..a4c37ff8edd6 100644 --- a/drivers/fsi/fsi-master-ast-cf.c +++ b/drivers/fsi/fsi-master-ast-cf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2018 IBM Corp /* - * A FSI master controller, using a simple GPIO bit-banging interface + * A FSI master based on Aspeed ColdFire coprocessor */ #include @@ -1438,5 +1438,6 @@ static struct platform_driver fsi_master_acf = { }; module_platform_driver(fsi_master_acf); +MODULE_DESCRIPTION("A FSI master based on Aspeed ColdFire coprocessor"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME); diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c index 10fc344b6b22..f761344f4873 100644 --- a/drivers/fsi/fsi-master-gpio.c +++ b/drivers/fsi/fsi-master-gpio.c @@ -892,4 +892,5 @@ static struct platform_driver fsi_master_gpio_driver = { }; module_platform_driver(fsi_master_gpio_driver); +MODULE_DESCRIPTION("A FSI master controller, using a simple GPIO bit-banging interface"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-hub.c b/drivers/fsi/fsi-master-hub.c index 6d8b6e8854e5..6568fed7db3c 100644 --- a/drivers/fsi/fsi-master-hub.c +++ b/drivers/fsi/fsi-master-hub.c @@ -295,4 +295,5 @@ static struct fsi_driver hub_master_driver = { }; module_fsi_driver(hub_master_driver); +MODULE_DESCRIPTION("FSI hub master driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index 61dbda9dbe2b..411ddc018cd8 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -625,4 +625,5 @@ static void scom_exit(void) module_init(scom_init); module_exit(scom_exit); +MODULE_DESCRIPTION("SCOM FSI Client device driver"); MODULE_LICENSE("GPL"); From e6cd0dc91ef9d24edda553343e64eb6b542c21dd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 14 Jul 2024 01:48:13 +0200 Subject: [PATCH 0298/1523] eeprom: ee1004: Fix locking issues in ee1004_probe() Currently, the devres-based management of ee1004_bus_data has several issues when it comes to locking: 1. It does not call mutex_unlock() before returning an error. 2. When encountering an error, it deadlocks when trying to recursively lock a mutex. Fix this by moving the mutex-protected bus data initialization into a separate function so that devm_add_action_or_reset() is called without the mutex being held. Reported-by: Dan Carpenter Fixes: 55d57ef6fa97 ("eeprom: ee1004: Use devres for bus data cleanup") Signed-off-by: Armin Wolf Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20240713234813.21746-1-W_Armin@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/ee1004.c | 85 +++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index d4aeeb2b2169..89224d4af4a2 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -233,6 +233,49 @@ static void ee1004_cleanup_bus_data(void *data) mutex_unlock(&ee1004_bus_lock); } +static int ee1004_init_bus_data(struct i2c_client *client) +{ + struct ee1004_bus_data *bd; + int err, cnr = 0; + + bd = ee1004_get_bus_data(client->adapter); + if (!bd) + return dev_err_probe(&client->dev, -ENOSPC, "Only %d busses supported", + EE1004_MAX_BUSSES); + + i2c_set_clientdata(client, bd); + + if (++bd->dev_count == 1) { + /* Use 2 dummy devices for page select command */ + for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { + struct i2c_client *cl; + + cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_out; + } + + bd->set_page[cnr] = cl; + } + + /* Remember current page to avoid unneeded page select */ + err = ee1004_get_current_page(bd); + if (err < 0) + goto err_out; + + dev_dbg(&client->dev, "Currently selected page: %d\n", err); + bd->current_page = err; + } + + return 0; + +err_out: + ee1004_cleanup(cnr, bd); + + return err; +} + static int ee1004_probe(struct i2c_client *client) { struct nvmem_config config = { @@ -251,9 +294,8 @@ static int ee1004_probe(struct i2c_client *client) .compat = true, .base_dev = &client->dev, }; - struct ee1004_bus_data *bd; struct nvmem_device *ndev; - int err, cnr = 0; + int err; /* Make sure we can operate on this adapter */ if (!i2c_check_functionality(client->adapter, @@ -264,46 +306,21 @@ static int ee1004_probe(struct i2c_client *client) mutex_lock(&ee1004_bus_lock); - bd = ee1004_get_bus_data(client->adapter); - if (!bd) { + err = ee1004_init_bus_data(client); + if (err < 0) { mutex_unlock(&ee1004_bus_lock); - return dev_err_probe(&client->dev, -ENOSPC, - "Only %d busses supported", EE1004_MAX_BUSSES); - } - - err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, bd); - if (err < 0) return err; - - i2c_set_clientdata(client, bd); - - if (++bd->dev_count == 1) { - /* Use 2 dummy devices for page select command */ - for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { - struct i2c_client *cl; - - cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); - if (IS_ERR(cl)) { - mutex_unlock(&ee1004_bus_lock); - return PTR_ERR(cl); - } - bd->set_page[cnr] = cl; - } - - /* Remember current page to avoid unneeded page select */ - err = ee1004_get_current_page(bd); - if (err < 0) { - mutex_unlock(&ee1004_bus_lock); - return err; - } - dev_dbg(&client->dev, "Currently selected page: %d\n", err); - bd->current_page = err; } ee1004_probe_temp_sensor(client); mutex_unlock(&ee1004_bus_lock); + err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, + i2c_get_clientdata(client)); + if (err < 0) + return err; + ndev = devm_nvmem_register(&client->dev, &config); if (IS_ERR(ndev)) return PTR_ERR(ndev); From f528cd55853968db8e959ff0e4c2c43e561e7b83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:38:23 +0200 Subject: [PATCH 0299/1523] misc: mrvl-cn10k-dpi: add PCI_IOV dependency I found one more missing dependency in the new driver: when building without CONFIG_PCI_IOV, pci_sriov_configure_simple() cannot be called directly: drivers/misc/mrvl_cn10k_dpi.c: In function 'dpi_remove': include/linux/stddef.h:9:14: error: called object is not a function or function pointer 9 | #define NULL ((void *)0) | ^ include/linux/pci.h:2416:41: note: in expansion of macro 'NULL' 2416 | #define pci_sriov_configure_simple NULL | ^~~~ drivers/misc/mrvl_cn10k_dpi.c:652:9: note: in expansion of macro 'pci_sriov_configure_simple' 652 | pci_sriov_configure_simple(pdev, 0); Add this to the Kconfig file as well. Fixes: 5f67eef6dff3 ("misc: mrvl-cn10k-dpi: add Octeon CN10K DPI administrative driver") Signed-off-by: Arnd Bergmann Tested-by: Vamsi Attunuru Link: https://lore.kernel.org/r/20240719103858.1292094-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 41c3d2821a78..41c54051347a 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -587,7 +587,7 @@ config NSM config MARVELL_CN10K_DPI tristate "Octeon CN10K DPI driver" - depends on PCI + depends on PCI && PCI_IOV depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enables Octeon CN10K DMA packet interface (DPI) driver which From d1009d04a0fefe4df86285cbb37c78aa0b7ab852 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 17 Jul 2024 09:17:07 -0700 Subject: [PATCH 0300/1523] char: add missing NetWinder MODULE_DESCRIPTION() macros Since commit 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing"), a module without a MODULE_DESCRIPTION() will result in a warning with make W=1. The following warnings are being observed in drivers/char when CONFIG_ARCH_NETWINDER is enabled: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/char/ds1620.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/char/nwbutton.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/char/nwflash.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240717-md-arm-drivers-char-nw-v1-1-fee7a8505e9e@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/ds1620.c | 1 + drivers/char/nwbutton.c | 1 + drivers/char/nwflash.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index cf89a9631107..a4f4291b4492 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -421,4 +421,5 @@ static void __exit ds1620_exit(void) module_init(ds1620_init); module_exit(ds1620_exit); +MODULE_DESCRIPTION("Dallas Semiconductor DS1620 thermometer driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index ea378c0ed549..92cee5717237 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -241,6 +241,7 @@ static void __exit nwbutton_exit (void) MODULE_AUTHOR("Alex Holden"); +MODULE_DESCRIPTION("NetWinder button driver"); MODULE_LICENSE("GPL"); module_init(nwbutton_init); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 0973c2c2b01a..9f52f0306ef7 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -618,6 +618,7 @@ static void __exit nwflash_exit(void) iounmap((void *)FLASH_BASE); } +MODULE_DESCRIPTION("NetWinder flash memory driver"); MODULE_LICENSE("GPL"); module_param(flashdebug, bool, 0644); From 11512c197d387b59569d3a93af93de204d3bdaa6 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 22 Jul 2024 15:05:11 +0000 Subject: [PATCH 0301/1523] binder: fix descriptor lookup for context manager In commit 15d9da3f818c ("binder: use bitmap for faster descriptor lookup"), it was incorrectly assumed that references to the context manager node should always get descriptor zero assigned to them. However, if the context manager dies and a new process takes its place, then assigning descriptor zero to the new context manager might lead to collisions, as there could still be references to the older node. This issue was reported by syzbot with the following trace: kernel BUG at drivers/android/binder.c:1173! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 447 Comm: binder-util Not tainted 6.10.0-rc6-00348-g31643d84b8c3 #10 Hardware name: linux,dummy-virt (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : binder_inc_ref_for_node+0x500/0x544 lr : binder_inc_ref_for_node+0x1e4/0x544 sp : ffff80008112b940 x29: ffff80008112b940 x28: ffff0e0e40310780 x27: 0000000000000000 x26: 0000000000000001 x25: ffff0e0e40310738 x24: ffff0e0e4089ba34 x23: ffff0e0e40310b00 x22: ffff80008112bb50 x21: ffffaf7b8f246970 x20: ffffaf7b8f773f08 x19: ffff0e0e4089b800 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 000000002de4aa60 x14: 0000000000000000 x13: 2de4acf000000000 x12: 0000000000000020 x11: 0000000000000018 x10: 0000000000000020 x9 : ffffaf7b90601000 x8 : ffff0e0e48739140 x7 : 0000000000000000 x6 : 000000000000003f x5 : ffff0e0e40310b28 x4 : 0000000000000000 x3 : ffff0e0e40310720 x2 : ffff0e0e40310728 x1 : 0000000000000000 x0 : ffff0e0e40310710 Call trace: binder_inc_ref_for_node+0x500/0x544 binder_transaction+0xf68/0x2620 binder_thread_write+0x5bc/0x139c binder_ioctl+0xef4/0x10c8 [...] This patch adds back the previous behavior of assigning the next non-zero descriptor if references to previous context managers still exist. It amends both strategies, the newer dbitmap code and also the legacy slow_desc_lookup_olocked(), by allowing them to start looking for available descriptors at a given offset. Fixes: 15d9da3f818c ("binder: use bitmap for faster descriptor lookup") Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+3dae065ca76952a67257@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000c1c0a0061d1e6979@google.com/ Reviewed-by: Alice Ryhl Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240722150512.4192473-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 15 ++++++--------- drivers/android/dbitmap.h | 22 +++++++--------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f26286e3713e..905290c98c3c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1044,13 +1044,13 @@ static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, } /* Find the smallest unused descriptor the "slow way" */ -static u32 slow_desc_lookup_olocked(struct binder_proc *proc) +static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset) { struct binder_ref *ref; struct rb_node *n; u32 desc; - desc = 1; + desc = offset; for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->data.desc > desc) @@ -1071,21 +1071,18 @@ static int get_ref_desc_olocked(struct binder_proc *proc, u32 *desc) { struct dbitmap *dmap = &proc->dmap; + unsigned int nbits, offset; unsigned long *new, bit; - unsigned int nbits; /* 0 is reserved for the context manager */ - if (node == proc->context->binder_context_mgr_node) { - *desc = 0; - return 0; - } + offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1; if (!dbitmap_enabled(dmap)) { - *desc = slow_desc_lookup_olocked(proc); + *desc = slow_desc_lookup_olocked(proc, offset); return 0; } - if (dbitmap_acquire_first_zero_bit(dmap, &bit) == 0) { + if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) { *desc = bit; return 0; } diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h index b8ac7b4764fd..956f1bd087d1 100644 --- a/drivers/android/dbitmap.h +++ b/drivers/android/dbitmap.h @@ -6,8 +6,7 @@ * * Used by the binder driver to optimize the allocation of the smallest * available descriptor ID. Each bit in the bitmap represents the state - * of an ID, with the exception of BIT(0) which is used exclusively to - * reference binder's context manager. + * of an ID. * * A dbitmap can grow or shrink as needed. This part has been designed * considering that users might need to briefly release their locks in @@ -58,11 +57,7 @@ static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) if (bit < (dmap->nbits >> 2)) return dmap->nbits >> 1; - /* - * Note that find_last_bit() returns dmap->nbits when no bits - * are set. While this is technically not possible here since - * BIT(0) is always set, this check is left for extra safety. - */ + /* find_last_bit() returns dmap->nbits when no bits are set. */ if (bit == dmap->nbits) return NBITS_MIN; @@ -132,16 +127,17 @@ dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) } /* - * Finds and sets the first zero bit in the bitmap. Upon success @bit + * Finds and sets the next zero bit in the bitmap. Upon success @bit * is populated with the index and 0 is returned. Otherwise, -ENOSPC * is returned to indicate that a dbitmap_grow() is needed. */ static inline int -dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) +dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, + unsigned long *bit) { unsigned long n; - n = find_first_zero_bit(dmap->map, dmap->nbits); + n = find_next_zero_bit(dmap->map, dmap->nbits, offset); if (n == dmap->nbits) return -ENOSPC; @@ -154,9 +150,7 @@ dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) static inline void dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) { - /* BIT(0) should always set for the context manager */ - if (bit) - clear_bit(bit, dmap->map); + clear_bit(bit, dmap->map); } static inline int dbitmap_init(struct dbitmap *dmap) @@ -168,8 +162,6 @@ static inline int dbitmap_init(struct dbitmap *dmap) } dmap->nbits = NBITS_MIN; - /* BIT(0) is reserved for the context manager */ - set_bit(0, dmap->map); return 0; } From 2c10a20f5e84ab777d29ed921d4c78d66de6d0fb Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 25 Jul 2024 11:55:10 +0530 Subject: [PATCH 0302/1523] binder_alloc: Fix sleeping function called from invalid context 36c55ce8703c ("binder_alloc: Replace kcalloc with kvcalloc to mitigate OOM issues") introduced schedule while atomic issue. [ 2689.152635][ T4275] BUG: sleeping function called from invalid context at mm/vmalloc.c:2847 [ 2689.161291][ T4275] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 4275, name: kworker/1:140 [ 2689.170708][ T4275] preempt_count: 1, expected: 0 [ 2689.175572][ T4275] RCU nest depth: 0, expected: 0 [ 2689.180521][ T4275] INFO: lockdep is turned off. [ 2689.180523][ T4275] Preemption disabled at: [ 2689.180525][ T4275] [] binder_alloc_deferred_release+0x2c/0x388 .. .. [ 2689.213419][ T4275] __might_resched+0x174/0x178 [ 2689.213423][ T4275] __might_sleep+0x48/0x7c [ 2689.213426][ T4275] vfree+0x4c/0x15c [ 2689.213430][ T4275] kvfree+0x24/0x44 [ 2689.213433][ T4275] binder_alloc_deferred_release+0x2c0/0x388 [ 2689.213436][ T4275] binder_proc_dec_tmpref+0x15c/0x2a8 [ 2689.213440][ T4275] binder_deferred_func+0xa8/0x8ec [ 2689.213442][ T4275] process_one_work+0x254/0x59c [ 2689.213447][ T4275] worker_thread+0x274/0x3ec [ 2689.213450][ T4275] kthread+0x110/0x134 [ 2689.213453][ T4275] ret_from_fork+0x10/0x20 Fix it by moving the place of kvfree outside of spinlock context. Fixes: 36c55ce8703c ("binder_alloc: Replace kcalloc with kvcalloc to mitigate OOM issues") Acked-by: Carlos Llamas Signed-off-by: Mukesh Ojha Link: https://lore.kernel.org/r/20240725062510.2856662-1-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b00961944ab1..b3acbc4174fb 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -939,9 +939,9 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) __free_page(alloc->pages[i].page_ptr); page_count++; } - kvfree(alloc->pages); } spin_unlock(&alloc->lock); + kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); From f38ba5459ced3441852f37f20fcfb7bd39d20f62 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 25 Jul 2024 09:46:32 -0700 Subject: [PATCH 0303/1523] spmi: pmic-arb: Pass the correct of_node to irq_domain_add_tree Currently, irqchips for all of the subnodes (which represent a given bus master) point to the parent wrapper node. This is no bueno, as no interrupts arrive, ever (because nothing references that node). Fix that by passing a reference to the respective master's of_node. Worth noting, this is a NOP for devices with only a single master described. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240522-topic-spmi_multi_master_irqfix-v2-1-7ec92a862b9f@linaro.org Reviewed-by: Abel Vesa Tested-by: Dmitry Baryshkov Reviewed-by: Dmitry Baryshkov Fixes: 02922ccbb330 ("spmi: pmic-arb: Register controller for bus instead of arbiter") Cc: stable@vger.kernel.org Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240725164636.3362690-3-sboyd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spmi/spmi-pmic-arb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index f240fcc5a4e1..b6880c13163c 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -1737,8 +1737,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, dev_dbg(&pdev->dev, "adding irq domain for bus %d\n", bus_index); - bus->domain = irq_domain_add_tree(dev->of_node, - &pmic_arb_irq_domain_ops, bus); + bus->domain = irq_domain_add_tree(node, &pmic_arb_irq_domain_ops, bus); if (!bus->domain) { dev_err(&pdev->dev, "unable to create irq_domain\n"); return -ENOMEM; From ffcf2eb4bfa24f7256de53a95182c3e3e23fdc6c Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 25 Jul 2024 09:46:33 -0700 Subject: [PATCH 0304/1523] spmi: pmic-arb: add missing newline in dev_err format strings dev_err() format strings should end with '\n'. Several such format strings in the spmi-pmic-arb driver are missing it. Add newlines where needed. Fixes: 02922ccbb330 ("spmi: pmic-arb: Register controller for bus instead of arbiter") Signed-off-by: David Collins Link: https://lore.kernel.org/r/20240703221248.3640490-1-quic_collinsd@quicinc.com Reviewed-by: Bjorn Andersson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240725164636.3362690-4-sboyd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spmi/spmi-pmic-arb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index b6880c13163c..9ba9495fcc4b 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -398,7 +398,7 @@ static int pmic_arb_fmt_read_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, u8 sid, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -477,7 +477,7 @@ static int pmic_arb_fmt_write_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -1702,7 +1702,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "cnfg"); if (index < 0) { - dev_err(dev, "cnfg reg region missing"); + dev_err(dev, "cnfg reg region missing\n"); return -EINVAL; } @@ -1712,7 +1712,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "intr"); if (index < 0) { - dev_err(dev, "intr reg region missing"); + dev_err(dev, "intr reg region missing\n"); return -EINVAL; } From 15fffc6a5624b13b428bb1c6e9088e32a55eb82c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 12 Jul 2024 12:42:09 -0700 Subject: [PATCH 0305/1523] driver core: Fix uevent_show() vs driver detach race uevent_show() wants to de-reference dev->driver->name. There is no clean way for a device attribute to de-reference dev->driver unless that attribute is defined via (struct device_driver).dev_groups. Instead, the anti-pattern of taking the device_lock() in the attribute handler risks deadlocks with code paths that remove device attributes while holding the lock. This deadlock is typically invisible to lockdep given the device_lock() is marked lockdep_set_novalidate_class(), but some subsystems allocate a local lockdep key for @dev->mutex to reveal reports of the form: ====================================================== WARNING: possible circular locking dependency detected 6.10.0-rc7+ #275 Tainted: G OE N ------------------------------------------------------ modprobe/2374 is trying to acquire lock: ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220 but task is already holding lock: ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&cxl_root_key){+.+.}-{3:3}: __mutex_lock+0x99/0xc30 uevent_show+0xac/0x130 dev_attr_show+0x18/0x40 sysfs_kf_seq_show+0xac/0xf0 seq_read_iter+0x110/0x450 vfs_read+0x25b/0x340 ksys_read+0x67/0xf0 do_syscall_64+0x75/0x190 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (kn->active#6){++++}-{0:0}: __lock_acquire+0x121a/0x1fa0 lock_acquire+0xd6/0x2e0 kernfs_drain+0x1e9/0x200 __kernfs_remove+0xde/0x220 kernfs_remove_by_name_ns+0x5e/0xa0 device_del+0x168/0x410 device_unregister+0x13/0x60 devres_release_all+0xb8/0x110 device_unbind_cleanup+0xe/0x70 device_release_driver_internal+0x1c7/0x210 driver_detach+0x47/0x90 bus_remove_driver+0x6c/0xf0 cxl_acpi_exit+0xc/0x11 [cxl_acpi] __do_sys_delete_module.isra.0+0x181/0x260 do_syscall_64+0x75/0x190 entry_SYSCALL_64_after_hwframe+0x76/0x7e The observation though is that driver objects are typically much longer lived than device objects. It is reasonable to perform lockless de-reference of a @driver pointer even if it is racing detach from a device. Given the infrequency of driver unregistration, use synchronize_rcu() in module_remove_driver() to close any potential races. It is potentially overkill to suffer synchronize_rcu() just to handle the rare module removal racing uevent_show() event. Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1]. Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()") Reported-by: syzbot+4762dd74e32532cda5ff@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.ne.jp [1] Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.notmuch Cc: stable@vger.kernel.org Cc: Ashish Sangwan Cc: Namjae Jeon Cc: Dirk Behme Cc: Greg Kroah-Hartman Cc: Rafael J. Wysocki Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/172081332794.577428.9738802016494057132.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 13 ++++++++----- drivers/base/module.c | 4 ++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 730cae66607c..8c0733d3aad8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj) static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); + struct device_driver *driver; int retval = 0; /* add device node properties if present */ @@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Synchronize with module_remove_driver() */ + rcu_read_lock(); + driver = READ_ONCE(dev->driver); + if (driver) + add_uevent_var(env, "DRIVER=%s", driver->name); + rcu_read_unlock(); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; diff --git a/drivers/base/module.c b/drivers/base/module.c index 7af224e6914a..f742ad2a21da 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "base.h" static char *make_driver_name(const struct device_driver *drv) @@ -97,6 +98,9 @@ void module_remove_driver(const struct device_driver *drv) if (!drv) return; + /* Synchronize with dev_uevent() */ + synchronize_rcu(); + sysfs_remove_link(&drv->p->kobj, "module"); if (drv->owner) From 00af4f3dda1461ec90d892edc10bec6d3c50c554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 15 Jul 2024 12:44:53 +0200 Subject: [PATCH 0306/1523] USB: serial: debug: do not echo input by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is intended as a "client" end of the console connection. When connected to a host it's supposed to receive debug logs, and possibly allow to interact with whatever debug console is available there. Feeding messages back, depending on a configuration may cause log messages be executed as shell commands (which can be really bad if one is unlucky, imagine a log message like "prevented running `rm -rf /home`"). In case of Xen, it exposes sysrq-like debug interface, and feeding it its own logs will pretty quickly hit 'R' for "instant reboot". Contrary to a classic serial console, the USB one cannot be configured ahead of time, as the device shows up only when target OS is up. And at the time device is opened to execute relevant ioctl, it's already too late, especially when logs start flowing shortly after device is initialized. Avoid the issue by changing default to no echo for this type of devices. Signed-off-by: Marek Marczykowski-Górecki [ johan: amend summary; disable also ECHONL ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/usb_debug.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index 6934970f180d..5a8869cd95d5 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -76,6 +76,11 @@ static void usb_debug_process_read_urb(struct urb *urb) usb_serial_generic_process_read_urb(urb); } +static void usb_debug_init_termios(struct tty_struct *tty) +{ + tty->termios.c_lflag &= ~(ECHO | ECHONL); +} + static struct usb_serial_driver debug_device = { .driver = { .owner = THIS_MODULE, @@ -85,6 +90,7 @@ static struct usb_serial_driver debug_device = { .num_ports = 1, .bulk_out_size = USB_DEBUG_MAX_PACKET_SIZE, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -96,6 +102,7 @@ static struct usb_serial_driver dbc_device = { .id_table = dbc_id_table, .num_ports = 1, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; From 9da8aa3b3ca05b22be5ba312771e6df4366e56cc Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Wed, 31 Jul 2024 13:48:28 +0200 Subject: [PATCH 0307/1523] ASoC: nau8822: Lower debug print priority NAU8822 codec PLL parameters are not an information that the general user should care about, this print is supposed to be used for debugging, adjust the debug print priority accordingly. Signed-off-by: Francesco Dolcini Link: https://patch.msgid.link/20240731114828.61238-1-francesco@dolcini.it Signed-off-by: Mark Brown --- sound/soc/codecs/nau8822.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index e1cbaf8a944d..fd4a96a12060 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -736,7 +736,7 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, return ret; } - dev_info(component->dev, + dev_dbg(component->dev, "pll_int=%x pll_frac=%x mclk_scaler=%x pre_factor=%x\n", pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); From e1cf752ede8e82c2d084868c50a1ca6cdb07c9c4 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Wed, 12 Jun 2024 11:29:34 +0200 Subject: [PATCH 0308/1523] dt-bindings: eeprom: at25: add fujitsu,mb85rs256 compatible The fujitsu,mb85rs256 is a 256 Kbit SPI memory FRAM in the same family as the two existing fujitsu,mb85rs* compatibles and at25 compatible. Signed-off-by: Francesco Dolcini Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240612092934.12282-1-francesco@dolcini.it Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/eeprom/at25.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 1715b0c9feea..c31e5e719525 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -28,6 +28,7 @@ properties: - anvo,anv32e61w - atmel,at25256B - fujitsu,mb85rs1mt + - fujitsu,mb85rs256 - fujitsu,mb85rs64 - microchip,at25160bn - microchip,25lc040 From 6339b7edada2d0c4661bc4200f1adfc80f2e24aa Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 30 Jul 2024 16:01:36 +0530 Subject: [PATCH 0309/1523] nvme: remove a field from nvme_ns_head pi_offset field is not required to be present in nvme_ns_head. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 16 ++++++++-------- drivers/nvme/host/nvme.h | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e8afb5a0f3a3..33fa01c599ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -36,6 +36,7 @@ struct nvme_ns_info { struct nvme_ns_ids ids; u32 nsid; __le32 anagrpid; + u8 pi_offset; bool is_shared; bool is_readonly; bool is_ready; @@ -1758,7 +1759,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } static bool nvme_init_integrity(struct nvme_ns_head *head, - struct queue_limits *lim) + struct queue_limits *lim, struct nvme_ns_info *info) { struct blk_integrity *bi = &lim->integrity; @@ -1816,7 +1817,7 @@ static bool nvme_init_integrity(struct nvme_ns_head *head, } bi->tuple_size = head->ms; - bi->pi_offset = head->pi_offset; + bi->pi_offset = info->pi_offset; return true; } @@ -1902,12 +1903,11 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, static void nvme_configure_metadata(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id, - struct nvme_id_ns_nvm *nvm) + struct nvme_id_ns_nvm *nvm, struct nvme_ns_info *info) { head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); head->pi_type = 0; head->pi_size = 0; - head->pi_offset = 0; head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; @@ -1922,7 +1922,7 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - head->pi_offset = head->ms - head->pi_size; + info->pi_offset = head->ms - head->pi_size; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -2156,7 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; @@ -2176,7 +2176,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->head, &lim)) + if (!nvme_init_integrity(ns->head, &lim, info)) capacity = 0; ret = queue_limits_commit_update(ns->disk->queue, &lim); @@ -2280,7 +2280,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else - nvme_init_integrity(ns->head, &lim); + nvme_init_integrity(ns->head, &lim, info); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f900e44243ae..c6386af76d24 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -474,7 +474,6 @@ struct nvme_ns_head { u16 ms; u16 pi_size; u8 pi_type; - u8 pi_offset; u8 guard_type; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; From 73d148ccb9e1b62cdcb65e1c6a461229446a55a2 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 30 Jul 2024 16:01:37 +0530 Subject: [PATCH 0310/1523] nvme: change data type of lba_shift u8 fits the need, so stop using int for it. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c6386af76d24..6039dc78b36e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -470,7 +470,7 @@ struct nvme_ns_head { struct nvme_effects_log *effects; u64 nuse; unsigned ns_id; - int lba_shift; + u8 lba_shift; u16 ms; u16 pi_size; u8 pi_type; From b4c1f33a5d59f577814d87704c45a745a35d8bd9 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 30 Jul 2024 16:01:38 +0530 Subject: [PATCH 0311/1523] nvme: reorganize nvme_ns_head fields shuffle few fields to reduce the holes within nvme_ns_head. On x86_64, the size is reduced to 1104 bytes from 1120 bytes. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6039dc78b36e..ae5314d32943 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -462,19 +462,19 @@ struct nvme_ns_head { struct srcu_struct srcu; struct nvme_subsystem *subsys; struct nvme_ns_ids ids; - struct list_head entry; - struct kref ref; - bool shared; - bool passthru_err_log_enabled; - int instance; - struct nvme_effects_log *effects; - u64 nuse; - unsigned ns_id; u8 lba_shift; u16 ms; u16 pi_size; u8 pi_type; u8 guard_type; + struct list_head entry; + struct kref ref; + bool shared; + bool passthru_err_log_enabled; + struct nvme_effects_log *effects; + u64 nuse; + unsigned ns_id; + int instance; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif From e9b36c5be2e7fdef2cc933c1dac50bd81881e9b8 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:50 +0300 Subject: [PATCH 0312/1523] drm/dp_mst: Factor out function to queue a topology probe work Factor out a function to queue a work for probing the topology, also used by the next patch. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-2-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 7f8e1cfbe19d..eb1b7b286b76 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -2692,6 +2692,11 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) drm_kms_helper_hotplug_event(dev); } +static void drm_dp_mst_queue_probe_work(struct drm_dp_mst_topology_mgr *mgr) +{ + queue_work(system_long_wq, &mgr->work); +} + static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, u8 *guid) { @@ -3685,7 +3690,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* Write reset payload */ drm_dp_dpcd_write_payload(mgr, 0, 0, 0x3f); - queue_work(system_long_wq, &mgr->work); + drm_dp_mst_queue_probe_work(mgr); ret = 0; } else { @@ -3809,7 +3814,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, * state of our in-memory topology back into sync with reality. So, * restart the probing process as if we're probing a new hub */ - queue_work(system_long_wq, &mgr->work); + drm_dp_mst_queue_probe_work(mgr); mutex_unlock(&mgr->lock); if (sync) { From dbaeef363ea54f4c18112874b77503c72ba60fec Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:51 +0300 Subject: [PATCH 0313/1523] drm/dp_mst: Add a helper to queue a topology probe A follow up i915 patch will need to reprobe the MST topology after the initial probing, add a helper for this. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-3-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 27 +++++++++++++++++++ include/drm/display/drm_dp_mst_helper.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index eb1b7b286b76..0fbc9bbed5d8 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3728,6 +3728,33 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb) drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb); } +/** + * drm_dp_mst_topology_queue_probe - Queue a topology probe + * @mgr: manager to probe + * + * Queue a work to probe the MST topology. Driver's should call this only to + * sync the topology's HW->SW state after the MST link's parameters have + * changed in a way the state could've become out-of-sync. This is the case + * for instance when the link rate between the source and first downstream + * branch device has switched between UHBR and non-UHBR rates. Except of those + * cases - for instance when a sink gets plugged/unplugged to a port - the SW + * state will get updated automatically via MST UP message notifications. + */ +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr) +{ + mutex_lock(&mgr->lock); + + if (drm_WARN_ON(mgr->dev, !mgr->mst_state || !mgr->mst_primary)) + goto out_unlock; + + drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary); + drm_dp_mst_queue_probe_work(mgr); + +out_unlock: + mutex_unlock(&mgr->lock); +} +EXPORT_SYMBOL(drm_dp_mst_topology_queue_probe); + /** * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager * @mgr: manager to suspend diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..02b037d3a93f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -885,6 +885,8 @@ int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr); void drm_dp_mst_dump_topology(struct seq_file *m, struct drm_dp_mst_topology_mgr *mgr); +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr); + void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr); int __must_check drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, From 0cf09702a118b1c09b694862061913108565edd0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:52 +0300 Subject: [PATCH 0314/1523] drm/dp_mst: Simplify the condition when to enumerate path resources In the if (old_ddps != port->ddps || !created) if (port->ddps && !port->input) ret = drm_dp_send_enum_path_resources(); sequence the first if's condition is true if the port exists already (!created) or the port was created anew (hence old_ddps==0) and it was in the plugged state (port->ddps==1). The second if's condition is true for output ports in the plugged state. So the function is called for an output port in the plugged state, regardless if it already existed or not and regardless of the old plugged state. In all other cases port->full_pbn can be zeroed as the port is either an input for which full_pbn is never set, or an output in the unplugged state for which full_pbn was already zeroed previously or the port was just created (with port->full_pbn==0). Simplify the condition, making it clear that the path resources are always enumerated for an output port in the plugged state. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-4-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 0fbc9bbed5d8..a892ef8e8a2d 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -2339,7 +2339,7 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, { struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; struct drm_dp_mst_port *port; - int old_ddps = 0, ret; + int ret; u8 new_pdt = DP_PEER_DEVICE_NONE; bool new_mcs = 0; bool created = false, send_link_addr = false, changed = false; @@ -2372,7 +2372,6 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, */ drm_modeset_lock(&mgr->base.lock, NULL); - old_ddps = port->ddps; changed = port->ddps != port_msg->ddps || (port->ddps && (port->ldps != port_msg->legacy_device_plug_status || @@ -2407,15 +2406,13 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, * Reprobe PBN caps on both hotplug, and when re-probing the link * for our parent mstb */ - if (old_ddps != port->ddps || !created) { - if (port->ddps && !port->input) { - ret = drm_dp_send_enum_path_resources(mgr, mstb, - port); - if (ret == 1) - changed = true; - } else { - port->full_pbn = 0; - } + if (port->ddps && !port->input) { + ret = drm_dp_send_enum_path_resources(mgr, mstb, + port); + if (ret == 1) + changed = true; + } else { + port->full_pbn = 0; } ret = drm_dp_port_set_pdt(port, new_pdt, new_mcs); From a64d5fe490cd92d74b1cf46da63164cefcc61a58 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:53 +0300 Subject: [PATCH 0315/1523] drm/i915/ddi: For an active output call the DP encoder sync_state() only for DP If the DDI encoder output is enabled in HDMI mode there is no point in calling intel_dp_sync_state(), as in that case the DPCD initialization will fail - as expected - with AUX timeouts. Prevent calling the hook in this case. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a07aca96e551..11ee4406dce8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4172,7 +4172,8 @@ static void intel_ddi_sync_state(struct intel_encoder *encoder, intel_tc_port_sanitize_mode(enc_to_dig_port(encoder), crtc_state); - if (intel_encoder_is_dp(encoder)) + if ((crtc_state && intel_crtc_has_dp_encoder(crtc_state)) || + (!crtc_state && intel_encoder_is_dp(encoder))) intel_dp_sync_state(encoder, crtc_state); } From a3f91f405aa7c54d0856c1b8698e4ff05ae7d439 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:54 +0300 Subject: [PATCH 0316/1523] drm/i915/dp: Initialize the link parameters during HW readout Initialize the DP link parameters during HW readout. These need to be up-to-date at least for the MST topology probing, which depends on the link rate and lane count programmed in DPCD. A follow-up patch will program the DPCD values to reflect the maximum link parameters before the first MST topology probing, but should do so only if the link is disabled (link_trained==false). Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-6-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 86412ae7b48f..32964e15f3b1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3352,8 +3352,11 @@ void intel_dp_sync_state(struct intel_encoder *encoder, intel_dp_tunnel_resume(intel_dp, crtc_state, dpcd_updated); - if (crtc_state) + if (crtc_state) { intel_dp_reset_link_params(intel_dp); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count); + intel_dp->link_trained = true; + } } bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, From a4530e20faddcc4ef4e5939f186a8560c71a5e94 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:55 +0300 Subject: [PATCH 0317/1523] drm/i915/dp: Send only a single modeset-retry uevent for a commit There are multiple failure cases a modeset-retry uevent can be sent for a link (TBT tunnel BW allocation failure, unrecoverable link training failure), a follow-up patch adding the handling for a new case where the DP MST payload allocation fails. The uevent is the same in all cases, sent to all the connectors on the link, so in case of multiple failures there is no point in sending a separate uevent for each failure; prevent this, sending only a single modeset-retry uevent for a commit. Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-7-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index a9d2acdc51a4..3501125c5515 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1754,6 +1754,7 @@ struct intel_dp { u8 lane_count; u8 sink_count; bool link_trained; + bool needs_modeset_retry; bool use_max_params; u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 32964e15f3b1..e52c652b26a9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2876,6 +2876,11 @@ intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, struct intel_dp *intel_dp = enc_to_intel_dp(encoder); int i; + if (intel_dp->needs_modeset_retry) + return; + + intel_dp->needs_modeset_retry = true; + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { intel_dp_queue_modeset_retry_work(intel_dp->attached_connector); @@ -3009,6 +3014,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, { memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp->link_trained = false; + intel_dp->needs_modeset_retry = false; intel_dp->link_rate = link_rate; intel_dp->lane_count = lane_count; } From 634e1804242b0d5d64031199236fb3f337b5af3d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:56 +0300 Subject: [PATCH 0318/1523] drm/i915/dp: Add a separate function to reduce the link parameters A follow-up patch will add an alternative way to reduce the link parameters in BW order on MST links, prepare for that here. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-8-imre.deak@intel.com --- .../drm/i915/display/intel_dp_link_training.c | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 58dea87a9fa2..57536ae200b7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1193,6 +1193,36 @@ static int reduce_lane_count(struct intel_dp *intel_dp, int current_lane_count) return current_lane_count >> 1; } +static bool reduce_link_params_in_rate_lane_order(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + int link_rate; + int lane_count; + + lane_count = crtc_state->lane_count; + link_rate = reduce_link_rate(intel_dp, crtc_state->port_clock); + if (link_rate < 0) { + lane_count = reduce_lane_count(intel_dp, crtc_state->lane_count); + link_rate = intel_dp_max_common_rate(intel_dp); + } + + if (lane_count < 0) + return false; + + *new_link_rate = link_rate; + *new_lane_count = lane_count; + + return true; +} + +static bool reduce_link_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + return reduce_link_params_in_rate_lane_order(intel_dp, crtc_state, + new_link_rate, new_lane_count); +} + static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -1206,14 +1236,7 @@ static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } - new_lane_count = crtc_state->lane_count; - new_link_rate = reduce_link_rate(intel_dp, crtc_state->port_clock); - if (new_link_rate < 0) { - new_lane_count = reduce_lane_count(intel_dp, crtc_state->lane_count); - new_link_rate = intel_dp_max_common_rate(intel_dp); - } - - if (new_lane_count < 0) + if (!reduce_link_params(intel_dp, crtc_state, &new_link_rate, &new_lane_count)) return -1; if (intel_dp_is_edp(intel_dp) && From 96c468c366dacc0e41e08ac53e20a5025f6ba967 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:57 +0300 Subject: [PATCH 0319/1523] drm/i915/dp: Add helpers to set link training mode, BW parameters Add helpers to set the link mode and BW parameters. These are required by a follow-up patch setting the parameters for a disabled link. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-9-imre.deak@intel.com --- .../drm/i915/display/intel_dp_link_training.c | 34 +++++++++++++------ .../drm/i915/display/intel_dp_link_training.h | 6 ++++ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 57536ae200b7..214c8858b8a9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -21,6 +21,8 @@ * IN THE SOFTWARE. */ +#include + #include "i915_drv.h" #include "intel_display_types.h" #include "intel_dp.h" @@ -703,26 +705,28 @@ static bool intel_dp_link_max_vswing_reached(struct intel_dp *intel_dp, return true; } -static void -intel_dp_update_downspread_ctrl(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +void intel_dp_link_training_set_mode(struct intel_dp *intel_dp, int link_rate, bool is_vrr) { u8 link_config[2]; - link_config[0] = crtc_state->vrr.flipline ? DP_MSA_TIMING_PAR_IGNORE_EN : 0; - link_config[1] = intel_dp_is_uhbr(crtc_state) ? + link_config[0] = is_vrr ? DP_MSA_TIMING_PAR_IGNORE_EN : 0; + link_config[1] = drm_dp_is_uhbr_rate(link_rate) ? DP_SET_ANSI_128B132B : DP_SET_ANSI_8B10B; drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2); } -static void -intel_dp_update_link_bw_set(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - u8 link_bw, u8 rate_select) +static void intel_dp_update_downspread_ctrl(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { - u8 lane_count = crtc_state->lane_count; + intel_dp_link_training_set_mode(intel_dp, + crtc_state->port_clock, crtc_state->vrr.flipline); +} - if (crtc_state->enhanced_framing) +void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, + int link_bw, int rate_select, int lane_count, + bool enhanced_framing) +{ + if (enhanced_framing) lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; if (link_bw) { @@ -746,6 +750,14 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, } } +static void intel_dp_update_link_bw_set(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + u8 link_bw, u8 rate_select) +{ + intel_dp_link_training_set_bw(intel_dp, link_bw, rate_select, crtc_state->lane_count, + crtc_state->enhanced_framing); +} + /* * Prepare link training by configuring the link parameters. On DDI platforms * also enable the port here. diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h index 42e7fc6cb171..2066b9146762 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h @@ -16,6 +16,12 @@ struct intel_dp; int intel_dp_read_dprx_caps(struct intel_dp *intel_dp, u8 dpcd[DP_RECEIVER_CAP_SIZE]); int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp); +void intel_dp_link_training_set_mode(struct intel_dp *intel_dp, + int link_rate, bool is_vrr); +void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, + int link_bw, int rate_select, int lane_count, + bool enhanced_framing); + void intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, From aa705f7ec6e2423af87c20b30f0e26eafdfd4513 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 29 Jul 2024 17:44:58 +0300 Subject: [PATCH 0320/1523] drm/i915/dp_mst: Reduce the link parameters in BW order after LT failures On MST links - at least for some MST branch devices - the list of modes returned to users on an enabled link depends on the current link rate/lane count parameters (besides the DPRX link capabilities, any MST branch BW limit and the maximum link parameters reduced after LT failures). In particular the MST branch BW limit may depend on the link rate/lane count parameters programmed to DPCD. After an LT failure and limiting the maximum link parameters accordingly, users should see a mode list reflecting these new limits. However with the current fallback order this isn't ensured, as the new limit could allow for modes requiring a higher link BW, but these modes will be filtered out due to the enabled link's lower link BW. Ensure that the mode list changes in a consistent way after a link training failure and reducing the link parameters by changing the fallback order on MST links to happen in BW order. v2: - s/INTEL_DP_MAX_SUPPORTED_LANE_COUNTS/INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS and s/num_common_lane_counts/num_common_lane_configs to make the difference wrt. max lane counts clearer. (Suraj) - Add a TODO comment to make the SST fallback logic work the same way as MST. (Arun) - Use sort_r()'s default swap function instead of a custom one. Cc: Suraj Kandpal Cc: Arun R Murthy Reviewed-by: Suraj Kandpal Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240729144458.2763667-1-imre.deak@intel.com --- .../drm/i915/display/intel_display_types.h | 12 ++ drivers/gpu/drm/i915/display/intel_dp.c | 103 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 2 + .../drm/i915/display/intel_dp_link_training.c | 44 +++++++- 4 files changed, 159 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 3501125c5515..ac98bb57456e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1778,6 +1778,18 @@ struct intel_dp { int common_rates[DP_MAX_SUPPORTED_RATES]; struct { /* TODO: move the rest of link specific fields to here */ + /* common rate,lane_count configs in bw order */ + int num_configs; +#define INTEL_DP_MAX_LANE_COUNT 4 +#define INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS (ilog2(INTEL_DP_MAX_LANE_COUNT) + 1) +#define INTEL_DP_LANE_COUNT_EXP_BITS order_base_2(INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS) +#define INTEL_DP_LINK_RATE_IDX_BITS (BITS_PER_TYPE(u8) - INTEL_DP_LANE_COUNT_EXP_BITS) +#define INTEL_DP_MAX_LINK_CONFIGS (DP_MAX_SUPPORTED_RATES * \ + INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS) + struct intel_dp_link_config { + u8 link_rate_idx:INTEL_DP_LINK_RATE_IDX_BITS; + u8 lane_count_exp:INTEL_DP_LANE_COUNT_EXP_BITS; + } configs[INTEL_DP_MAX_LINK_CONFIGS]; /* Max lane count for the current link */ int max_lane_count; /* Max rate for the current link */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e52c652b26a9..c8cf3448fedd 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -634,6 +635,106 @@ int intel_dp_rate_index(const int *rates, int len, int rate) return -1; } +static int intel_dp_link_config_rate(struct intel_dp *intel_dp, + const struct intel_dp_link_config *lc) +{ + return intel_dp_common_rate(intel_dp, lc->link_rate_idx); +} + +static int intel_dp_link_config_lane_count(const struct intel_dp_link_config *lc) +{ + return 1 << lc->lane_count_exp; +} + +static int intel_dp_link_config_bw(struct intel_dp *intel_dp, + const struct intel_dp_link_config *lc) +{ + return drm_dp_max_dprx_data_rate(intel_dp_link_config_rate(intel_dp, lc), + intel_dp_link_config_lane_count(lc)); +} + +static int link_config_cmp_by_bw(const void *a, const void *b, const void *p) +{ + struct intel_dp *intel_dp = (struct intel_dp *)p; /* remove const */ + const struct intel_dp_link_config *lc_a = a; + const struct intel_dp_link_config *lc_b = b; + int bw_a = intel_dp_link_config_bw(intel_dp, lc_a); + int bw_b = intel_dp_link_config_bw(intel_dp, lc_b); + + if (bw_a != bw_b) + return bw_a - bw_b; + + return intel_dp_link_config_rate(intel_dp, lc_a) - + intel_dp_link_config_rate(intel_dp, lc_b); +} + +static void intel_dp_link_config_init(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_dp_link_config *lc; + int num_common_lane_configs; + int i; + int j; + + if (drm_WARN_ON(&i915->drm, !is_power_of_2(intel_dp_max_common_lane_count(intel_dp)))) + return; + + num_common_lane_configs = ilog2(intel_dp_max_common_lane_count(intel_dp)) + 1; + + if (drm_WARN_ON(&i915->drm, intel_dp->num_common_rates * num_common_lane_configs > + ARRAY_SIZE(intel_dp->link.configs))) + return; + + intel_dp->link.num_configs = intel_dp->num_common_rates * num_common_lane_configs; + + lc = &intel_dp->link.configs[0]; + for (i = 0; i < intel_dp->num_common_rates; i++) { + for (j = 0; j < num_common_lane_configs; j++) { + lc->lane_count_exp = j; + lc->link_rate_idx = i; + + lc++; + } + } + + sort_r(intel_dp->link.configs, intel_dp->link.num_configs, + sizeof(intel_dp->link.configs[0]), + link_config_cmp_by_bw, NULL, + intel_dp); +} + +void intel_dp_link_config_get(struct intel_dp *intel_dp, int idx, int *link_rate, int *lane_count) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + const struct intel_dp_link_config *lc; + + if (drm_WARN_ON(&i915->drm, idx < 0 || idx >= intel_dp->link.num_configs)) + idx = 0; + + lc = &intel_dp->link.configs[idx]; + + *link_rate = intel_dp_link_config_rate(intel_dp, lc); + *lane_count = intel_dp_link_config_lane_count(lc); +} + +int intel_dp_link_config_index(struct intel_dp *intel_dp, int link_rate, int lane_count) +{ + int link_rate_idx = intel_dp_rate_index(intel_dp->common_rates, intel_dp->num_common_rates, + link_rate); + int lane_count_exp = ilog2(lane_count); + int i; + + for (i = 0; i < intel_dp->link.num_configs; i++) { + const struct intel_dp_link_config *lc = &intel_dp->link.configs[i]; + + if (lc->lane_count_exp == lane_count_exp && + lc->link_rate_idx == link_rate_idx) + return i; + } + + return -1; +} + static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); @@ -652,6 +753,8 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } + + intel_dp_link_config_init(intel_dp); } static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 9be539edf817..1b9aaddd8c35 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -107,6 +107,8 @@ int intel_dp_max_common_rate(struct intel_dp *intel_dp); int intel_dp_max_common_lane_count(struct intel_dp *intel_dp); int intel_dp_common_rate(struct intel_dp *intel_dp, int index); int intel_dp_rate_index(const int *rates, int len, int rate); +int intel_dp_link_config_index(struct intel_dp *intel_dp, int link_rate, int lane_count); +void intel_dp_link_config_get(struct intel_dp *intel_dp, int idx, int *link_rate, int *lane_count); void intel_dp_update_sink_caps(struct intel_dp *intel_dp); void intel_dp_reset_link_params(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 214c8858b8a9..86d58a4c8453 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1170,6 +1170,41 @@ static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, return true; } +static bool reduce_link_params_in_bw_order(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + int link_rate; + int lane_count; + int i; + + i = intel_dp_link_config_index(intel_dp, crtc_state->port_clock, crtc_state->lane_count); + for (i--; i >= 0; i--) { + intel_dp_link_config_get(intel_dp, i, &link_rate, &lane_count); + + if ((intel_dp->link.force_rate && + intel_dp->link.force_rate != link_rate) || + (intel_dp->link.force_lane_count && + intel_dp->link.force_lane_count != lane_count)) + continue; + + /* TODO: Make switching from UHBR to non-UHBR rates work. */ + if (drm_dp_is_uhbr_rate(crtc_state->port_clock) != + drm_dp_is_uhbr_rate(link_rate)) + continue; + + break; + } + + if (i < 0) + return false; + + *new_link_rate = link_rate; + *new_lane_count = lane_count; + + return true; +} + static int reduce_link_rate(struct intel_dp *intel_dp, int current_rate) { int rate_index; @@ -1231,8 +1266,13 @@ static bool reduce_link_params_in_rate_lane_order(struct intel_dp *intel_dp, static bool reduce_link_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, int *new_link_rate, int *new_lane_count) { - return reduce_link_params_in_rate_lane_order(intel_dp, crtc_state, - new_link_rate, new_lane_count); + /* TODO: Use the same fallback logic on SST as on MST. */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + return reduce_link_params_in_bw_order(intel_dp, crtc_state, + new_link_rate, new_lane_count); + else + return reduce_link_params_in_rate_lane_order(intel_dp, crtc_state, + new_link_rate, new_lane_count); } static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, From 8f1fe39ded725483a67c15014a1863179c783b38 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:59 +0300 Subject: [PATCH 0321/1523] drm/i915/dp_mst: Configure MST after the link parameters are reset The MST topology probing depends on the maximum link parameters - programmed to DPCD if required by a follow-up patch - so make sure these parameters are up-to-date before configuring and probing the MST topology. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-11-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c8cf3448fedd..d707ad666993 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6020,13 +6020,13 @@ intel_dp_detect(struct drm_connector *connector, intel_dp_detect_sdp_caps(intel_dp); - intel_dp_mst_configure(intel_dp); - if (intel_dp->reset_link_params) { intel_dp_reset_link_params(intel_dp); intel_dp->reset_link_params = false; } + intel_dp_mst_configure(intel_dp); + intel_dp_print_rates(intel_dp); if (intel_dp->is_mst) { From 9d1f218ddce20329a01c46d7a1ea4d88fdb0fe86 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:55:00 +0300 Subject: [PATCH 0322/1523] drm/i915/dp_mst: Queue modeset-retry after a failed payload BW allocation If the MST payload allocation failed, enabling the output also failed most probably, so send a uevent accordinly requesting the user to retry the modeset. While at it remove the driver specific debug message, there is already one printed by drm_dp_add_payload_part1(). Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-12-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27ce5c3f5951..57f29906fa28 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1158,8 +1158,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, ret = drm_dp_add_payload_part1(&intel_dp->mst_mgr, mst_state, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (ret < 0) - drm_dbg_kms(&dev_priv->drm, "Failed to create MST payload for %s: %d\n", - connector->base.name, ret); + intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); /* * Before Gen 12 this is not done as part of @@ -1223,6 +1222,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, enum transcoder trans = pipe_config->cpu_transcoder; bool first_mst_stream = intel_dp->active_mst_links == 1; struct intel_crtc *pipe_crtc; + int ret; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); @@ -1254,8 +1254,11 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, if (first_mst_stream) intel_ddi_wait_for_fec_status(encoder, pipe_config, true); - drm_dp_add_payload_part2(&intel_dp->mst_mgr, - drm_atomic_get_mst_payload_state(mst_state, connector->port)); + ret = drm_dp_add_payload_part2(&intel_dp->mst_mgr, + drm_atomic_get_mst_payload_state(mst_state, + connector->port)); + if (ret < 0) + intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); if (DISPLAY_VER(dev_priv) >= 12) intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, trans), From e40fbf616610e33aba9597f278925b4ae8cc806c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:55:01 +0300 Subject: [PATCH 0323/1523] drm/i915/dp_mst: Reprobe the MST topology after a link parameter change The MST link BW reported by branch devices via the ENUM_PATH_RESOURCES message depends on the channel coding and link rate/lane count parameters programmed to DPCD. This is the case at least for some branch devices, while for others the reported BW is independent of the link parameters. In any case the DP standard requires the branch device to adjust the returned value to both account for the different way the BW for FEC is accounted for (included in the returned value for non-UHBR and not included for UHBR rates) and to limit the returned value to the (trained) link BW between the source and first downstream branch device, see DP v2.0/v2.1 Figure 2-94, DP v2.1 5.9.7. Presumedly this is also the reason why the standard requires the DPCD link rate/lane count values being up-to-date before sending the ENUM_PATH_RESOURCES message, see DP v2.1 2.14.9.4. Based on the above reprobe the MST topology after the link is retrained with new link parameters to make sure that the MST link BW tracked in the MST topology state (via each topology port's full_pbn value) is up-to-date. The next patch will make sure that the MST link BW is also kept up-to-date if the link is disabled. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-13-imre.deak@intel.com --- .../drm/i915/display/intel_display_types.h | 8 +++++ drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 32 ++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ac98bb57456e..327a4133030f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1794,6 +1794,14 @@ struct intel_dp { int max_lane_count; /* Max rate for the current link */ int max_rate; + /* + * Link parameters for which the MST topology was probed. + * Tracking these ensures that the MST path resources are + * re-enumerated whenever the link is retrained with new link + * parameters, as required by the DP standard. + */ + int mst_probed_lane_count; + int mst_probed_rate; int force_lane_count; int force_rate; bool retrain_disabled; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d707ad666993..5523469404f9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3126,6 +3126,8 @@ void intel_dp_reset_link_params(struct intel_dp *intel_dp) { intel_dp->link.max_lane_count = intel_dp_max_common_lane_count(intel_dp); intel_dp->link.max_rate = intel_dp_max_common_rate(intel_dp); + intel_dp->link.mst_probed_lane_count = 0; + intel_dp->link.mst_probed_rate = 0; intel_dp->link.retrain_disabled = false; intel_dp->link.seq_train_failures = 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 57f29906fa28..19c8b6878b03 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1113,6 +1113,33 @@ static void intel_mst_pre_pll_enable_dp(struct intel_atomic_state *state, to_intel_crtc(pipe_config->uapi.crtc)); } +static bool intel_mst_probed_link_params_valid(struct intel_dp *intel_dp, + int link_rate, int lane_count) +{ + return intel_dp->link.mst_probed_rate == link_rate && + intel_dp->link.mst_probed_lane_count == lane_count; +} + +static void intel_mst_set_probed_link_params(struct intel_dp *intel_dp, + int link_rate, int lane_count) +{ + intel_dp->link.mst_probed_rate = link_rate; + intel_dp->link.mst_probed_lane_count = lane_count; +} + +static void intel_mst_reprobe_topology(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + if (intel_mst_probed_link_params_valid(intel_dp, + crtc_state->port_clock, crtc_state->lane_count)) + return; + + drm_dp_mst_topology_queue_probe(&intel_dp->mst_mgr); + + intel_mst_set_probed_link_params(intel_dp, + crtc_state->port_clock, crtc_state->lane_count); +} + static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, @@ -1149,10 +1176,13 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, intel_dp_sink_enable_decompression(state, connector, pipe_config); - if (first_mst_stream) + if (first_mst_stream) { dig_port->base.pre_enable(state, &dig_port->base, pipe_config, NULL); + intel_mst_reprobe_topology(intel_dp, pipe_config); + } + intel_dp->active_mst_links++; ret = drm_dp_add_payload_part1(&intel_dp->mst_mgr, mst_state, From e44bc451aa4b08845c238028daafb4eaffb573a3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:55:02 +0300 Subject: [PATCH 0324/1523] drm/i915/dp_mst: Ensure link parameters are up-to-date for a disabled link As explained in the previous patch, the MST link BW reported by branch devices during topology probing/path resources enumeration depends on the link parameters programmed to DPCD to be up-to-date. After a sink is plugged this is not ensured, as those DPCD values start out zeroed. The target link parameters (for a subsequent modeset) are the maximum that is supported, so make sure these maximum values are programmed before the topology probing. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-14-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 3 ++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 31 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.h | 1 + 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5523469404f9..09789d62001f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4254,6 +4254,9 @@ intel_dp_mst_configure(struct intel_dp *intel_dp) intel_dp->is_mst = intel_dp->mst_detect != DRM_DP_SST; + if (intel_dp->is_mst) + intel_dp_mst_prepare_probe(intel_dp); + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); /* Avoid stale info on the next detect cycle. */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 19c8b6878b03..faee7af0a8a4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -43,6 +43,7 @@ #include "intel_dp_hdcp.h" #include "intel_dp_mst.h" #include "intel_dp_tunnel.h" +#include "intel_dp_link_training.h" #include "intel_dpio_phy.h" #include "intel_hdcp.h" #include "intel_hotplug.h" @@ -2031,3 +2032,33 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, return false; } + +/** + * intel_dp_mst_prepare_probe - Prepare an MST link for topology probing + * @intel_dp: DP port object + * + * Prepare an MST link for topology probing, programming the target + * link parameters to DPCD. This step is a requirement of the enumaration + * of path resources during probing. + */ +void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp) +{ + int link_rate = intel_dp_max_link_rate(intel_dp); + int lane_count = intel_dp_max_lane_count(intel_dp); + u8 rate_select; + u8 link_bw; + + if (intel_dp->link_trained) + return; + + if (intel_mst_probed_link_params_valid(intel_dp, link_rate, lane_count)) + return; + + intel_dp_compute_rate(intel_dp, link_rate, &link_bw, &rate_select); + + intel_dp_link_training_set_mode(intel_dp, link_rate, false); + intel_dp_link_training_set_bw(intel_dp, link_bw, rate_select, lane_count, + drm_dp_enhanced_frame_cap(intel_dp->dpcd)); + + intel_mst_set_probed_link_params(intel_dp, link_rate, lane_count); +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 8ca1d599091c..fba76454fa67 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -27,5 +27,6 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, struct intel_link_bw_limits *limits); bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, struct intel_crtc *crtc); +void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp); #endif /* __INTEL_DP_MST_H__ */ From 5c7b393452b55ee0cdbced0d3bc798f6875b5c24 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:55:03 +0300 Subject: [PATCH 0325/1523] drm/i915/dp_mst: Enable LT fallback between UHBR/non-UHBR link rates Enable switching between UHBR and non-UHBR link rates on MST links when reducing the link parameters after an LT failure. Reviewed-by: Suraj Kandpal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-15-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 86d58a4c8453..9c8738295106 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1188,11 +1188,6 @@ static bool reduce_link_params_in_bw_order(struct intel_dp *intel_dp, intel_dp->link.force_lane_count != lane_count)) continue; - /* TODO: Make switching from UHBR to non-UHBR rates work. */ - if (drm_dp_is_uhbr_rate(crtc_state->port_clock) != - drm_dp_is_uhbr_rate(link_rate)) - continue; - break; } From 684890a0185dabf5920c43b639133adc4c2632cf Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 31 Jul 2024 10:33:09 +0100 Subject: [PATCH 0326/1523] Input: adc-joystick - fix optional value handling The abs-fuzz and abs-flat properties are documented as optional. When these are absent, fwnode_property_read_u32() will leave the input unchanged, meaning that an axis either picks up the value for the previous axis or an uninitialized value. Explicitly set these values to zero when they are unspecified to match the documented behaviour in the device tree bindings. Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20240731093310.3696919-1-jkeeping@inmusicbrands.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/adc-joystick.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/adc-joystick.c b/drivers/input/joystick/adc-joystick.c index 5f46a7104b52..de1fa4cf291b 100644 --- a/drivers/input/joystick/adc-joystick.c +++ b/drivers/input/joystick/adc-joystick.c @@ -182,8 +182,11 @@ static int adc_joystick_set_axes(struct device *dev, struct adc_joystick *joy) swap(range[0], range[1]); } - fwnode_property_read_u32(child, "abs-fuzz", &fuzz); - fwnode_property_read_u32(child, "abs-flat", &flat); + if (fwnode_property_read_u32(child, "abs-fuzz", &fuzz)) + fuzz = 0; + + if (fwnode_property_read_u32(child, "abs-flat", &flat)) + flat = 0; input_set_abs_params(joy->input, axes[i].code, range[0], range[1], fuzz, flat); From 170c966cbe274e664288cfc12ee919d5e706dc50 Mon Sep 17 00:00:00 2001 From: Laura Nao Date: Tue, 30 Jul 2024 12:29:28 +0200 Subject: [PATCH 0327/1523] selftests: ksft: Fix finished() helper exit code on skipped tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Python finished() helper currently exits with KSFT_FAIL when there are only passed and skipped tests. Fix the logic to exit with KSFT_PASS instead, making it consistent with its C and bash counterparts (ksft_finished() and ktap_finished() respectively). Reviewed-by: Nícolas F. R. A. Prado Fixes: dacf1d7a78bf ("kselftest: Add test to verify probe of devices from discoverable buses") Signed-off-by: Laura Nao Reviewed-by: Muhammad Usama Anjum Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest/ksft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e..bf215790a89d 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -70,7 +70,7 @@ def test_result(condition, description=""): def finished(): - if ksft_cnt["pass"] == ksft_num_tests: + if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests: exit_code = KSFT_PASS else: exit_code = KSFT_FAIL From 39a3396558fb97e6e7d4c1eb04c2166da31904a9 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Tue, 30 Jul 2024 23:14:40 -0700 Subject: [PATCH 0328/1523] clk: thead: fix dependency on clk_ignore_unused Add the CLK_IGNORE_UNUSED flag to the vp-axi clock (CLK_VP_AXI) to avoid depending on clk_ignore_unused in the cmdline. Without this fix, the emmc-sdio clock (CLK_EMMC_SDIO) fails to work after vp-axi is disabled. Signed-off-by: Drew Fustini Link: https://lore.kernel.org/r/20240731061439.3807172-1-drew@pdp7.com Fixes: ae81b69fd2b1 ("clk: thead: Add support for T-Head TH1520 AP_SUBSYS clocks") Signed-off-by: Stephen Boyd --- drivers/clk/thead/clk-th1520-ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c09..17e32ae08720 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; From f1e6f89506e48b42235755fb3c2b73022697d8e5 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Thu, 1 Aug 2024 01:26:22 +0530 Subject: [PATCH 0329/1523] drm/xe/xe2: Enable Priority Mem Read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable feature to allow memory reads to take a priority memory path. This will reduce latency on the read path, but may introduce read after write (RAW) hazards as read and writes will no longer be ordered. To avoid RAW hazards, SW can use the MI_MEM_FENCE command or any other MI command that generates non posted memory writes. This will ensure data is coherent in memory prior to execution of commands which read data from memory. RCS,BCS and CCS support this feature. No pattern identified in KMD that could lead to a hazard. v2: Modify commit message, enable priority mem read feature for media, modify version range, modify bspec detail (Matt Roper) v3: Rebase, fix cramped line-wrapping (jcavitt) v4: Rebase v5: Media does not support Priority Mem Read. Modify commit to reflect the same. v6: Rebase Bspec: 60298, 60237, 60187, 60188 Signed-off-by: Pallavi Mishra Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Carl Zhang Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240731195622.1868401-1-pallavi.mishra@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c38db2a74614..81b71903675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,6 +104,7 @@ #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define CS_PRIORITY_MEM_READ REG_BIT(7) #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 00ace5fcc284..403eb1d2d20a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -428,6 +428,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) 0xA, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; From 4da409ba6430e3b5c6da7879dbbdc5f6717d91b1 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Thu, 11 Jul 2024 10:19:05 +0530 Subject: [PATCH 0330/1523] drm/i915/dpkgc: Add VRR condition for DPKGC Enablement DPKGC can now be enabled with VRR enabled if Vmin = Vmax = Flipline is met. Bspec: 68986 Signed-off-by: Suraj Kandpal Reviewed-by: Mitul Golani Link: https://patchwork.freedesktop.org/patch/msgid/20240711044905.3306882-1-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 24 +++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index a2726364b34d..045c7cac166b 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -2830,17 +2830,17 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, } /* - * If Fixed Refresh Rate: + * If Fixed Refresh Rate or For VRR case Vmin = Vmax = Flipline: * Program DEEP PKG_C_LATENCY Pkg C with highest valid latency from * watermark level1 and up and above. If watermark level 1 is * invalid program it with all 1's. * Program PKG_C_LATENCY Added Wake Time = DSB execution time - * If Variable Refresh Rate: + * If Variable Refresh Rate where Vmin != Vmax != Flipline: * Program DEEP PKG_C_LATENCY Pkg C with all 1's. * Program PKG_C_LATENCY Added Wake Time = 0 */ static void -skl_program_dpkgc_latency(struct drm_i915_private *i915, bool vrr_enabled) +skl_program_dpkgc_latency(struct drm_i915_private *i915, bool enable_dpkgc) { u32 max_latency = 0; u32 clear = 0, val = 0; @@ -2849,15 +2849,15 @@ skl_program_dpkgc_latency(struct drm_i915_private *i915, bool vrr_enabled) if (DISPLAY_VER(i915) < 20) return; - if (vrr_enabled) { - max_latency = LNL_PKG_C_LATENCY_MASK; - added_wake_time = 0; - } else { + if (enable_dpkgc) { max_latency = skl_watermark_max_latency(i915, 1); if (max_latency == 0) max_latency = LNL_PKG_C_LATENCY_MASK; added_wake_time = DSB_EXE_TIME + i915->display.sagv.block_time_us; + } else { + max_latency = LNL_PKG_C_LATENCY_MASK; + added_wake_time = 0; } clear |= LNL_ADDED_WAKE_TIME_MASK | LNL_PKG_C_LATENCY_MASK; @@ -2873,7 +2873,7 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc *crtc; struct intel_crtc_state __maybe_unused *new_crtc_state; int ret, i; - bool vrr_enabled = false; + bool enable_dpkgc = false; for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { ret = skl_build_pipe_wm(state, crtc); @@ -2899,11 +2899,13 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; - if (new_crtc_state->vrr.enable) - vrr_enabled = true; + if ((new_crtc_state->vrr.vmin == new_crtc_state->vrr.vmax && + new_crtc_state->vrr.vmin == new_crtc_state->vrr.flipline) || + !new_crtc_state->vrr.enable) + enable_dpkgc = true; } - skl_program_dpkgc_latency(to_i915(state->base.dev), vrr_enabled); + skl_program_dpkgc_latency(to_i915(state->base.dev), enable_dpkgc); skl_print_wm_changes(state); From 688c43dd6ca9e0cd0568868aefca5b041695c3f4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 31 Jul 2024 14:07:44 +0300 Subject: [PATCH 0331/1523] drm/i915: remove unused HAS_BROKEN_CS_TLB() The last users have been removed years ago. Finish the job. Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240731110744.1572240-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d1d21d433766..bef3c5b0551f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -678,9 +678,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, ((sizes) & ~RUNTIME_INFO(i915)->page_sizes) == 0; \ }) -/* Early gen2 have a totally busted CS tlb and require pinned batches. */ -#define HAS_BROKEN_CS_TLB(i915) (IS_I830(i915) || IS_I845G(i915)) - #define NEEDS_RC6_CTX_CORRUPTION_WA(i915) \ (IS_BROADWELL(i915) || GRAPHICS_VER(i915) == 9) From 9374ae912dbb1eed8139ed75fd2c0f1b30ca454d Mon Sep 17 00:00:00 2001 From: Mengqi Zhang Date: Tue, 16 Jul 2024 09:37:04 +0800 Subject: [PATCH 0332/1523] mmc: mtk-sd: receive cmd8 data when hs400 tuning fail When we use cmd8 as the tuning command in hs400 mode, the command response sent back by some eMMC devices cannot be correctly sampled by MTK eMMC controller at some weak sample timing. In this case, command timeout error may occur. So we must receive the following data to make sure the next cmd8 send correctly. Signed-off-by: Mengqi Zhang Fixes: c4ac38c6539b ("mmc: mtk-sd: Add HS400 online tuning support") Cc: stable@vger.stable.com Link: https://lore.kernel.org/r/20240716013704.10578-1-mengqi.zhang@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index a94835b8ab93..e386f78e3267 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1230,7 +1230,7 @@ static bool msdc_cmd_done(struct msdc_host *host, int events, } if (!sbc_error && !(events & MSDC_INT_CMDRDY)) { - if (events & MSDC_INT_CMDTMO || + if ((events & MSDC_INT_CMDTMO && !host->hs400_tuning) || (!mmc_op_tuning(cmd->opcode) && !host->hs400_tuning)) /* * should not clear fifo/interrupt as the tune data @@ -1323,9 +1323,9 @@ static void msdc_start_command(struct msdc_host *host, static void msdc_cmd_next(struct msdc_host *host, struct mmc_request *mrq, struct mmc_command *cmd) { - if ((cmd->error && - !(cmd->error == -EILSEQ && - (mmc_op_tuning(cmd->opcode) || host->hs400_tuning))) || + if ((cmd->error && !host->hs400_tuning && + !(cmd->error == -EILSEQ && + mmc_op_tuning(cmd->opcode))) || (mrq->sbc && mrq->sbc->error)) msdc_request_done(host, mrq); else if (cmd == mrq->sbc) From a6e9c391d45b5865b61e569146304cff72821a5d Mon Sep 17 00:00:00 2001 From: Camila Alvarez Date: Tue, 30 Jul 2024 19:42:43 -0400 Subject: [PATCH 0333/1523] HID: cougar: fix slab-out-of-bounds Read in cougar_report_fixup report_fixup for the Cougar 500k Gaming Keyboard was not verifying that the report descriptor size was correct before accessing it Reported-by: syzbot+24c0361074799d02c452@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=24c0361074799d02c452 Signed-off-by: Camila Alvarez Reviewed-by: Silvan Jegen Signed-off-by: Jiri Kosina --- drivers/hid/hid-cougar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b..0fa785f52707 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); From 7354eb7f1558466e92e926802d36e69e42938ea9 Mon Sep 17 00:00:00 2001 From: Curtis Malainey Date: Wed, 31 Jul 2024 14:21:44 -0700 Subject: [PATCH 0334/1523] ASoC: SOF: Remove libraries from topology lookups Default firmware shipped in open source are not licensed for 3P libraries, therefore topologies should not reference them. If a OS wants to use 3P (that they have licensed) then they should use the appropriate topology override mechanisms. Fixes: 8a7d5d85ed2161 ("ASoC: SOF: mediatek: mt8195: Add devicetree support to select topologies") Signed-off-by: Curtis Malainey Cc: Wojciech Macek Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20240731212153.921327-1-cujomalainey@chromium.org Signed-off-by: Mark Brown --- sound/soc/sof/mediatek/mt8195/mt8195.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 24ae1d4959be..1c6e035fd313 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -573,7 +573,7 @@ static const struct snd_sof_dsp_ops sof_mt8195_ops = { static struct snd_sof_of_mach sof_mt8195_machs[] = { { .compatible = "google,tomato", - .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682-dts.tplg" + .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682.tplg" }, { .compatible = "mediatek,mt8195", .sof_tplg_filename = "sof-mt8195.tplg" From c0e0bde2c7e6bca2a1f195d82ef7598d521399e5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 31 Jul 2024 14:17:14 +0200 Subject: [PATCH 0335/1523] drm/i915: Use backlight power constants Replace FB_BLANK_ constants with their counterparts from the backlight subsystem. The values are identical, so there's no change in functionality or semantics. Signed-off-by: Thomas Zimmermann Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240731122311.1143153-3-tzimmermann@suse.de Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_backlight.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d1..6f678c039ed8 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -455,7 +455,7 @@ void intel_backlight_disable(const struct drm_connector_state *old_conn_state) mutex_lock(&i915->display.backlight.lock); if (panel->backlight.device) - panel->backlight.device->props.power = FB_BLANK_POWERDOWN; + panel->backlight.device->props.power = BACKLIGHT_POWER_OFF; panel->backlight.enabled = false; panel->backlight.funcs->disable(old_conn_state, 0); @@ -773,7 +773,7 @@ static void __intel_backlight_enable(const struct intel_crtc_state *crtc_state, panel->backlight.funcs->enable(crtc_state, conn_state, panel->backlight.level); panel->backlight.enabled = true; if (panel->backlight.device) - panel->backlight.device->props.power = FB_BLANK_UNBLANK; + panel->backlight.device->props.power = BACKLIGHT_POWER_ON; } void intel_backlight_enable(const struct intel_crtc_state *crtc_state, @@ -870,12 +870,12 @@ static int intel_backlight_device_update_status(struct backlight_device *bd) */ if (panel->backlight.enabled) { if (panel->backlight.power) { - bool enable = bd->props.power == FB_BLANK_UNBLANK && + bool enable = bd->props.power == BACKLIGHT_POWER_ON && bd->props.brightness != 0; panel->backlight.power(connector, enable); } } else { - bd->props.power = FB_BLANK_POWERDOWN; + bd->props.power = BACKLIGHT_POWER_OFF; } drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); @@ -945,9 +945,9 @@ int intel_backlight_device_register(struct intel_connector *connector) props.max_brightness); if (panel->backlight.enabled) - props.power = FB_BLANK_UNBLANK; + props.power = BACKLIGHT_POWER_ON; else - props.power = FB_BLANK_POWERDOWN; + props.power = BACKLIGHT_POWER_OFF; name = kstrdup_const("intel_backlight", GFP_KERNEL); if (!name) From 146458645e505f5eac498759bcd865cf7c0dfd9a Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Thu, 1 Aug 2024 16:54:24 +0530 Subject: [PATCH 0336/1523] drm/xe/hwmon: Fix PL1 disable flow in xe_hwmon_power_max_write In xe_hwmon_power_max_write, for PL1 disable supported case, instead of returning after PL1 disable, PL1 enable path was also being run. Fixed it by returning after disable. v2: Correct typo and grammar in commit message. (Jonathan) Signed-off-by: Karthik Poosa Fixes: fef6dd12b45a ("drm/xe/hwmon: Protect hwmon rw attributes with hwmon_lock") Reviewed-by: Jonathan Cavitt Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240801112424.1841766-1-karthik.poosa@intel.com --- drivers/gpu/drm/xe/xe_hwmon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..832ea81faeee 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ From 33eb1e5db351e2c0e652d878b66b8a6d4d013135 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Mar 2024 13:40:30 +1030 Subject: [PATCH 0337/1523] btrfs: factor out stripe length calculation into a helper Currently there are two locations which need to calculate the real length of a stripe (which can be at the end of a chunk, and the chunk size may not always be 64K aligned). Factor them into a helper as we're going to have a third user soon. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d7100018..439545710d76 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); From 63447b7dd40c6a9ae8d3bb70c11f4c46731823e3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Mar 2024 13:40:31 +1030 Subject: [PATCH 0338/1523] btrfs: scrub: update last_physical after scrubbing one stripe Currently sctx->stat.last_physical only got updated in the following cases: - When the last stripe of a non-RAID56 chunk is scrubbed This implies a pitfall, if the last stripe is at the chunk boundary, and we finished the scrub of the whole chunk, we won't update last_physical at all until the next chunk. - When a P/Q stripe of a RAID56 chunk is scrubbed This leads the following two problems: - sctx->stat.last_physical is not updated for a almost full chunk This is especially bad, affecting scrub resume, as the resume would start from last_physical, causing unnecessary re-scrub. - "btrfs scrub status" will not report any progress for a long time Fix the problem by properly updating @last_physical after each stripe is scrubbed. And since we're here, for the sake of consistency, use spin lock to protect the update of @last_physical, just like all the remaining call sites touching sctx->stat. Reported-by: Michel Palleau Link: https://lore.kernel.org/linux-btrfs/CAMFk-+igFTv2E8svg=cQ6o3e6CrR5QwgQ3Ok9EyRaEvvthpqCQ@mail.gmail.com/ Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 439545710d76..0de9162ff481 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1875,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2143,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2340,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; From 872617a0896fc7510b0b8f25d323670424461cfc Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 18 Jul 2024 14:46:23 -0700 Subject: [PATCH 0339/1523] btrfs: implement launder_folio for clearing dirty page reserve In the buffered write path, dirty pages can be said to "own" the qgroup reservation until they create an ordered_extent. It is possible for there to be outstanding dirty pages when a transaction is aborted, in which case there is no cancellation path for freeing this reservation and it is leaked. We do already walk the list of outstanding delalloc inodes in btrfs_destroy_delalloc_inodes() and call invalidate_inode_pages2() on them. This does *not* call btrfs_invalidate_folio(), as one might guess, but rather calls launder_folio() and release_folio(). Since this is a reservation associated with dirty pages only, rather than something associated with the private bit (ordered_extent is cancelled separately already in the cleanup transaction path), implementing this release should be done via launder_folio. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1d4e0a65494a..1659ed3a0ba9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7202,6 +7202,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10137,6 +10143,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, From 30479f31d44d47ed00ae0c7453d9b253537005b2 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 19 Jul 2024 16:49:08 -0700 Subject: [PATCH 0340/1523] btrfs: fix qgroup reserve leaks in cow_file_range In the buffered write path, the dirty page owns the qgroup reserve until it creates an ordered_extent. Therefore, any errors that occur before the ordered_extent is created must free that reservation, or else the space is leaked. The fstest generic/475 exercises various IO error paths, and is able to trigger errors in cow_file_range where we fail to get to allocating the ordered extent. Note that because we *do* clear delalloc, we are likely to remove the inode from the delalloc list, so the inodes/pages to not have invalidate/launder called on them in the commit abort path. This results in failures at the unmount stage of the test that look like: BTRFS: error (device dm-8 state EA) in cleanup_transaction:2018: errno=-5 IO failure BTRFS: error (device dm-8 state EA) in btrfs_replace_file_extents:2416: errno=-5 IO failure BTRFS warning (device dm-8 state EA): qgroup 0/5 has unreleased space, type 0 rsv 28672 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 22588 at fs/btrfs/disk-io.c:4333 close_ctree+0x222/0x4d0 [btrfs] Modules linked in: btrfs blake2b_generic libcrc32c xor zstd_compress raid6_pq CPU: 3 PID: 22588 Comm: umount Kdump: loaded Tainted: G W 6.10.0-rc7-gab56fde445b8 #21 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 RIP: 0010:close_ctree+0x222/0x4d0 [btrfs] RSP: 0018:ffffb4465283be00 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffffa1a1818e1000 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffb4465283bbe0 RDI: ffffa1a19374fcb8 RBP: ffffa1a1818e13c0 R08: 0000000100028b16 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000003 R12: ffffa1a18ad7972c R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f9168312b80(0000) GS:ffffa1a4afcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f91683c9140 CR3: 000000010acaa000 CR4: 00000000000006f0 Call Trace: ? close_ctree+0x222/0x4d0 [btrfs] ? __warn.cold+0x8e/0xea ? close_ctree+0x222/0x4d0 [btrfs] ? report_bug+0xff/0x140 ? handle_bug+0x3b/0x70 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? close_ctree+0x222/0x4d0 [btrfs] generic_shutdown_super+0x70/0x160 kill_anon_super+0x11/0x40 btrfs_kill_super+0x11/0x20 [btrfs] deactivate_locked_super+0x2e/0xa0 cleanup_mnt+0xb5/0x150 task_work_run+0x57/0x80 syscall_exit_to_user_mode+0x121/0x130 do_syscall_64+0xab/0x1a0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f916847a887 ---[ end trace 0000000000000000 ]--- BTRFS error (device dm-8 state EA): qgroup reserved space leaked Cases 2 and 3 in the out_reserve path both pertain to this type of leak and must free the reserved qgroup data. Because it is already an error path, I opted not to handle the possible errors in btrfs_free_qgroup_data. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1659ed3a0ba9..ceee5422089b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1585,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1598,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2259,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; From 1e7bec1f7d6533f08bc1c4ee94930c02361db86c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 29 Jul 2024 11:05:48 -0400 Subject: [PATCH 0341/1523] btrfs: emit a warning about space cache v1 being deprecated We've been wanting to get rid of this for a while, add a message to indicate that this feature is going away and when so we can finally have a date when we're going to remove it. The output looks like this BTRFS warning (device nvme0n1): space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2 Reviewed-by: Qu Wenruo Reviewed-by: Neal Gompa Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fb..83478deada3b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -683,8 +683,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } From c3bc97d2f102ddd5a8341eeb2dbae2a3e98bb46a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:16 -0700 Subject: [PATCH 0342/1523] drm/xe: Take ref to VM in delayed snapshot Kernel BO's don't take a ref to the VM, we need the VM for the delayed snapshot, so take a ref to the VM in delayed snapshot. v2: - Check for lrc_bo before taking a VM ref (CI) - Check lrc_bo->vm before taking / dropping a VM ref (CI) - Drop VM in xe_lrc_snapshot_free v5: - Fix commit message wording (Johnathan) Fixes: 47058633d9c5 ("drm/xe: Move lrc snapshot capturing to xe_lrc.c") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..58121821f081 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } From 53369581dc0c68a5700ed51e1660f44c4b2bb524 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:17 -0700 Subject: [PATCH 0343/1523] drm/printer: Allow NULL data in devcoredump printer We want to determine the size of the devcoredump before writing it out. To that end, we will run the devcoredump printer with NULL data to get the size, alloc data based on the generated offset, then run the devcorecump again with a valid data pointer to print. This necessitates not writing data to the data pointer on the initial pass, when it is NULL. v5: - Better commit message (Jonathan) - Add kerenl doc with examples (Jani) Cc: Maarten Lankhorst Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-3-matthew.brost@intel.com --- drivers/gpu/drm/drm_print.c | 13 +++++---- include/drm/drm_print.h | 54 ++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf24dfdeb6b2..0081190201a7 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) copy = iterator->remain; /* Copy out the bit of the string that we need */ - memcpy(iterator->data, - str + (iterator->start - iterator->offset), copy); + if (iterator->data) + memcpy(iterator->data, + str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; @@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) len = min_t(ssize_t, strlen(str), iterator->remain); - memcpy(iterator->data + pos, str, len); + if (iterator->data) + memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; @@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; - snprintf(((char *) iterator->data) + pos, - iterator->remain, "%pV", vaf); + if (iterator->data) + snprintf(((char *) iterator->data) + pos, + iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5d9dff5149c9..d2676831d765 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -221,7 +221,8 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) /** * struct drm_print_iterator - local struct used with drm_printer_coredump - * @data: Pointer to the devcoredump output buffer + * @data: Pointer to the devcoredump output buffer, can be NULL if using + * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ @@ -266,6 +267,57 @@ struct drm_print_iterator { * coredump_read, ...) * } * + * The above example has a time complexity of O(N^2), where N is the size of the + * devcoredump. This is acceptable for small devcoredumps but scales poorly for + * larger ones. + * + * Another use case for drm_coredump_printer is to capture the devcoredump into + * a saved buffer before the dev_coredump() callback. This involves two passes: + * one to determine the size of the devcoredump and another to print it to a + * buffer. Then, in dev_coredump(), copy from the saved buffer into the + * devcoredump read buffer. + * + * For example:: + * + * char *devcoredump_saved_buffer; + * + * ssize_t __coredump_print(char *buffer, ssize_t count, ...) + * { + * struct drm_print_iterator iter; + * struct drm_printer p; + * + * iter.data = buffer; + * iter.start = 0; + * iter.remain = count; + * + * p = drm_coredump_printer(&iter); + * + * drm_printf(p, "foo=%d\n", foo); + * ... + * return count - iter.remain; + * } + * + * void coredump_print(...) + * { + * ssize_t count; + * + * count = __coredump_print(NULL, INT_MAX, ...); + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); + * __coredump_print(devcoredump_saved_buffer, count, ...); + * } + * + * void coredump_read(char *buffer, loff_t offset, size_t count, + * void *data, size_t datalen) + * { + * ... + * memcpy(buffer, devcoredump_saved_buffer + offset, count); + * ... + * } + * + * The above example has a time complexity of O(N*2), where N is the size of the + * devcoredump. This scales better than the previous example for larger + * devcoredumps. + * * RETURNS: * The &drm_printer object */ From 4f04d07c0a94b0b30db839e44f1b8364a1b508ac Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:18 -0700 Subject: [PATCH 0344/1523] drm/xe: Faster devcoredump The current algorithm to read out devcoredump is O(N*N) where N is the size of coredump due to usage of the drm_coredump_printer in xe_devcoredump_read. Switch to a O(N) algorithm which prints the devcoredump into a readable format in snapshot work and update xe_devcoredump_read to memcpy from the readable format directly. v2: - Fix double free on devcoredump removal (Testing) - Set read_data_size after snap work flush - Adjust remaining in iterator upon realloc (Testing) - Set read_data upon realloc (Testing) v3: - Kernel doc v4: - Two pass algorithm to determine size (Maarten) v5: - Use scope for reading variables (Johnathan) Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2408 Cc: Rodrigo Vivi Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 111 ++++++++++++++++------ drivers/gpu/drm/xe/xe_devcoredump_types.h | 8 ++ 2 files changed, 88 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d8d8ca2c19d3..bdb76e834e4c 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +static ssize_t __xe_devcoredump_read(char *buffer, size_t count, + struct xe_devcoredump *coredump) { - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - - /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); -} - -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct xe_devcoredump *coredump = data; struct xe_device *xe; struct xe_devcoredump_snapshot *ss; struct drm_printer p; @@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct timespec64 ts; int i; - if (!coredump) - return -ENODEV; - xe = coredump_to_xe(coredump); ss = &coredump->snapshot; - /* Ensure delayed work is captured before continuing */ - flush_work(&ss->work); - iter.data = buffer; - iter.offset = 0; - iter.start = offset; + iter.start = 0; iter.remain = count; p = drm_coredump_printer(&iter); @@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return count - iter.remain; } +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) +{ + int i; + + xe_guc_ct_snapshot_free(ss->ct); + ss->ct = NULL; + + xe_guc_exec_queue_snapshot_free(ss->ge); + ss->ge = NULL; + + xe_sched_job_snapshot_free(ss->job); + ss->job = NULL; + + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (ss->hwe[i]) { + xe_hw_engine_snapshot_free(ss->hwe[i]); + ss->hwe[i] = NULL; + } + + xe_vm_snapshot_free(ss->vm); + ss->vm = NULL; +} + +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + ss = &coredump->snapshot; + + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + + if (!ss->read.buffer) + return -ENODEV; + + if (offset >= ss->read.size) + return 0; + + byte_copied = count < ss->read.size - offset ? count : + ss->read.size - offset; + memcpy(buffer, ss->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - int i; /* Our device is gone. Nothing to do... */ if (!data || !coredump_to_xe(coredump)) @@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); - xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); - xe_sched_job_snapshot_free(coredump->snapshot.job); - for (i = 0; i < XE_NUM_HW_ENGINES; i++) - if (coredump->snapshot.hwe[i]) - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); - xe_vm_snapshot_free(coredump->snapshot.vm); + xe_devcoredump_snapshot_free(&coredump->snapshot); + kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -260,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe) { return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } + #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 923cdf72a816..440d05d77a5a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot { struct xe_sched_job_snapshot *job; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; + + /** @read: devcoredump in human readable format */ + struct { + /** @read.size: size of devcoredump in human readable format */ + ssize_t size; + /** @read.buffer: buffer of devcoredump in human readable format */ + char *buffer; + } read; }; /** From d67c5649c1541dc93f202eeffc6f49220a4ed71d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:53 +0200 Subject: [PATCH 0345/1523] mptcp: fully established after ADD_ADDR echo on MPJ Before this patch, receiving an ADD_ADDR echo on the just connected MP_JOIN subflow -- initiator side, after the MP_JOIN 3WHS -- was resulting in an MP_RESET. That's because only ACKs with a DSS or ADD_ADDRs without the echo bit were allowed. Not allowing the ADD_ADDR echo after an MP_CAPABLE 3WHS makes sense, as we are not supposed to send an ADD_ADDR before because it requires to be in full established mode first. For the MP_JOIN 3WHS, that's different: the ADD_ADDR can be sent on a previous subflow, and the ADD_ADDR echo can be received on the recently created one. The other peer will already be in fully established, so it is allowed to send that. We can then relax the conditions here to accept the ADD_ADDR echo for MPJ subflows. Fixes: 67b12f792d5e ("mptcp: full fully established support after ADD_ADDR") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-1-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a68382a4fe9..ac2f1a54cc43 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ From 8af1f11865f259c882cce71d32f85ee9004e2660 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:54 +0200 Subject: [PATCH 0346/1523] mptcp: pm: deny endp with signal + subflow + port As mentioned in the 'Fixes' commit, the port flag is only supported by the 'signal' flag, and not by the 'subflow' one. Then if both the 'signal' and 'subflow' flags are set, the problem is the same: the feature cannot work with the 'subflow' flag. Technically, if both the 'signal' and 'subflow' flags are set, it will be possible to create the listening socket, but not to establish a subflow using this source port. So better to explicitly deny it, not to create some confusions because the expected behaviour is not possible. Fixes: 09f12c3ab7a5 ("mptcp: allow to use port and non-signal in set_flags") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-2-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 37954a0b087d..c921d07e5940 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1328,8 +1328,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + if (addr.addr.port && !address_use_port(&addr)) { + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } From c95eb32ced823a00be62202b43966b07b2f20b7f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:55 +0200 Subject: [PATCH 0347/1523] mptcp: pm: reduce indentation blocks That will simplify the following commits. No functional changes intended. Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-3-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c921d07e5940..780f4cca165c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -567,16 +567,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - if (local) { - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); - mptcp_pm_nl_addr_send_ack(msk); - } - } + if (!local) + goto subflow; + + if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + goto subflow; + + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); } +subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { From cd7c957f936f8cb80d03e5152f4013aae65bd986 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:56 +0200 Subject: [PATCH 0348/1523] mptcp: pm: don't try to create sf if alloc failed It sounds better to avoid wasting cycles and / or put extreme memory pressure on the system by trying to create new subflows if it was not possible to add a new item in the announce list. While at it, a warning is now printed if the entry was already in the list as it should not happen with the in-kernel path-manager. With this PM, mptcp_pm_alloc_anno_list() should only fail in case of memory pressure. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-4-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 780f4cca165c..2be7af377cda 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -348,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { - if (mptcp_pm_is_kernel(msk)) + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, @@ -555,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { - local = select_signal_address(pernet, msk); - /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -567,11 +565,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; + local = select_signal_address(pernet, msk); if (!local) goto subflow; + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) - goto subflow; + return; __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; From 85df533a787bf07bf4367ce2a02b822ff1fba1a3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:57 +0200 Subject: [PATCH 0349/1523] mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set Up to the 'Fixes' commit, having an endpoint with both the 'signal' and 'subflow' flags, resulted in the creation of a subflow and an address announcement using the address linked to this endpoint. After this commit, only the address announcement was done, ignoring the 'subflow' flag. That's because the same bitmap is used for the two flags. It is OK to keep this single bitmap, the already selected local endpoint simply have to be re-used, but not via select_local_address() not to look at the just modified bitmap. Note that it is unusual to set the two flags together: creating a new subflow using a new local address will implicitly advertise it to the other peer. So in theory, no need to advertise it explicitly as well. Maybe there are use-cases -- the subflow might not reach the other peer that way, we can ask the other peer to try initiating the new subflow without delay -- or very likely the user is confused, and put both flags "just to be sure at least the right one is set". Still, if it is allowed, the kernel should do what has been asked: using this endpoint to announce the address and to create a new subflow from it. An alternative is to forbid the use of the two flags together, but that's probably too late, there are maybe use-cases, and it was working before. This patch will avoid people complaining subflows are not created using the endpoint they added with the 'subflow' and 'signal' flag. Note that with the current patch, the subflow might not be created in some corner cases, e.g. if the 'subflows' limit was reached when sending the ADD_ADDR, but changed later on. It is probably not worth splitting id_avail_bitmap per target ('signal', 'subflow'), which will add another large field to the msk "just" to track (again) endpoints. Anyway, currently when the limits are changed, the kernel doesn't check if new subflows can be created or removed, because we would need to keep track of the received ADD_ADDR, and more. It sounds OK to assume that the limits should be properly configured before establishing new connections. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-5-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 2be7af377cda..4cae2aa7be5c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -512,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *local; unsigned int add_addr_signal_max; unsigned int local_addr_max; struct pm_nl_pernet *pernet; @@ -579,6 +579,9 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); mptcp_pm_nl_addr_send_ack(msk); + + if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = local; } subflow: @@ -589,9 +592,14 @@ subflow: bool fullmesh; int i, nr; - local = select_local_address(pernet, msk); - if (!local) - break; + if (signal_and_subflow) { + local = signal_and_subflow; + signal_and_subflow = NULL; + } else { + local = select_local_address(pernet, msk); + if (!local) + break; + } fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); From bec1f3b119ebc613d08dfbcdbaef01a79aa7de92 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:58 +0200 Subject: [PATCH 0350/1523] selftests: mptcp: join: ability to invert ADD_ADDR check In the following commit, the client will initiate the ADD_ADDR, instead of the server. We need to way to verify the ADD_ADDR have been correctly sent. Note: the default expected counters for when the port number is given are never changed by the caller, no need to accept them as parameter then. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-6-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4df48f1f14ab..52a25ac43d10 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1415,18 +1415,28 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1438,7 +1448,7 @@ chk_add_nr() fi print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1449,7 +1459,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1459,7 +1469,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1470,7 +1480,7 @@ chk_add_nr() fi print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1481,7 +1491,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1492,7 +1502,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1503,7 +1513,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1513,6 +1523,8 @@ chk_add_nr() print_ok fi fi + + print_info "$extra_msg" } chk_add_tx_nr() From 4d2868b5d191c74262f7407972d68d1bf3245d6a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:59 +0200 Subject: [PATCH 0351/1523] selftests: mptcp: join: test both signal & subflow It should be quite uncommon to set both the subflow and the signal flags: the initiator of the connection is typically the one creating new subflows, not the other peer, then no need to announce additional local addresses, and use it to create subflows. But some people might be confused about the flags, and set both "just to be sure at least the right one is set". To verify the previous fix, and avoid future regressions, this specific case is now validated: the client announces a new address, and initiates a new subflow from the same address. While working on this, another bug has been noticed, where the client reset the new subflow because an ADD_ADDR echo got received as the 3rd ACK: this new test also explicitly checks that no RST have been sent by the client and server. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-7-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 52a25ac43d10..9ea6d698e9d3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1989,6 +1989,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 From ab9fd06cb8f0db0854291833fc40c789e43a361f Mon Sep 17 00:00:00 2001 From: Vamshi Gajjela Date: Wed, 24 Jul 2024 19:21:26 +0530 Subject: [PATCH 0352/1523] scsi: ufs: core: Fix hba->last_dme_cmd_tstamp timestamp updating logic The ufshcd_add_delay_before_dme_cmd() always introduces a delay of MIN_DELAY_BEFORE_DME_CMDS_US between DME commands even when it's not required. The delay is added when the UFS host controller supplies the quirk UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS. Fix the logic to update hba->last_dme_cmd_tstamp to ensure subsequent DME commands have the correct delay in the range of 0 to MIN_DELAY_BEFORE_DME_CMDS_US. Update the timestamp at the end of the function to ensure it captures the latest time after any necessary delay has been applied. Signed-off-by: Vamshi Gajjela Link: https://lore.kernel.org/r/20240724135126.1786126-1-vamshigajjela@google.com Fixes: cad2e03d8607 ("ufs: add support to allow non standard behaviours (quirks)") Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5e3c67e96956..0b3d0c8e0dda 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4100,11 +4100,16 @@ static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) min_sleep_time_us = MIN_DELAY_BEFORE_DME_CMDS_US - delta; else - return; /* no more delay required */ + min_sleep_time_us = 0; /* no more delay required */ } - /* allow sleep for extra 50us if needed */ - usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + if (min_sleep_time_us > 0) { + /* allow sleep for extra 50us if needed */ + usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + } + + /* update the last_dme_cmd_tstamp */ + hba->last_dme_cmd_tstamp = ktime_get(); } /** From ffed586b8c4f1fdb772ee350e229863f145defb5 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 1 Aug 2024 14:42:34 +0900 Subject: [PATCH 0353/1523] scsi: sd: Move sd_read_cpr() out of the q->limits_lock region Commit 804e498e0496 ("sd: convert to the atomic queue limits API") introduced pairs of function calls to queue_limits_start_update() and queue_limits_commit_update(). These two functions lock and unlock q->limits_lock. In sd_revalidate_disk(), sd_read_cpr() is called after queue_limits_start_update() call and before queue_limits_commit_update() call. sd_read_cpr() locks q->sysfs_dir_lock and &q->sysfs_lock. Then new lock dependencies were created between q->limits_lock, q->sysfs_dir_lock and q->sysfs_lock, as follows: sd_revalidate_disk queue_limits_start_update mutex_lock(&q->limits_lock) sd_read_cpr disk_set_independent_access_ranges mutex_lock(&q->sysfs_dir_lock) mutex_lock(&q->sysfs_lock) mutex_unlock(&q->sysfs_lock) mutex_unlock(&q->sysfs_dir_lock) queue_limits_commit_update mutex_unlock(&q->limits_lock) However, the three locks already had reversed dependencies in other places. Then the new dependencies triggered the lockdep WARN "possible circular locking dependency detected" [1]. This WARN was observed by running the blktests test case srp/002. To avoid the WARN, move the sd_read_cpr() call in sd_revalidate_disk() after the queue_limits_commit_update() call. In other words, move the sd_read_cpr() call out of the q->limits_lock region. [1] https://lore.kernel.org/linux-scsi/vlmv53ni3ltwxplig5qnw4xsl2h6ccxijfbqzekx76vxoim5a5@dekv7q3es3tx/ Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240801054234.540532-1-shinichiro.kawasaki@wdc.com Tested-by: Luca Coelho Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8bb3a3611851..718eb91ba9a5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3753,7 +3753,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); - sd_read_cpr(sdkp); } sd_print_capacity(sdkp, old_capacity); @@ -3808,6 +3807,14 @@ static int sd_revalidate_disk(struct gendisk *disk) if (err) return err; + /* + * Query concurrent positioning ranges after + * queue_limits_commit_update() unlocked q->limits_lock to avoid + * deadlock with q->sysfs_dir_lock and q->sysfs_lock. + */ + if (sdkp->media_present && scsi_device_supports_vpd(sdp)) + sd_read_cpr(sdkp); + /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk From ab3de2c7ec91db6a3cf5fc07765852c81ca7d6ef Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Thu, 20 Jun 2024 13:43:03 +0300 Subject: [PATCH 0354/1523] thunderbolt: Fix memory leaks in {port|retimer}_sb_regs_write() Add missing free_page() call for the memory allocated by validate_and_copy_from_user(). Fixes: 6d241fa00159 ("thunderbolt: Add sideband register access to debugfs") Signed-off-by: Aapo Vienamo Reviewed-by: Przemek Kitszel Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92..9ed4bb2e8d05 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } From 113fd6372a5bb3689aba8ef5b8a265ed1529a78f Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Fri, 2 Aug 2024 12:47:36 +0800 Subject: [PATCH 0355/1523] drm/client: fix null pointer dereference in drm_client_modeset_probe In drm_client_modeset_probe(), the return value of drm_mode_duplicate() is assigned to modeset->mode, which will lead to a possible NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Cc: stable@vger.kernel.org Fixes: cf13909aee05 ("drm/fb-helper: Move out modeset config code") Signed-off-by: Ma Ke Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240802044736.1570345-1-make24@iscas.ac.cn --- drivers/gpu/drm/drm_client_modeset.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a09..cee5eafbfb81 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; From 62b45bab010d1b0cea6166f818f1cd0666a6d8d8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 15 Jul 2024 18:35:51 +1000 Subject: [PATCH 0356/1523] drm/test: fix the gem shmem test to map the sg table. The test here creates an sg table, but never maps it, when we get to drm_gem_shmem_free, the helper tries to unmap and this causes warnings on some platforms and debug kernels. This also sets a 64-bit dma mask, as I see an swiotlb warning if I stick with the default 32-bit one. Fixes: 93032ae634d4 ("drm/test: add a test suite for GEM objects backed by shmem") Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Reviewed-by: Michael J. Ruhl Acked-by: Daniel Vetter Reviewed-by: Marco Pagani Link: https://patchwork.freedesktop.org/patch/msgid/20240715083551.777807-1-airlied@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_gem_shmem_test.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index c3758faa1b83..d8d0e4d1682f 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; From d1aa95e86f178dc597e80228cd9bd81fc3510f34 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Thu, 25 Jul 2024 10:31:25 +1200 Subject: [PATCH 0357/1523] hid-asus: add ROG Ally X prod ID to quirk list The new ASUS ROG Ally X functions almost exactly the same as the previous model, so we can use the same quirks. Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 +++ drivers/hid/hid-ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c2..a282388b7aa5 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1248,6 +1248,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b..6e3223389080 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 From 97155021ae17b86985121b33cf8098bcde00d497 Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Tue, 23 Jul 2024 10:44:35 +0200 Subject: [PATCH 0358/1523] HID: amd_sfh: free driver_data after destroying hid device HID driver callbacks aren't called anymore once hid_destroy_device() has been called. Hence, hid driver_data should be freed only after the hid_destroy_device() function returned as driver_data is used in several callbacks. I observed a crash with kernel 6.10.0 on my T14s Gen 3, after enabling KASAN to debug memory allocation, I got this output: [ 13.050438] ================================================================== [ 13.054060] BUG: KASAN: slab-use-after-free in amd_sfh_get_report+0x3ec/0x530 [amd_sfh] [ 13.054809] psmouse serio1: trackpoint: Synaptics TrackPoint firmware: 0x02, buttons: 3/3 [ 13.056432] Read of size 8 at addr ffff88813152f408 by task (udev-worker)/479 [ 13.060970] CPU: 5 PID: 479 Comm: (udev-worker) Not tainted 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.063978] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.067860] Call Trace: [ 13.069383] input: TPPS/2 Synaptics TrackPoint as /devices/platform/i8042/serio1/input/input8 [ 13.071486] [ 13.071492] dump_stack_lvl+0x5d/0x80 [ 13.074870] snd_hda_intel 0000:33:00.6: enabling device (0000 -> 0002) [ 13.078296] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.082199] print_report+0x174/0x505 [ 13.085776] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.089367] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.093255] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.097464] kasan_report+0xc8/0x150 [ 13.101461] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.105802] amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.110303] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.114879] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.119450] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.124097] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.127404] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.131925] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.136455] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.140197] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.143602] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.147234] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.150446] ? __devm_add_action+0x167/0x1d0 [ 13.155061] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.158581] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.161814] platform_probe+0xa2/0x150 [ 13.165029] really_probe+0x1e3/0x8a0 [ 13.168243] __driver_probe_device+0x18c/0x370 [ 13.171500] driver_probe_device+0x4a/0x120 [ 13.175000] __driver_attach+0x190/0x4a0 [ 13.178521] ? __pfx___driver_attach+0x10/0x10 [ 13.181771] bus_for_each_dev+0x106/0x180 [ 13.185033] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.188229] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.191446] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.194382] bus_add_driver+0x29e/0x4d0 [ 13.197328] driver_register+0x1a5/0x360 [ 13.200283] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.203362] do_one_initcall+0xa7/0x380 [ 13.206432] ? __pfx_do_one_initcall+0x10/0x10 [ 13.210175] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.213211] ? kasan_unpoison+0x44/0x70 [ 13.216688] do_init_module+0x238/0x750 [ 13.219696] load_module+0x5011/0x6af0 [ 13.223096] ? kasan_save_stack+0x30/0x50 [ 13.226743] ? kasan_save_track+0x14/0x30 [ 13.230080] ? kasan_save_free_info+0x3b/0x60 [ 13.233323] ? poison_slab_object+0x109/0x180 [ 13.236778] ? __pfx_load_module+0x10/0x10 [ 13.239703] ? poison_slab_object+0x109/0x180 [ 13.243070] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.245924] ? init_module_from_file+0x13d/0x150 [ 13.248745] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.251503] ? init_module_from_file+0xdf/0x150 [ 13.254198] init_module_from_file+0xdf/0x150 [ 13.256826] ? __pfx_init_module_from_file+0x10/0x10 [ 13.259428] ? kasan_save_track+0x14/0x30 [ 13.261959] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.264471] ? kasan_save_free_info+0x3b/0x60 [ 13.267026] ? poison_slab_object+0x109/0x180 [ 13.269494] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.271949] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.274324] ? _raw_spin_lock+0x85/0xe0 [ 13.276671] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.278963] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.281193] idempotent_init_module+0x23b/0x650 [ 13.283420] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.285619] ? __pfx___seccomp_filter+0x10/0x10 [ 13.287714] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.289828] ? __fget_light+0x57/0x420 [ 13.291870] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.293880] ? security_capable+0x74/0xb0 [ 13.295820] __x64_sys_finit_module+0xbe/0x130 [ 13.297874] do_syscall_64+0x82/0x190 [ 13.299898] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.301905] ? irqtime_account_irq+0x3d/0x1f0 [ 13.303877] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.305753] ? __irq_exit_rcu+0x4e/0x130 [ 13.307577] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.309489] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.311371] RIP: 0033:0x7a21f96ade9d [ 13.313234] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.317051] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.319024] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.321100] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.323314] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.325505] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.327637] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.329737] [ 13.333945] Allocated by task 139: [ 13.336111] kasan_save_stack+0x30/0x50 [ 13.336121] kasan_save_track+0x14/0x30 [ 13.336125] __kasan_kmalloc+0xaa/0xb0 [ 13.336129] amdtp_hid_probe+0xb1/0x440 [amd_sfh] [ 13.336138] amd_sfh_hid_client_init+0xb8a/0x10f0 [amd_sfh] [ 13.336144] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.336150] process_one_work+0x673/0xeb0 [ 13.336155] worker_thread+0x795/0x1250 [ 13.336160] kthread+0x290/0x350 [ 13.336164] ret_from_fork+0x34/0x70 [ 13.336169] ret_from_fork_asm+0x1a/0x30 [ 13.338175] Freed by task 139: [ 13.340064] kasan_save_stack+0x30/0x50 [ 13.340072] kasan_save_track+0x14/0x30 [ 13.340076] kasan_save_free_info+0x3b/0x60 [ 13.340081] poison_slab_object+0x109/0x180 [ 13.340085] __kasan_slab_free+0x32/0x50 [ 13.340089] kfree+0xe5/0x310 [ 13.340094] amdtp_hid_remove+0xb2/0x160 [amd_sfh] [ 13.340102] amd_sfh_hid_client_deinit+0x324/0x640 [amd_sfh] [ 13.340107] amd_sfh_hid_client_init+0x94a/0x10f0 [amd_sfh] [ 13.340113] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.340118] process_one_work+0x673/0xeb0 [ 13.340123] worker_thread+0x795/0x1250 [ 13.340127] kthread+0x290/0x350 [ 13.340132] ret_from_fork+0x34/0x70 [ 13.340136] ret_from_fork_asm+0x1a/0x30 [ 13.342482] The buggy address belongs to the object at ffff88813152f400 which belongs to the cache kmalloc-64 of size 64 [ 13.347357] The buggy address is located 8 bytes inside of freed 64-byte region [ffff88813152f400, ffff88813152f440) [ 13.347367] The buggy address belongs to the physical page: [ 13.355409] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x13152f [ 13.355416] anon flags: 0x2ffff8000000000(node=0|zone=2|lastcpupid=0x1ffff) [ 13.355423] page_type: 0xffffefff(slab) [ 13.355429] raw: 02ffff8000000000 ffff8881000428c0 ffffea0004c43a00 0000000000000005 [ 13.355435] raw: 0000000000000000 0000000000200020 00000001ffffefff 0000000000000000 [ 13.355439] page dumped because: kasan: bad access detected [ 13.357295] Memory state around the buggy address: [ 13.357299] ffff88813152f300: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357303] ffff88813152f380: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357306] >ffff88813152f400: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357309] ^ [ 13.357311] ffff88813152f480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 13.357315] ffff88813152f500: 00 00 00 00 00 00 00 06 fc fc fc fc fc fc fc fc [ 13.357318] ================================================================== [ 13.357405] Disabling lock debugging due to kernel taint [ 13.383534] Oops: general protection fault, probably for non-canonical address 0xe0a1bc4140000013: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 13.383544] KASAN: maybe wild-memory-access in range [0x050e020a00000098-0x050e020a0000009f] [ 13.383551] CPU: 3 PID: 479 Comm: (udev-worker) Tainted: G B 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.383561] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.383565] RIP: 0010:amd_sfh_get_report+0x81/0x530 [amd_sfh] [ 13.383580] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 78 03 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 08 49 8d 7c 24 10 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 1a 03 00 00 45 8b 74 24 10 45 [ 13.383585] RSP: 0018:ffff8881261f7388 EFLAGS: 00010212 [ 13.383592] RAX: dffffc0000000000 RBX: ffff88813152f400 RCX: 0000000000000002 [ 13.383597] RDX: 00a1c04140000013 RSI: 0000000000000008 RDI: 050e020a0000009b [ 13.383600] RBP: ffff88814d010000 R08: 0000000000000002 R09: fffffbfff3ddb8c0 [ 13.383604] R10: ffffffff9eedc607 R11: ffff88810ce98000 R12: 050e020a0000008b [ 13.383607] R13: ffff88814d010000 R14: dffffc0000000000 R15: 0000000000000004 [ 13.383611] FS: 00007a21f94d0880(0000) GS:ffff8887e7d80000(0000) knlGS:0000000000000000 [ 13.383615] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 13.383618] CR2: 00007e0014c438f0 CR3: 000000012614c000 CR4: 0000000000f50ef0 [ 13.383622] PKRU: 55555554 [ 13.383625] Call Trace: [ 13.383629] [ 13.383632] ? __die_body.cold+0x19/0x27 [ 13.383644] ? die_addr+0x46/0x70 [ 13.383652] ? exc_general_protection+0x150/0x240 [ 13.383664] ? asm_exc_general_protection+0x26/0x30 [ 13.383674] ? amd_sfh_get_report+0x81/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383686] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383697] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383706] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383713] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.383727] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383739] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383745] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383753] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.383762] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.383768] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.383790] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383795] ? __devm_add_action+0x167/0x1d0 [ 13.383806] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383818] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383826] platform_probe+0xa2/0x150 [ 13.383832] really_probe+0x1e3/0x8a0 [ 13.383838] __driver_probe_device+0x18c/0x370 [ 13.383844] driver_probe_device+0x4a/0x120 [ 13.383851] __driver_attach+0x190/0x4a0 [ 13.383857] ? __pfx___driver_attach+0x10/0x10 [ 13.383863] bus_for_each_dev+0x106/0x180 [ 13.383868] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.383874] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.383880] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383887] bus_add_driver+0x29e/0x4d0 [ 13.383895] driver_register+0x1a5/0x360 [ 13.383902] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383910] do_one_initcall+0xa7/0x380 [ 13.383919] ? __pfx_do_one_initcall+0x10/0x10 [ 13.383927] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383933] ? kasan_unpoison+0x44/0x70 [ 13.383943] do_init_module+0x238/0x750 [ 13.383955] load_module+0x5011/0x6af0 [ 13.383962] ? kasan_save_stack+0x30/0x50 [ 13.383968] ? kasan_save_track+0x14/0x30 [ 13.383973] ? kasan_save_free_info+0x3b/0x60 [ 13.383980] ? poison_slab_object+0x109/0x180 [ 13.383993] ? __pfx_load_module+0x10/0x10 [ 13.384007] ? poison_slab_object+0x109/0x180 [ 13.384012] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384018] ? init_module_from_file+0x13d/0x150 [ 13.384025] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384032] ? init_module_from_file+0xdf/0x150 [ 13.384037] init_module_from_file+0xdf/0x150 [ 13.384044] ? __pfx_init_module_from_file+0x10/0x10 [ 13.384050] ? kasan_save_track+0x14/0x30 [ 13.384055] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384060] ? kasan_save_free_info+0x3b/0x60 [ 13.384066] ? poison_slab_object+0x109/0x180 [ 13.384071] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384080] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384085] ? _raw_spin_lock+0x85/0xe0 [ 13.384091] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.384096] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.384106] idempotent_init_module+0x23b/0x650 [ 13.384114] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.384120] ? __pfx___seccomp_filter+0x10/0x10 [ 13.384129] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384135] ? __fget_light+0x57/0x420 [ 13.384142] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384147] ? security_capable+0x74/0xb0 [ 13.384157] __x64_sys_finit_module+0xbe/0x130 [ 13.384164] do_syscall_64+0x82/0x190 [ 13.384174] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384179] ? irqtime_account_irq+0x3d/0x1f0 [ 13.384188] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384193] ? __irq_exit_rcu+0x4e/0x130 [ 13.384201] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384206] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.384212] RIP: 0033:0x7a21f96ade9d [ 13.384263] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.384267] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.384273] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.384277] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.384280] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.384284] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.384288] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.384297] [ 13.384299] Modules linked in: soundwire_amd(+) hid_sensor_gyro_3d(+) hid_sensor_magn_3d hid_sensor_accel_3d soundwire_generic_allocation amdxcp hid_sensor_trigger drm_exec industrialio_triggered_buffer soundwire_bus gpu_sched kvm_amd kfifo_buf qmi_helpers joydev drm_buddy hid_sensor_iio_common mousedev snd_soc_core industrialio i2c_algo_bit mac80211 snd_compress drm_suballoc_helper kvm snd_hda_intel drm_ttm_helper ac97_bus snd_pcm_dmaengine snd_intel_dspcfg ttm thinkpad_acpi(+) snd_intel_sdw_acpi hid_sensor_hub snd_rpl_pci_acp6x drm_display_helper snd_hda_codec hid_multitouch libarc4 snd_acp_pci platform_profile think_lmi(+) hid_generic firmware_attributes_class wmi_bmof cec snd_acp_legacy_common sparse_keymap rapl snd_hda_core psmouse cfg80211 pcspkr snd_pci_acp6x snd_hwdep video snd_pcm snd_pci_acp5x snd_timer snd_rn_pci_acp3x ucsi_acpi snd_acp_config snd sp5100_tco rfkill snd_soc_acpi typec_ucsi thunderbolt amd_sfh k10temp mhi soundcore i2c_piix4 snd_pci_acp3x typec i2c_hid_acpi roles i2c_hid wmi acpi_tad amd_pmc [ 13.384454] mac_hid i2c_dev crypto_user loop nfnetlink zram ip_tables x_tables dm_crypt cbc encrypted_keys trusted asn1_encoder tee dm_mod crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic gf128mul ghash_clmulni_intel serio_raw sha512_ssse3 atkbd sha256_ssse3 libps2 sha1_ssse3 vivaldi_fmap nvme aesni_intel crypto_simd nvme_core cryptd ccp xhci_pci i8042 nvme_auth xhci_pci_renesas serio vfat fat btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq [ 13.384552] ---[ end trace 0000000000000000 ]--- KASAN reports a use-after-free of hid->driver_data in function amd_sfh_get_report(). The backtrace indicates that the function is called by amdtp_hid_request() which is one of the callbacks of hid device. The current make sure that driver_data is freed only once hid_destroy_device() returned. Note that I observed the crash both on v6.9.9 and v6.10.0. The code seems to be as it was from the early days of the driver. Signed-off-by: Olivier Sobrie Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_hid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b52337068..81f3024b7b1b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } From c8000deb68365b461b324d68c7ea89d730f0bb85 Mon Sep 17 00:00:00 2001 From: Dmitry Savin Date: Tue, 16 Jul 2024 23:27:57 +0100 Subject: [PATCH 0359/1523] HID: multitouch: Add support for GT7868Q GT7868Q has incorrect data in the report and needs a fixup. The change enables haptic touchpad on Lenovo ThinkBook 13x Gen 4 and has been tested on the device. Signed-off-by: Dmitry Savin Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-multitouch.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 6e3223389080..781c5aa29859 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -521,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f24..99812c0f830b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), From 1b8f9c1fb464968a5b18d3acc1da8c00bad24fad Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 30 Jul 2024 08:51:55 -0700 Subject: [PATCH 0360/1523] HID: wacom: Defer calculation of resolution until resolution_code is known The Wacom driver maps the HID_DG_TWIST usage to ABS_Z (rather than ABS_RZ) for historic reasons. When the code to support twist was introduced in commit 50066a042da5 ("HID: wacom: generic: Add support for height, tilt, and twist usages"), we were careful to write it in such a way that it had HID calculate the resolution of the twist axis assuming ABS_RZ instead (so that we would get correct angular behavior). This was broken with the introduction of commit 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets"), which moved the resolution calculation to occur *before* the adjustment from ABS_Z to ABS_RZ occurred. This commit moves the calculation of resolution after the point that we are finished setting things up for its proper use. Signed-off-by: Jason Gerecke Fixes: 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets") Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1f4564982b95..2541fa2e0fa3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1878,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; From 64a66f4a3c89b4602ee1e6cd23b28729fc4562b3 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Kopper Date: Thu, 1 Aug 2024 13:41:50 -0300 Subject: [PATCH 0361/1523] cpufreq: intel_pstate: Update Balance performance EPP for Emerald Rapids On Intel Emerald Rapids machines, we ship the Energy Performance Preference (EPP) default for balance_performance as 128. However, during an internal investigation together with Intel, we have determined that 32 is a more suitable value. This leads to significant improvements in both performance and energy: POV-Ray: 32% faster | 12% less energy OpenSSL: 12% faster | energy within 1% Build Linux Kernel: 29% faster | 18% less energy Therefore, we should move the default EPP for balance_performance to 32. This is in line with what has already been done for Sapphire Rapids. Signed-off-by: Pedro Henrique Kopper Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/Zqu6zjVMoiXwROBI@capivara Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 392a8000b238..c0278d023cfc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3405,6 +3405,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, 179, 64, 16)), X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, From becfa08bfefa2cbb22c84d9e583e81387f2f3bf2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:31 +0100 Subject: [PATCH 0362/1523] ASoC: cs42l43: Remove redundant semi-colon at end of function Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 92674314227c..80825777048a 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -608,7 +608,7 @@ static int cs42l43_sdw_hw_params(struct snd_pcm_substream *substream, return ret; return cs42l43_set_sample_rate(substream, params, dai); -}; +} static const struct snd_soc_dai_ops cs42l43_sdw_ops = { .startup = cs42l43_startup, From c8a132e2e032b00828d51141ab34f9aeb24f44ae Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:32 +0100 Subject: [PATCH 0363/1523] ASoC: soc-component: Add new snd_soc_component_get_kcontrol() helpers Add new helper functions snd_soc_component_get_kcontrol() and snd_soc_component_get_kcontrol_locked() that returns a kcontrol by name, but will factor in the components name_prefix, to handle situations where multiple components are present with the same controls. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 5 +++++ sound/soc/soc-component.c | 42 ++++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a82..bf2e381cd124 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 4d7c2e3c929a..42f481321919 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -236,19 +236,45 @@ int snd_soc_component_force_enable_pin_unlocked( } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); +static void soc_get_kcontrol_name(struct snd_soc_component *component, + char *buf, int size, const char * const ctl) +{ + /* When updating, change also snd_soc_dapm_widget_name_cmp() */ + if (component->name_prefix) + snprintf(buf, size, "%s %s", component->name_prefix, ctl); + else + snprintf(buf, size, "%s", ctl); +} + +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol); + +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol_locked(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol_locked); + int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl) { - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; struct snd_kcontrol *kctl; - /* When updating, change also snd_soc_dapm_widget_name_cmp() */ - if (component->name_prefix) - snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); - else - snprintf(name, ARRAY_SIZE(name), "%s", ctl); - - kctl = snd_soc_card_get_kcontrol(component->card, name); + kctl = snd_soc_component_get_kcontrol(component, ctl); if (!kctl) return soc_component_ret(component, -EINVAL); From 4791c422981350d0de4ad02a14a08b99c766d06f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:33 +0100 Subject: [PATCH 0364/1523] ASoC: cs35l45: Use new snd_soc_component_get_kcontrol_locked() helper No longer any need to hard code the addition of the name prefix, use the new helper function. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l45.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 2392c6effed8..1e9d73bee3b4 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -176,17 +176,10 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, struct snd_kcontrol *kcontrol; struct snd_kcontrol_volatile *vd; unsigned int index_offset; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - if (component->name_prefix) - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s", - component->name_prefix, ctl_name); - else - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - - kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); + kcontrol = snd_soc_component_get_kcontrol_locked(component, ctl_name); if (!kcontrol) { - dev_err(component->dev, "Can't find kcontrol %s\n", name); + dev_err(component->dev, "Can't find kcontrol %s\n", ctl_name); return -EINVAL; } From 93afd028fb5f06a46a32375fd1f0473451eb1c5a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:34 +0100 Subject: [PATCH 0365/1523] ASoC: cs42l43: Cache shutter IRQ control pointers The microphone/speaker privacy shutter ALSA control handlers need to call pm_runtime_resume, since the hardware needs to be powered up to check the hardware state of the shutter. The IRQ handler for the shutters also needs to notify the ALSA control to inform user-space the shutters updated. However this leads to a mutex inversion, between the sdw_dev_lock and the controls_rwsem. To avoid this mutex inversion cache the kctl pointers before the IRQ handler, which avoids the need to lookup the control and take the controls_rwsem. Suggested-by: Jaroslav Kysela Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 73 +++++++++++++++++++++++++++++--------- sound/soc/codecs/cs42l43.h | 2 ++ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 80825777048a..5183b4586424 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -252,24 +253,20 @@ CS42L43_IRQ_COMPLETE(load_detect) static irqreturn_t cs42l43_mic_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - static const char * const controls[] = { - "Decimator 1 Switch", - "Decimator 2 Switch", - "Decimator 3 Switch", - "Decimator 4 Switch", - }; - int i, ret; + struct snd_soc_component *component = priv->component; + int i; dev_dbg(priv->dev, "Microphone shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - for (i = 0; i < ARRAY_SIZE(controls); i++) { - ret = snd_soc_component_notify_control(priv->component, - controls[i]); - if (ret) + for (i = 1; i < ARRAY_SIZE(priv->kctl); i++) { + if (!priv->kctl[i]) return IRQ_NONE; + + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[i]->id); } return IRQ_HANDLED; @@ -278,18 +275,19 @@ static irqreturn_t cs42l43_mic_shutter(int irq, void *data) static irqreturn_t cs42l43_spk_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - int ret; + struct snd_soc_component *component = priv->component; dev_dbg(priv->dev, "Speaker shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - ret = snd_soc_component_notify_control(priv->component, - "Speaker Digital Switch"); - if (ret) + if (!priv->kctl[0]) return IRQ_NONE; + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[0]->id); + return IRQ_HANDLED; } @@ -590,7 +588,46 @@ static int cs42l43_asp_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mas return 0; } +static int cs42l43_dai_probe(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + static const char * const controls[] = { + "Speaker Digital Switch", + "Decimator 1 Switch", + "Decimator 2 Switch", + "Decimator 3 Switch", + "Decimator 4 Switch", + }; + int i; + + static_assert(ARRAY_SIZE(controls) == ARRAY_SIZE(priv->kctl)); + + for (i = 0; i < ARRAY_SIZE(controls); i++) { + if (priv->kctl[i]) + continue; + + priv->kctl[i] = snd_soc_component_get_kcontrol(component, controls[i]); + } + + return 0; +} + +static int cs42l43_dai_remove(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + int i; + + for (i = 0; i < ARRAY_SIZE(priv->kctl); i++) + priv->kctl[i] = NULL; + + return 0; +} + static const struct snd_soc_dai_ops cs42l43_asp_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .hw_params = cs42l43_asp_hw_params, .set_fmt = cs42l43_asp_set_fmt, @@ -611,6 +648,8 @@ static int cs42l43_sdw_hw_params(struct snd_pcm_substream *substream, } static const struct snd_soc_dai_ops cs42l43_sdw_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .set_stream = cs42l43_sdw_set_stream, .hw_params = cs42l43_sdw_hw_params, diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9924c13e1eb5..9c144e129535 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -100,6 +100,8 @@ struct cs42l43_codec { struct delayed_work hp_ilimit_clear_work; bool hp_ilimited; int hp_ilimit_count; + + struct snd_kcontrol *kctl[5]; }; #if IS_REACHABLE(CONFIG_SND_SOC_CS42L43_SDW) From fca5b78511e98bdff2cdd55c172b23200a7b3404 Mon Sep 17 00:00:00 2001 From: Barak Biber Date: Thu, 1 Aug 2024 09:26:04 -0300 Subject: [PATCH 0366/1523] iommu: Restore lost return in iommu_report_device_fault() When iommu_report_device_fault gets called with a partial fault it is supposed to collect the fault into the group and then return. Instead the return was accidently deleted which results in trying to process the fault and an eventual crash. Deleting the return was a typo, put it back. Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Barak Biber Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-e7153d9c8cee+1c6-iommu_fault_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e..81e9cc6e3164 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* From 90ec3a8a7fd0d43026fcca979713e077d4883b56 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:13 +0100 Subject: [PATCH 0367/1523] spi: Add empty versions of ACPI functions Provide empty versions of acpi_spi_count_resources(), acpi_spi_device_alloc() and acpi_spi_find_controller_by_adev() if the real functions are not being built. This commit fixes two problems with the original definitions: 1) There wasn't an empty version of these functions 2) The #if only depended on CONFIG_ACPI. But the functions are implemented in the core spi.c so CONFIG_SPI_MASTER must also be enabled for the real functions to exist. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240802152215.20831-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a03..d47d5f14ff99 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* From 32b9a52f88a5713bf8a02dae66f2ad69705de69f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 23 Jul 2024 16:20:52 +0200 Subject: [PATCH 0368/1523] KVM: arm64: free kvm->arch.nested_mmus with kvfree() kvm->arch.nested_mmus is allocated with kvrealloc(), hence free it with kvfree() instead of kfree(). Fixes: 4f128f8e1aaa ("KVM: arm64: nv: Support multiple nested Stage-2 mmu structures") Signed-off-by: Danilo Krummrich Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240723142204.758796-1-dakr@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index de789e0f1ae9..bab27f9d8cc6 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -786,7 +786,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) if (!WARN_ON(atomic_read(&mmu->refcnt))) kvm_free_stage2_pgd(mmu); } - kfree(kvm->arch.nested_mmus); + kvfree(kvm->arch.nested_mmus); kvm->arch.nested_mmus = NULL; kvm->arch.nested_mmus_size = 0; kvm_uninit_stage2_mmu(kvm); From 963a08e586bd45fd55f4c1752e98029ce83fc091 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jul 2024 12:12:02 +0200 Subject: [PATCH 0369/1523] KVM: arm64: fix override-init warnings in W=1 builds Add -Wno-override-init to the build flags for sys_regs.c, handle_exit.c, and switch.c to fix warnings like the following: arch/arm64/kvm/hyp/vhe/switch.c:271:43: warning: initialized field overwritten [-Woverride-init] 271 | [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, | Signed-off-by: Sebastian Ott Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240723101204.7356-2-sebott@redhat.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/Makefile | 3 +++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 ++ arch/arm64/kvm/hyp/vhe/Makefile | 2 ++ 3 files changed, 7 insertions(+) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8..86a629aaf0a1 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,9 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ +CFLAGS_sys_regs.o += -Wno-override-init +CFLAGS_handle_exit.o += -Wno-override-init + kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 782b34b004be..b43426a493df 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) +CFLAGS_switch.nvhe.o += -Wno-override-init + hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 3b9e5464b5b3..afc4aed9231a 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,6 +6,8 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ +CFLAGS_switch.o += -Wno-override-init + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o From 0aa34b37a78d063da58838b84b20a68a94d919fd Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jul 2024 12:12:03 +0200 Subject: [PATCH 0370/1523] KVM: arm64: fix kdoc warnings in W=1 builds Fix kdoc warnings by adding missing function parameter descriptions or by conversion to a normal comment. Signed-off-by: Sebastian Ott Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240723101204.7356-3-sebott@redhat.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/vgic/vgic-irqfd.c | 7 ++++--- arch/arm64/kvm/vgic/vgic-its.c | 18 +++++++++++------- arch/arm64/kvm/vgic/vgic-v3.c | 2 +- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a7ca776b51ec..23e1fa56c02d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,7 @@ static int kvm_arm_default_max_vcpus(void) /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct + * @type: kvm device type */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 8c711deb25aa..c314c016659a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -9,7 +9,7 @@ #include #include "vgic.h" -/** +/* * vgic_irqfd_set_irq: inject the IRQ corresponding to the * irqchip routing entry * @@ -75,7 +75,8 @@ static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, msi->flags = e->msi.flags; msi->devid = e->msi.devid; } -/** + +/* * kvm_set_msi: inject the MSI corresponding to the * MSI routing entry * @@ -98,7 +99,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return vgic_its_inject_msi(kvm, &msi); } -/** +/* * kvm_arch_set_irq_inatomic: fast-path for irqfd injection */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 40bb43f20bf3..ba945ba78cc7 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2040,6 +2040,7 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, * @start_id: the ID of the first entry in the table * (non zero for 2d level tables) * @fn: function to apply on each entry + * @opaque: pointer to opaque data * * Return: < 0 on error, 0 if last element was identified, 1 otherwise * (the last element may not be found on second level tables) @@ -2079,7 +2080,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return 1; } -/** +/* * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, @@ -2099,6 +2100,8 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, /** * vgic_its_restore_ite - restore an interrupt translation entry + * + * @its: its handle * @event_id: id used for indexing * @ptr: pointer to the ITE entry * @opaque: pointer to the its_device @@ -2231,6 +2234,7 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA + * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, gpa_t ptr, int dte_esz) @@ -2313,7 +2317,7 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, return 1; } -/** +/* * vgic_its_save_device_tables - Save the device table and all ITT * into guest RAM * @@ -2386,7 +2390,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, return ret; } -/** +/* * vgic_its_restore_device_tables - Restore the device table and all ITT * from guest RAM to internal data structs */ @@ -2478,7 +2482,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) return 1; } -/** +/* * vgic_its_save_collection_table - Save the collection table into * guest RAM */ @@ -2518,7 +2522,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_restore_collection_table - reads the collection table * in guest memory and restores the ITS internal state. Requires the * BASER registers to be restored before. @@ -2556,7 +2560,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM * according to v0 ABI */ @@ -2571,7 +2575,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its) return vgic_its_save_collection_table(its); } -/** +/* * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM * to internal data structs according to V0 ABI * diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ed6e412cd74b..3eecdd2f4b8f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -370,7 +370,7 @@ static void map_all_vpes(struct kvm *kvm) dist->its_vm.vpes[i]->irq)); } -/** +/* * vgic_v3_save_pending_tables - Save the pending tables into guest RAM * kvm lock and all vcpu lock must be held */ From 19d837bc881b2f9f72f9eb506b46c2e2d983896d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jul 2024 12:12:04 +0200 Subject: [PATCH 0371/1523] KVM: arm64: vgic: fix unexpected unlock sparse warnings Get rid of unexpected unlock sparse warnings in vgic code by adding an annotation to vgic_queue_irq_unlock(). arch/arm64/kvm/vgic/vgic.c:334:17: warning: context imbalance in 'vgic_queue_irq_unlock' - unexpected unlock arch/arm64/kvm/vgic/vgic.c:419:5: warning: context imbalance in 'kvm_vgic_inject_irq' - different lock contexts for basic block Signed-off-by: Sebastian Ott Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240723101204.7356-4-sebott@redhat.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic.c | 2 +- arch/arm64/kvm/vgic/vgic.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f07b3ddff7d4..974849ea7101 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -313,7 +313,7 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * with all locks dropped. */ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags) + unsigned long flags) __releases(&irq->irq_lock) { struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 03d356a12377..ba8f790431bd 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -186,7 +186,7 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags); + unsigned long flags) __releases(&irq->irq_lock); void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); From e0391e92f9ab4fb3dbdeb139c967dcfa7ac4b115 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Aug 2024 09:38:51 +0100 Subject: [PATCH 0372/1523] btrfs: fix double inode unlock for direct IO sync writes If we do a direct IO sync write, at btrfs_sync_file(), and we need to skip inode logging or we get an error starting a transaction or an error when flushing delalloc, we end up unlocking the inode when we shouldn't under the 'out_release_extents' label, and then unlock it again at btrfs_direct_write(). Fix that by checking if we have to skip inode unlocking under that label. Reported-by: syzbot+7dbbb74af6291b5a5a8b@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/000000000000dfd631061eaeb4bc@google.com/ Fixes: 939b656bc8ab ("btrfs: fix corruption after buffer fault in during direct IO append write") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9f10a9f23fcc..9914419f3b7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1868,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } From 12653ec36112ab55fa06c01db7c4432653d30a8d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 19 Jul 2024 18:56:46 +0930 Subject: [PATCH 0373/1523] btrfs: avoid using fixed char array size for tree names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There is a bug report that using the latest trunk GCC 15, btrfs would cause unterminated-string-initialization warning: linux-6.6/fs/btrfs/print-tree.c:29:49: error: initializer-string for array of ‘char’ is too long [-Werror=unterminated-string-initialization] 29 | { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" }, | ^~~~~~~~~~~~~~~~~~ [CAUSE] To print tree names we have an array of root_name_map structure, which uses "char name[16];" to store the name string of a tree. But the following trees have names exactly at 16 chars length: - "BLOCK_GROUP_TREE" - "RAID_STRIPE_TREE" This means we will have no space for the terminating '\0', and can lead to unexpected access when printing the name. [FIX] Instead of "char name[16];" use "const char *" instead. Since the name strings are all read-only data, and are all NULL terminated by default, there is not much need to bother the length at all. Reported-by: Sam James Reported-by: Alejandro Colomar Fixes: edde81f1abf29 ("btrfs: add raid stripe tree pretty printer") Fixes: 9c54e80ddc6bd ("btrfs: add code to support the block group root") CC: stable@vger.kernel.org # 6.1+ Suggested-by: Alejandro Colomar Reviewed-by: Johannes Thumshirn Reviewed-by: Alejandro Colomar Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/print-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da3..fc821aa446f0 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { From 2009e808bc3e0df6d4d83e2271bc25ae63a4ac05 Mon Sep 17 00:00:00 2001 From: Akshata Jahagirdar Date: Fri, 2 Aug 2024 07:12:03 -0700 Subject: [PATCH 0374/1523] drm/xe/xe2: Introduce performance changes Add Compression Performance Improvement Changes in Xe2 v2: Rebase v3: Rebase, updated as per latest changes on bspec, Removed unnecessary default actions (Matt) formatting nits (Tejas) v4: Formatting nits, removed default set action for bit 14 (Matt) Bspec: 72161 Signed-off-by: Akshata Jahagirdar Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/c2dd753fdc55df6a6432026f2df9c2684a0d25c1.1722607628.git.akshata.jahagirdar@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_tuning.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3b87f95f9ecf..c50643ab4c84 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -367,6 +367,9 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define L3SQCREG2 XE_REG_MCR(0xb104) +#define COMPMEMRD256BOVRFETCHEN REG_BIT(20) + #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77d4eec0118d..3817b7743b0c 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -45,6 +45,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, {} }; From eeef5f183f1c1bdc3ea42b26ded1da2a8a5c69d9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 1 Aug 2024 09:28:42 -0700 Subject: [PATCH 0375/1523] MAINTAINERS: update status of sky2 and skge drivers The old SysKonnect NIc's are not used or actively maintained anymore. My sky2 NIC's are all in box in back corner of attic. Signed-off-by: Stephen Hemminger Link: https://patch.msgid.link/20240801162930.212299-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e..714e113d6eed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13539,7 +13539,7 @@ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner M: Stephen Hemminger L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER From 9ab0faa7f9ffe31296dbb9bbe6f76c72c14eea18 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 31 Jul 2024 16:46:24 -0700 Subject: [PATCH 0376/1523] sctp: Fix null-ptr-deref in reuseport_add_sock(). syzbot reported a null-ptr-deref while accessing sk2->sk_reuseport_cb in reuseport_add_sock(). [0] The repro first creates a listener with SO_REUSEPORT. Then, it creates another listener on the same port and concurrently closes the first listener. The second listen() calls reuseport_add_sock() with the first listener as sk2, where sk2->sk_reuseport_cb is not expected to be cleared concurrently, but the close() does clear it by reuseport_detach_sock(). The problem is SCTP does not properly synchronise reuseport_alloc(), reuseport_add_sock(), and reuseport_detach_sock(). The caller of reuseport_alloc() and reuseport_{add,detach}_sock() must provide synchronisation for sockets that are classified into the same reuseport group. Otherwise, such sockets form multiple identical reuseport groups, and all groups except one would be silently dead. 1. Two sockets call listen() concurrently 2. No socket in the same group found in sctp_ep_hashtable[] 3. Two sockets call reuseport_alloc() and form two reuseport groups 4. Only one group hit first in __sctp_rcv_lookup_endpoint() receives incoming packets Also, the reported null-ptr-deref could occur. TCP/UDP guarantees that would not happen by holding the hash bucket lock. Let's apply the locking strategy to __sctp_hash_endpoint() and __sctp_unhash_endpoint(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 UID: 0 PID: 10230 Comm: syz-executor119 Not tainted 6.10.0-syzkaller-12585-g301927d2d2eb #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024 RIP: 0010:reuseport_add_sock+0x27e/0x5e0 net/core/sock_reuseport.c:350 Code: 00 0f b7 5d 00 bf 01 00 00 00 89 de e8 1b a4 ff f7 83 fb 01 0f 85 a3 01 00 00 e8 6d a0 ff f7 49 8d 7e 12 48 89 f8 48 c1 e8 03 <42> 0f b6 04 28 84 c0 0f 85 4b 02 00 00 41 0f b7 5e 12 49 8d 7e 14 RSP: 0018:ffffc9000b947c98 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff8880252ddf98 RCX: ffff888079478000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000012 RBP: 0000000000000001 R08: ffffffff8993e18d R09: 1ffffffff1fef385 R10: dffffc0000000000 R11: fffffbfff1fef386 R12: ffff8880252ddac0 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f24e45b96c0(0000) GS:ffff8880b9300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffcced5f7b8 CR3: 00000000241be000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __sctp_hash_endpoint net/sctp/input.c:762 [inline] sctp_hash_endpoint+0x52a/0x600 net/sctp/input.c:790 sctp_listen_start net/sctp/socket.c:8570 [inline] sctp_inet_listen+0x767/0xa20 net/sctp/socket.c:8625 __sys_listen_socket net/socket.c:1883 [inline] __sys_listen+0x1b7/0x230 net/socket.c:1894 __do_sys_listen net/socket.c:1902 [inline] __se_sys_listen net/socket.c:1900 [inline] __x64_sys_listen+0x5a/0x70 net/socket.c:1900 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f24e46039b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 91 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f24e45b9228 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 00007f24e468e428 RCX: 00007f24e46039b9 RDX: 00007f24e46039b9 RSI: 0000000000000003 RDI: 0000000000000004 RBP: 00007f24e468e420 R08: 00007f24e45b96c0 R09: 00007f24e45b96c0 R10: 00007f24e45b96c0 R11: 0000000000000246 R12: 00007f24e468e42c R13: 00007f24e465a5dc R14: 0020000000000001 R15: 00007ffcced5f7d8 Modules linked in: Fixes: 6ba845740267 ("sctp: process sk_reuseport in sctp_get_port_local") Reported-by: syzbot+e6979a5d2f10ecb700e4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e6979a5d2f10ecb700e4 Tested-by: syzbot+e6979a5d2f10ecb700e4@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20240731234624.94055-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 17fcaa9b0df9..a8a254a5008e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; + int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; - int cnt = 0, err = 1; + int cnt = 0; + + err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; @@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) - return err; + goto out; break; } else if (err < 0) { - return err; + goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) - return err; + goto out; } } - write_lock(&head->lock); hlist_add_head(&ep->node, &head->chain); +out: write_unlock(&head->lock); - return 0; + return err; } /* Add an endpoint to the hash. Local BH-safe. */ @@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - - write_lock(&head->lock); hlist_del_init(&ep->node); write_unlock(&head->lock); } From 89108cb5c28527c1882df2987394e5c261a1f4aa Mon Sep 17 00:00:00 2001 From: Kyle Swenson Date: Wed, 31 Jul 2024 15:42:14 +0000 Subject: [PATCH 0377/1523] net: pse-pd: tps23881: Fix the device ID check The DEVID register contains two pieces of information: the device ID in the upper nibble, and the silicon revision number in the lower nibble. The driver should work fine with any silicon revision, so let's mask that out in the device ID check. Fixes: 20e6d190ffe1 ("net: pse-pd: Add TI TPS23881 PSE controller driver") Signed-off-by: Kyle Swenson Reviewed-by: Thomas Petazzoni Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20240731154152.4020668-1-kyle.swenson@est.tech Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/tps23881.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index 61f6ad9c1934..f90db758554b 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -29,6 +29,8 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 +#define TPS23881_REG_DEVID_MASK 0xF0 +#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -750,7 +752,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (ret != 0x22) { + if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } From fba917b169bea5f8f2ee300e19d5f7a6341a5251 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Thu, 1 Aug 2024 13:56:19 -0700 Subject: [PATCH 0378/1523] gve: Fix use of netif_carrier_ok() GVE driver wrongly relies on netif_carrier_ok() to check the interface administrative state when resources are being allocated/deallocated for queue(s). netif_carrier_ok() needs to be replaced with netif_running() for all such cases. Administrative state is the result of "ip link set dev up/down". It reflects whether the administrator wants to use the device for traffic and the corresponding resources have been allocated. Fixes: 5f08cd3d6423 ("gve: Alloc before freeing when adjusting queues") Signed-off-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240801205619.987396-1-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6..5a8b490ab3ad 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -495,7 +495,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e..661566db68c8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); From f874d7210d882cb1c58a8e3da66f61cdc63cd4b4 Mon Sep 17 00:00:00 2001 From: Li Feng Date: Thu, 18 Jul 2024 16:07:22 +0800 Subject: [PATCH 0379/1523] scsi: sd: Keep the discard mode stable There is a scenario where a large number of discard commands are issued when the iscsi initiator connects to the target and then performs a session rescan operation. There is a time window, most of the commands are in UNMAP mode, and some discard commands become WRITE SAME with UNMAP. The discard mode has been negotiated during the SCSI probe. If the mode is temporarily changed from UNMAP to WRITE SAME with UNMAP, an I/O ERROR may occur because the target may not implement WRITE SAME with UNMAP. Keep the discard mode stable to fix this issue. Signed-off-by: Li Feng Link: https://lore.kernel.org/r/20240718080751.313102-2-fengli@smartx.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 718eb91ba9a5..699f4f9674d9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2711,8 +2711,6 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; - - sd_config_discard(sdkp, lim, SD_LBP_WS16); } sdkp->capacity = lba + 1; @@ -3365,8 +3363,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); - config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); @@ -3755,6 +3751,8 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_zbc_read_zones(sdkp, &lim, buffer); } + sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); + sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); From 14ab4792ee120c022f276a7e4768f4dcb08f0cdd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Thu, 1 Aug 2024 01:13:28 +0100 Subject: [PATCH 0380/1523] net/tcp: Disable TCP-AO static key after RCU grace period The lifetime of TCP-AO static_key is the same as the last tcp_ao_info. On the socket destruction tcp_ao_info ceases to be with RCU grace period, while tcp-ao static branch is currently deferred destructed. The static key definition is : DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); which means that if RCU grace period is delayed by more than a second and tcp_ao_needed is in the process of disablement, other CPUs may yet see tcp_ao_info which atent dead, but soon-to-be. And that breaks the assumption of static_key_fast_inc_not_disabled(). See the comment near the definition: > * The caller must make sure that the static key can't get disabled while > * in this function. It doesn't patch jump labels, only adds a user to > * an already enabled static key. Originally it was introduced in commit eb8c507296f6 ("jump_label: Prevent key->enabled int overflow"), which is needed for the atomic contexts, one of which would be the creation of a full socket from a request socket. In that atomic context, it's known by the presence of the key (md5/ao) that the static branch is already enabled. So, the ref counter for that static branch is just incremented instead of holding the proper mutex. static_key_fast_inc_not_disabled() is just a helper for such usage case. But it must not be used if the static branch could get disabled in parallel as it's not protected by jump_label_mutex and as a result, races with jump_label_update() implementation details. Happened on netdev test-bot[1], so not a theoretical issue: [] jump_label: Fatal kernel bug, unexpected op at tcp_inbound_hash+0x1a7/0x870 [ffffffffa8c4e9b7] (eb 50 0f 1f 44 != 66 90 0f 1f 00)) size:2 type:1 [] ------------[ cut here ]------------ [] kernel BUG at arch/x86/kernel/jump_label.c:73! [] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI [] CPU: 3 PID: 243 Comm: kworker/3:3 Not tainted 6.10.0-virtme #1 [] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [] Workqueue: events jump_label_update_timeout [] RIP: 0010:__jump_label_patch+0x2f6/0x350 ... [] Call Trace: [] [] arch_jump_label_transform_queue+0x6c/0x110 [] __jump_label_update+0xef/0x350 [] __static_key_slow_dec_cpuslocked.part.0+0x3c/0x60 [] jump_label_update_timeout+0x2c/0x40 [] process_one_work+0xe3b/0x1670 [] worker_thread+0x587/0xce0 [] kthread+0x28a/0x350 [] ret_from_fork+0x31/0x70 [] ret_from_fork_asm+0x1a/0x30 [] [] Modules linked in: veth [] ---[ end trace 0000000000000000 ]--- [] RIP: 0010:__jump_label_patch+0x2f6/0x350 [1]: https://netdev-3.bots.linux.dev/vmksft-tcp-ao-dbg/results/696681/5-connect-deny-ipv6/stderr Cc: stable@kernel.org Fixes: 67fa83f7c86a ("net/tcp: Add static_key for TCP-AO") Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ao.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 85531437890c..db6516092daf 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +static void tcp_ao_info_free_rcu(struct rcu_head *head) { - struct tcp_ao_info *ao; + struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu); struct tcp_ao_key *key; struct hlist_node *n; + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); + } + kfree(ao); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao) +{ + size_t total_ao_sk_mem = 0; + struct tcp_ao_key *key; + + hlist_for_each_entry(key, &ao->head, node) + total_ao_sk_mem += tcp_ao_sizeof_key(key); + atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + if (twsk) { ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); - tcp_twsk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL); } else { ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL); } if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; - hlist_for_each_entry_safe(key, n, &ao->head, node) { - hlist_del_rcu(&key->node); - if (!twsk) - atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); - call_rcu(&key->rcu, tcp_ao_key_free_rcu); - } - - kfree_rcu(ao, rcu); - static_branch_slow_dec_deferred(&tcp_ao_needed); + if (!twsk) + tcp_ao_sk_omem_free(sk, ao); + call_rcu(&ao->rcu, tcp_ao_info_free_rcu); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) From f6c29f710c1ff2590109f83be3e212b86c01e0f3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 30 Jul 2024 07:19:41 -0700 Subject: [PATCH 0381/1523] i2c: smbus: Send alert notifications to all devices if source not found If a SMBus alert is received and the originating device is not found, the reason may be that the address reported on the SMBus alert address is corrupted, for example because multiple devices asserted alert and do not correctly implement SMBus arbitration. If this happens, call alert handlers on all devices connected to the given I2C bus, in the hope that this cleans up the situation. This change reliably fixed the problem on a system with multiple devices on a single bus. Example log where the device on address 0x18 (ADM1021) and on address 0x4c (ADT7461A) both had the alert line asserted: smbus_alert 3-000c: SMBALERT# from dev 0x0c, flag 0 smbus_alert 3-000c: no driver alert()! smbus_alert 3-000c: SMBALERT# from dev 0x0c, flag 0 smbus_alert 3-000c: no driver alert()! lm90 3-0018: temp1 out of range, please check! lm90 3-0018: Disabling ALERT# lm90 3-0029: Everything OK lm90 3-002a: Everything OK lm90 3-004c: temp1 out of range, please check! lm90 3-004c: temp2 out of range, please check! lm90 3-004c: Disabling ALERT# Fixes: b5527a7766f0 ("i2c: Add SMBus alert support") Signed-off-by: Guenter Roeck [wsa: fixed a typo in the commit message] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-smbus.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 836c247e7684..8256f7aed0cf 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -65,6 +65,32 @@ static int smbus_do_alert(struct device *dev, void *addrp) return ret; } +/* Same as above, but call back all drivers with alert handler */ + +static int smbus_do_alert_force(struct device *dev, void *addrp) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct alert_data *data = addrp; + struct i2c_driver *driver; + + if (!client || (client->flags & I2C_CLIENT_TEN)) + return 0; + + /* + * Drivers should either disable alerts, or provide at least + * a minimal handler. Lock so the driver won't change. + */ + device_lock(dev); + if (client->dev.driver) { + driver = to_i2c_driver(client->dev.driver); + if (driver->alert) + driver->alert(client, data->type, data->data); + } + device_unlock(dev); + + return 0; +} + /* * The alert IRQ handler needs to hand work off to a task which can issue * SMBus calls, because those sleeping calls can't be made in IRQ context. @@ -106,13 +132,19 @@ static irqreturn_t smbus_alert(int irq, void *d) /* * If we read the same address more than once, and the alert * was not handled by a driver, it won't do any good to repeat - * the loop because it will never terminate. - * Bail out in this case. + * the loop because it will never terminate. Try again, this + * time calling the alert handlers of all devices connected to + * the bus, and abort the loop afterwards. If this helps, we + * are all set. If it doesn't, there is nothing else we can do, + * so we might as well abort the loop. * Note: This assumes that a driver with alert handler handles * the alert properly and clears it if necessary. */ - if (data.addr == prev_addr && status != -EBUSY) + if (data.addr == prev_addr && status != -EBUSY) { + device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert_force); break; + } prev_addr = data.addr; } From f17c06c6608ad4ecd2ccf321753fb511812d821b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:14 +0100 Subject: [PATCH 0382/1523] i2c: Fix conditional for substituting empty ACPI functions Add IS_ENABLED(CONFIG_I2C) to the conditional around a bunch of ACPI functions. The conditional around these functions depended only on CONFIG_ACPI. But the functions are implemented in I2C core, so are only present if CONFIG_I2C is enabled. Signed-off-by: Richard Fitzgerald Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256..7eedd0c662da 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); From 70d76b0e85ad126358baec1b44f797e61e3ebecc Mon Sep 17 00:00:00 2001 From: Felix Kaechele Date: Sat, 3 Aug 2024 23:13:09 -0400 Subject: [PATCH 0383/1523] dt-bindings: input: touchscreen: edt-ft5x06: Document FT8201 support Document FocalTech FT8201 support by adding the compatible. Signed-off-by: Felix Kaechele Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240804031310.331871-2-felix@kaechele.ca Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/edt-ft5x06.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 379721027bf8..51d48d4130d3 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -42,6 +42,7 @@ properties: - focaltech,ft5426 - focaltech,ft5452 - focaltech,ft6236 + - focaltech,ft8201 - focaltech,ft8719 reg: From fc289d3e8698f9b11edad6d73f371ebf35944c57 Mon Sep 17 00:00:00 2001 From: Felix Kaechele Date: Sat, 3 Aug 2024 23:13:10 -0400 Subject: [PATCH 0384/1523] Input: edt-ft5x06 - add support for FocalTech FT8201 The driver supports the FT8201 chip as well. It registers up to 10 touch points. Tested on: Lenovo ThinkSmart View (CD-18781Y), LCM: BOE TV080WXM-LL4 Signed-off-by: Felix Kaechele Link: https://lore.kernel.org/r/20240804031310.331871-3-felix@kaechele.ca Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 42f99e57fbb7..e70415f189a5 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1474,6 +1474,10 @@ static const struct edt_i2c_chip_data edt_ft6236_data = { .max_support_points = 2, }; +static const struct edt_i2c_chip_data edt_ft8201_data = { + .max_support_points = 10, +}; + static const struct edt_i2c_chip_data edt_ft8719_data = { .max_support_points = 10, }; @@ -1485,6 +1489,7 @@ static const struct i2c_device_id edt_ft5x06_ts_id[] = { { .name = "ft5452", .driver_data = (long)&edt_ft5452_data }, /* Note no edt- prefix for compatibility with the ft6236.c driver */ { .name = "ft6236", .driver_data = (long)&edt_ft6236_data }, + { .name = "ft8201", .driver_data = (long)&edt_ft8201_data }, { .name = "ft8719", .driver_data = (long)&edt_ft8719_data }, { /* sentinel */ } }; @@ -1500,6 +1505,7 @@ static const struct of_device_id edt_ft5x06_of_match[] = { { .compatible = "focaltech,ft5452", .data = &edt_ft5452_data }, /* Note focaltech vendor prefix for compatibility with ft6236.c */ { .compatible = "focaltech,ft6236", .data = &edt_ft6236_data }, + { .compatible = "focaltech,ft8201", .data = &edt_ft8201_data }, { .compatible = "focaltech,ft8719", .data = &edt_ft8719_data }, { /* sentinel */ } }; From 206f533a0a7c683982af473079c4111f4a0f9f5e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 17:50:25 -0700 Subject: [PATCH 0385/1523] Input: uinput - reject requests with unreasonable number of slots From: Dmitry Torokhov When exercising uinput interface syzkaller may try setting up device with a really large number of slots, which causes memory allocation failure in input_mt_init_slots(). While this allocation failure is handled properly and request is rejected, it results in syzkaller reports. Additionally, such request may put undue burden on the system which will try to free a lot of memory for a bogus request. Fix it by limiting allowed number of slots to 100. This can easily be extended if we see devices that can track more than 100 contacts. Reported-by: Tetsuo Handa Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=0122fa359a69694395d5 Link: https://lore.kernel.org/r/Zqgi7NYEbpRsJfa2@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index d23f3225b00f..445856c9127a 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -417,6 +417,20 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } + /* + * Limit number of contacts to a reasonable value (100). This + * ensures that we need less than 2 pages for struct input_mt + * (we are not using in-kernel slot assignment so not going to + * allocate memory for the "red" table), and we should have no + * trouble getting this much memory. + */ + if (code == ABS_MT_SLOT && max > 99) { + printk(KERN_DEBUG + "%s: unreasonably large number of slots requested: %d\n", + UINPUT_NAME, max); + return -EINVAL; + } + return 0; } From 0e8b53979ac86eddb3fd76264025a70071a25574 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 5 Aug 2024 14:01:21 +0900 Subject: [PATCH 0386/1523] bpf: kprobe: remove unused declaring of bpf_kprobe_override After the commit 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one"), "bpf_kprobe_override" is not used anywhere anymore, and we can remove it now. Link: https://lore.kernel.org/all/20240710085939.11520-1-dongml2@chinatelecom.cn/ Fixes: 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one") Signed-off-by: Menglong Dong Acked-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..9435185c10ef 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); From 8c8acb8f26cbde665b233dd1b9bbcbb9b86822dc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 2 Aug 2024 22:53:15 +0900 Subject: [PATCH 0387/1523] kprobes: Fix to check symbol prefixes correctly Since str_has_prefix() takes the prefix as the 2nd argument and the string as the first, is_cfi_preamble_symbol() always fails to check the prefix. Fix the function parameter order so that it correctly check the prefix. Link: https://lore.kernel.org/all/172260679559.362040.7360872132937227206.stgit@devnote2/ Fixes: de02f2ac5d8c ("kprobes: Prohibit probing on CFI preamble symbol") Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e85de37d9e1e..da59c68df841 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr) if (lookup_symbol_name(addr, symbuf)) return false; - return str_has_prefix("__cfi_", symbuf) || - str_has_prefix("__pfx_", symbuf); + return str_has_prefix(symbuf, "__cfi_") || + str_has_prefix(symbuf, "__pfx_"); } static int check_kprobe_address_safe(struct kprobe *p, From 34e1b1bb73244219b3b3e24911e56c6e7b2b679e Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 1 Aug 2024 14:31:39 +0000 Subject: [PATCH 0388/1523] ALSA: hda: cs35l56: Stop creating ALSA controls for firmware coefficients A number of laptops have gone to market with old firmware versions that export controls that have since been hidden, but we can't just install a newer firmware because the firmware for each product is customized and qualified by the OEM. The issue is that alsactl save and restore has no idea what controls are good to persist which can lead to misconfiguration. There is no reason that the UCM or user should need to interact with any of the ALSA controls for the firmware coefficients so they can be removed entirely, this also simplifies the driver. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20240801143139.34549-1-simont@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 38 +------------------------------------ sound/pci/hda/cs35l56_hda.h | 1 - 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 96d3f13c5abf..31cc92bac89a 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -559,18 +559,6 @@ static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmw kfree(coeff_filename); } -static void cs35l56_hda_create_dsp_controls_work(struct work_struct *work) -{ - struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, control_work); - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l56->amp_name; - info.fw_type = HDA_CS_DSP_FW_MISC; - info.card = cs35l56->codec->card; - - hda_cs_dsp_add_controls(&cs35l56->cs_dsp, &info); -} - static void cs35l56_hda_apply_calibration(struct cs35l56_hda *cs35l56) { int ret; @@ -595,26 +583,15 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) char *wmfw_filename = NULL; unsigned int preloaded_fw_ver; bool firmware_missing; - bool add_dsp_controls_required = false; int ret; - /* - * control_work must be flushed before proceeding, but we can't do that - * here as it would create a deadlock on controls_rwsem so it must be - * performed before queuing dsp_work. - */ - WARN_ON_ONCE(work_busy(&cs35l56->control_work)); - /* * Prepare for a new DSP power-up. If the DSP has had firmware * downloaded previously then it needs to be powered down so that it - * can be updated and if hadn't been patched before then the controls - * will need to be added once firmware download succeeds. + * can be updated. */ if (cs35l56->base.fw_patched) cs_dsp_power_down(&cs35l56->cs_dsp); - else - add_dsp_controls_required = true; cs35l56->base.fw_patched = false; @@ -698,15 +675,6 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) CS35L56_FIRMWARE_MISSING); cs35l56->base.fw_patched = true; - /* - * Adding controls is deferred to prevent a lock inversion - ALSA takes - * the controls_rwsem when adding a control, the get() / put() - * functions of a control are called holding controls_rwsem and those - * that depend on running firmware wait for dsp_work() to complete. - */ - if (add_dsp_controls_required) - queue_work(system_long_wq, &cs35l56->control_work); - ret = cs_dsp_run(&cs35l56->cs_dsp); if (ret) dev_dbg(cs35l56->base.dev, "%s: cs_dsp_run ret %d\n", __func__, ret); @@ -753,7 +721,6 @@ static int cs35l56_hda_bind(struct device *dev, struct device *master, void *mas strscpy(comp->name, dev_name(dev), sizeof(comp->name)); comp->playback_hook = cs35l56_hda_playback_hook; - flush_work(&cs35l56->control_work); queue_work(system_long_wq, &cs35l56->dsp_work); cs35l56_hda_create_controls(cs35l56); @@ -775,7 +742,6 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * struct hda_component *comp; cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->control_work); cs35l56_hda_remove_controls(cs35l56); @@ -806,7 +772,6 @@ static int cs35l56_hda_system_suspend(struct device *dev) struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); cs35l56_hda_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->control_work); if (cs35l56->playing) cs35l56_hda_pause(cs35l56); @@ -1026,7 +991,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) dev_set_drvdata(cs35l56->base.dev, cs35l56); INIT_WORK(&cs35l56->dsp_work, cs35l56_hda_dsp_work); - INIT_WORK(&cs35l56->control_work, cs35l56_hda_create_dsp_controls_work); ret = cs35l56_hda_read_acpi(cs35l56, hid, id); if (ret) diff --git a/sound/pci/hda/cs35l56_hda.h b/sound/pci/hda/cs35l56_hda.h index c40d159507c2..38d94fb213a5 100644 --- a/sound/pci/hda/cs35l56_hda.h +++ b/sound/pci/hda/cs35l56_hda.h @@ -23,7 +23,6 @@ struct cs35l56_hda { struct cs35l56_base base; struct hda_codec *codec; struct work_struct dsp_work; - struct work_struct control_work; int index; const char *system_name; From 312c04cee408a8448ec8b639fe7f0434017d7161 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 1 Aug 2024 16:50:44 +0100 Subject: [PATCH 0389/1523] ALSA: hda: cs35l41: Stop creating ALSA Controls for firmware coefficients When the CS35L41 loads its firmware, it has a number of controls to affect its behaviour. Currently, these controls are exposed as ALSA Controls. These controls were never intended to be exposed to users but the firmware doesn't mark them hidden, so make the driver ignore them. Any changes in the coefficients handled by these controls needs to be matched to the individual system by SSID, which is already handled using the tuning file, when firmware is loaded, so UCM should not be setting these controls anyway. Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20240801155047.456540-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 4b411ed8c3fe..3a92e98da72d 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -133,17 +133,6 @@ static const struct reg_sequence cs35l41_hda_mute[] = { { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_HPF_PCM_EN = 1, AMP_VOL_PCM Mute }; -static void cs35l41_add_controls(struct cs35l41_hda *cs35l41) -{ - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l41->amp_name; - info.fw_type = cs35l41->firmware_type; - info.card = cs35l41->codec->card; - - hda_cs_dsp_add_controls(&cs35l41->cs_dsp, &info); -} - static const struct cs_dsp_client_ops client_ops = { .control_remove = hda_cs_dsp_control_remove, }; @@ -603,8 +592,6 @@ static int cs35l41_init_dsp(struct cs35l41_hda *cs35l41) if (ret) goto err; - cs35l41_add_controls(cs35l41); - cs35l41_hda_apply_calibration(cs35l41); err: From da1878b61c8d480c361ba6a39ce8a31c80b65826 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Thu, 1 Aug 2024 16:41:41 +0530 Subject: [PATCH 0390/1523] drm/i915/display: correct dual pps handling for MTL_PCH+ On the PCH side the second PPS was introduced in ICP+.Add condition On MTL_PCH and greater platform also having the second PPS. Note that DG1/2 south block only has the single PPS, so need to exclude the fake DG1/2 PCHs Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11488 Fixes: 93cbc1accbce ("drm/i915/mtl: Add fake PCH for Meteor Lake") Cc: # v6.9+ Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240801111141.574854-1-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/display/intel_backlight.c | 3 +++ drivers/gpu/drm/i915/display/intel_pps.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 6f678c039ed8..18933b003cbe 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba86..7ce926241e83 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; From 38055789d15155109b41602ad719d770af507030 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 1 Aug 2024 18:04:07 +0300 Subject: [PATCH 0391/1523] wifi: ath12k: use 128 bytes aligned iova in transmit path for WCN7850 In transmit path, it is likely that the iova is not aligned to PCIe TLP max payload size, which is 128 for WCN7850. Normally in such cases hardware is expected to split the packet into several parts in a manner such that they, other than the first one, have aligned iova. However due to hardware limitations, WCN7850 does not behave like that properly with some specific unaligned iova in transmit path. This easily results in target hang in a KPI transmit test: packet send/receive failure, WMI command send timeout etc. Also fatal error seen in PCIe level: ... Capabilities: ... ... DevSta: ... FatalErr+ ... ... ... Work around this by manually moving/reallocating payload buffer such that we can map it to a 128 bytes aligned iova. The moving requires sufficient head room or tail room in skb: for the former we can do ourselves a favor by asking some extra bytes when registering with mac80211, while for the latter we can do nothing. Moving/reallocating buffer consumes additional CPU cycles, but the good news is that an aligned iova increases PCIe efficiency. In my tests on some X86 platforms the KPI results are almost consistent. Since this is seen only with WCN7850, add a new hardware parameter to differentiate from others. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Baochen Qiang Cc: Tested-by: Mark Pearson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240715023814.20242-1-quic_bqiang@quicinc.com --- drivers/net/wireless/ath/ath12k/dp_tx.c | 72 +++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/hw.c | 6 +++ drivers/net/wireless/ath/ath12k/hw.h | 4 ++ drivers/net/wireless/ath/ath12k/mac.c | 1 + 4 files changed, 83 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index d08c04343e90..44406e0b4a34 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -162,6 +162,60 @@ static int ath12k_dp_prepare_htt_metadata(struct sk_buff *skb) return 0; } +static void ath12k_dp_tx_move_payload(struct sk_buff *skb, + unsigned long delta, + bool head) +{ + unsigned long len = skb->len; + + if (head) { + skb_push(skb, delta); + memmove(skb->data, skb->data + delta, len); + skb_trim(skb, len); + } else { + skb_put(skb, delta); + memmove(skb->data + delta, skb->data, len); + skb_pull(skb, delta); + } +} + +static int ath12k_dp_tx_align_payload(struct ath12k_base *ab, + struct sk_buff **pskb) +{ + u32 iova_mask = ab->hw_params->iova_mask; + unsigned long offset, delta1, delta2; + struct sk_buff *skb2, *skb = *pskb; + unsigned int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + int ret = 0; + + offset = (unsigned long)skb->data & iova_mask; + delta1 = offset; + delta2 = iova_mask - offset + 1; + + if (headroom >= delta1) { + ath12k_dp_tx_move_payload(skb, delta1, true); + } else if (tailroom >= delta2) { + ath12k_dp_tx_move_payload(skb, delta2, false); + } else { + skb2 = skb_realloc_headroom(skb, iova_mask); + if (!skb2) { + ret = -ENOMEM; + goto out; + } + + dev_kfree_skb_any(skb); + + offset = (unsigned long)skb2->data & iova_mask; + if (offset) + ath12k_dp_tx_move_payload(skb2, offset, true); + *pskb = skb2; + } + +out: + return ret; +} + int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, struct sk_buff *skb) { @@ -184,6 +238,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, bool tcl_ring_retry; bool msdu_ext_desc = false; bool add_htt_metadata = false; + u32 iova_mask = ab->hw_params->iova_mask; if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) return -ESHUTDOWN; @@ -279,6 +334,23 @@ tcl_ring_sel: goto fail_remove_tx_buf; } + if (iova_mask && + (unsigned long)skb->data & iova_mask) { + ret = ath12k_dp_tx_align_payload(ab, &skb); + if (ret) { + ath12k_warn(ab, "failed to align TX buffer %d\n", ret); + /* don't bail out, give original buffer + * a chance even unaligned. + */ + goto map; + } + + /* hdr is pointing to a wrong place after alignment, + * so refresh it for later use. + */ + hdr = (void *)skb->data; + } +map: ti.paddr = dma_map_single(ab->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ab->dev, ti.paddr)) { atomic_inc(&ab->soc_stats.tx_err.misc_fail); diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 2e11ea763574..7b0b6a7f4701 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -924,6 +924,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, { .name = "wcn7850 hw2.0", @@ -1000,6 +1002,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = &wcn7850_uuid, .supports_dynamic_smps_6ghz = false, + + .iova_mask = ATH12K_PCIE_MAX_PAYLOAD_SIZE - 1, }, { .name = "qcn9274 hw2.0", @@ -1072,6 +1076,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, }; diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index e792eb6b249b..b1d302c48326 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -96,6 +96,8 @@ #define ATH12K_M3_FILE "m3.bin" #define ATH12K_REGDB_FILE_NAME "regdb.bin" +#define ATH12K_PCIE_MAX_PAYLOAD_SIZE 128 + enum ath12k_hw_rate_cck { ATH12K_HW_RATE_CCK_LP_11M = 0, ATH12K_HW_RATE_CCK_LP_5_5M, @@ -215,6 +217,8 @@ struct ath12k_hw_params { const guid_t *acpi_guid; bool supports_dynamic_smps_6ghz; + + u32 iova_mask; }; struct ath12k_hw_ops { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8106297f0bc1..ce41c8153080 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9193,6 +9193,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) hw->vif_data_size = sizeof(struct ath12k_vif); hw->sta_data_size = sizeof(struct ath12k_sta); + hw->extra_tx_headroom = ab->hw_params->iova_mask; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); From 1b85bdb0fadb42f5ef75ddcd259fc1ef13ec04de Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Thu, 1 Aug 2024 16:41:41 +0530 Subject: [PATCH 0392/1523] drm/i915/display: correct dual pps handling for MTL_PCH+ On the PCH side the second PPS was introduced in ICP+.Add condition On MTL_PCH and greater platform also having the second PPS. Note that DG1/2 south block only has the single PPS, so need to exclude the fake DG1/2 PCHs Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11488 Fixes: 93cbc1accbce ("drm/i915/mtl: Add fake PCH for Meteor Lake") Cc: # v6.9+ Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240801111141.574854-1-dnyaneshwar.bhadane@intel.com (cherry picked from commit da1878b61c8d480c361ba6a39ce8a31c80b65826) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_backlight.c | 3 +++ drivers/gpu/drm/i915/display/intel_pps.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d1..6c3333136737 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba86..7ce926241e83 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; From b50f2af9fbc5c00103ca8b72752b15310bd77762 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Thu, 1 Aug 2024 21:23:37 +0800 Subject: [PATCH 0393/1523] virtio-net: check feature before configuring the vq coalescing command Virtio spec says: The driver MUST have negotiated the VIRTIO_NET_F_VQ_NOTF_COAL feature when issuing commands VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET and VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET. So we add the feature negotiation check to virtnet_send_{r,t}x_ctrl_coal_vq_cmd as a basis for the next bugfix patch. Suggested-by: Michael S. Tsirkin Signed-off-by: Heng Qi Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0383a3e136d6..b1176be8fcfd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3658,6 +3658,9 @@ static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) @@ -3675,6 +3678,9 @@ static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) From 4ba8d97083707409822264fd1776aad7233f353e Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Thu, 1 Aug 2024 21:23:38 +0800 Subject: [PATCH 0394/1523] virtio-net: unbreak vq resizing when coalescing is not negotiated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't break the resize action if the vq coalescing feature named VIRTIO_NET_F_VQ_NOTF_COAL is not negotiated. Fixes: f61fe5f081cf ("virtio-net: fix the vq coalescing setting for vq resize") Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Eugenio Pé rez Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b1176be8fcfd..3f10c72743e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3749,7 +3749,11 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); - if (err) + + /* Don't break the tx resize action if the vq coalescing is not + * supported. The same is true for rx resize below. + */ + if (err && err != -EOPNOTSUPP) return err; } @@ -3764,7 +3768,7 @@ static int virtnet_set_ringparam(struct net_device *dev, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); - if (err) + if (err && err != -EOPNOTSUPP) return err; } } From 7ab107544b777c3bd7feb9fe447367d8edd5b202 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 1 Aug 2024 15:55:12 +0200 Subject: [PATCH 0395/1523] net: usb: qmi_wwan: fix memory leak for not ip packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the unused skb when not ip packets arrive. Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 386d62769ded..cfda32047cff 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -201,6 +201,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } From 50359c9c3cb3e55e840e3485f5ee37da5b2b16b6 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 17 Jul 2024 10:03:33 +0200 Subject: [PATCH 0396/1523] pmdomain: imx: scu-pd: Remove duplicated clocks These clocks are already added to the list. Remove the duplicates ones. Fixes: a67d780720ff ("genpd: imx: scu-pd: add more PDs") Signed-off-by: Alexander Stein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240717080334.2210988-1-alexander.stein@ew.tq-group.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/scu-pd.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pmdomain/imx/scu-pd.c b/drivers/pmdomain/imx/scu-pd.c index 05841b0bf7f3..01d465d88f60 100644 --- a/drivers/pmdomain/imx/scu-pd.c +++ b/drivers/pmdomain/imx/scu-pd.c @@ -223,11 +223,6 @@ static const struct imx_sc_pd_range imx8qxp_scu_pd_ranges[] = { { "lvds1-pwm", IMX_SC_R_LVDS_1_PWM_0, 1, false, 0 }, { "lvds1-lpi2c", IMX_SC_R_LVDS_1_I2C_0, 2, true, 0 }, - { "mipi1", IMX_SC_R_MIPI_1, 1, 0 }, - { "mipi1-pwm0", IMX_SC_R_MIPI_1_PWM_0, 1, 0 }, - { "mipi1-i2c", IMX_SC_R_MIPI_1_I2C_0, 2, 1 }, - { "lvds1", IMX_SC_R_LVDS_1, 1, 0 }, - /* DC SS */ { "dc0", IMX_SC_R_DC_0, 1, false, 0 }, { "dc0-pll", IMX_SC_R_DC_0_PLL_0, 2, true, 0 }, From 45b4acab4cac79503663f0a4be9eb3752db04d4b Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Mon, 5 Aug 2024 10:27:20 +0000 Subject: [PATCH 0397/1523] ASoC: wm_adsp: Add control_add callback and export wm_adsp_control_add() The callback allows codec drivers to affect how firmware coefficients are added as controls. For example a codec driver may selectively add controls by choosing to call wm_adsp_control_add() based on some filter logic. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20240805102721.30102-2-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 17 ++++++++++++++--- sound/soc/codecs/wm_adsp.h | 3 +++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 9f8549b34e30..e69283195f36 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -583,7 +583,7 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(kcontrol); } -static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) { struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); struct cs_dsp *cs_dsp = &dsp->cs_dsp; @@ -658,6 +658,17 @@ err_ctl: return ret; } +EXPORT_SYMBOL_GPL(wm_adsp_control_add); + +static int wm_adsp_control_add_cb(struct cs_dsp_coeff_ctl *cs_ctl) +{ + struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); + + if (dsp->control_add) + return (dsp->control_add)(dsp, cs_ctl); + else + return wm_adsp_control_add(cs_ctl); +} static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) { @@ -2072,12 +2083,12 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data) EXPORT_SYMBOL_GPL(wm_halo_wdt_expire); static const struct cs_dsp_client_ops wm_adsp1_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, }; static const struct cs_dsp_client_ops wm_adsp2_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, .pre_run = wm_adsp_pre_run, .post_run = wm_adsp_event_post_run, diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index e53dfcf1f78f..edc5b02ae765 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -37,6 +37,7 @@ struct wm_adsp { bool wmfw_optional; struct work_struct boot_work; + int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); int (*pre_run)(struct wm_adsp *dsp); bool preloaded; @@ -132,6 +133,8 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, int wm_adsp_compr_copy(struct snd_soc_component *component, struct snd_compr_stream *stream, char __user *buf, size_t count); + +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl); int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, From 2c3640b82213cf2beb7c1cc3cfce2ecf5349b0de Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Mon, 5 Aug 2024 10:27:21 +0000 Subject: [PATCH 0398/1523] ASoC: cs35l56: Stop creating ALSA controls for firmware coefficients A number of laptops have gone to market with old firmware versions that export controls that have since been hidden, but we can't just install a newer firmware because the firmware for each product is customized and qualified by the OEM. The issue is that alsactl save and restore has no idea what controls are good to persist which can lead to misconfiguration. There is no reason that the UCM or user should need to interact with any of the ALSA controls for the firmware coefficients so they can be removed entirely. Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20240805102721.30102-3-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 84c34f5b1a51..757ade6373ed 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1095,6 +1095,11 @@ int cs35l56_system_resume(struct device *dev) } EXPORT_SYMBOL_GPL(cs35l56_system_resume); +static int cs35l56_control_add_nop(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl) +{ + return 0; +} + static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) { struct wm_adsp *dsp; @@ -1117,6 +1122,12 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) dsp->fw = 12; dsp->wmfw_optional = true; + /* + * None of the firmware controls need to be exported so add a no-op + * callback that suppresses creating an ALSA control. + */ + dsp->control_add = &cs35l56_control_add_nop; + dev_dbg(cs35l56->base.dev, "DSP system name: '%s'\n", dsp->system_name); ret = wm_halo_init(dsp); From dc268085e499666b9f4f0fcb4c5a94e1c0b193b3 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 5 Aug 2024 12:42:22 +0100 Subject: [PATCH 0399/1523] ASoC: cs-amp-lib: Fix NULL pointer crash if efi.get_variable is NULL Call efi_rt_services_supported() to check that efi.get_variable exists before calling it. Signed-off-by: Richard Fitzgerald Fixes: 1cad8725f2b9 ("ASoC: cs-amp-lib: Add helpers for factory calibration data") Link: https://patch.msgid.link/20240805114222.15722-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 287ac01a3873..605964af8afa 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -108,7 +108,7 @@ static efi_status_t cs_amp_get_efi_variable(efi_char16_t *name, KUNIT_STATIC_STUB_REDIRECT(cs_amp_get_efi_variable, name, guid, size, buf); - if (IS_ENABLED(CONFIG_EFI)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return efi.get_variable(name, guid, &attr, size, buf); return EFI_NOT_FOUND; From ce4a995884ecedb98ba00e2e0b8ce94cde2060ce Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 11:59:31 +0200 Subject: [PATCH 0400/1523] drm/omap: add CONFIG_MMU dependency Compile-testing with CONFIG_MMU disabled causes a link error in omapdrm: arm-linux-gnueabi-ld: drivers/gpu/drm/omapdrm/omap_gem.o: in function `omap_gem_fault_2d': omap_gem.c:(.text+0x36e): undefined reference to `vmf_insert_mixed' arm-linux-gnueabi-ld: drivers/gpu/drm/omapdrm/omap_gem.o: in function `omap_gem_fault': omap_gem.c:(.text+0x74a): undefined reference to `vmf_insert_mixed' Avoid this by adding a Kconfig dependency. Fixes: dc6fcaaba5a5 ("drm/omap: Allow build with COMPILE_TEST=y") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20240719095942.3841009-1-arnd@kernel.org --- drivers/gpu/drm/omapdrm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d2..64e440a2649b 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER From 15b7a03205b31bc5623378c190d22b7ff60026f1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 5 Aug 2024 15:01:28 +0200 Subject: [PATCH 0401/1523] ALSA: line6: Fix racy access to midibuf There can be concurrent accesses to line6 midibuf from both the URB completion callback and the rawmidi API access. This could be a cause of KMSAN warning triggered by syzkaller below (so put as reported-by here). This patch protects the midibuf call of the former code path with a spinlock for avoiding the possible races. Reported-by: syzbot+78eccfb8b3c9a85fc6c5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/00000000000000949c061df288c5@google.com Cc: Link: https://patch.msgid.link/20240805130129.10872-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index f4437015d43a..9df49a880b75 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -286,12 +286,14 @@ static void line6_data_received(struct urb *urb) { struct usb_line6 *line6 = (struct usb_line6 *)urb->context; struct midi_buffer *mb = &line6->line6midi->midibuf_in; + unsigned long flags; int done; if (urb->status == -ESHUTDOWN) return; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_write(mb, urb->transfer_buffer, urb->actual_length); @@ -300,12 +302,15 @@ static void line6_data_received(struct urb *urb) dev_dbg(line6->ifcdev, "%d %d buffer overflow - message skipped\n", done, urb->actual_length); } + spin_unlock_irqrestore(&line6->line6midi->lock, flags); for (;;) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN, LINE6_MIDIBUF_READ_RX); + spin_unlock_irqrestore(&line6->line6midi->lock, flags); if (done <= 0) break; From 042b8711a0beafb2c3b888bebe3c300ab4c817fa Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 18 Jul 2024 10:24:10 +0200 Subject: [PATCH 0402/1523] drm/mediatek: Set sensible cursor width/height values to fix crash Hardware-speaking, there is no feature-reduced cursor specific plane, so this driver reserves the last all Overlay plane as a Cursor plane, but sets the maximum cursor width/height to the maximum value that the full overlay plane can use. While this could be ok, it raises issues with common userspace using libdrm (especially Mutter, but other compositors too) which will crash upon performing allocations and/or using said cursor plane. Reduce the maximum width/height for the cursor to 512x512 pixels, value taken from IGT's maximum cursor size test, which succeeds. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Fei Shao Tested-by: Fei Shao Reviewed-by: Daniel Stone Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20240718082410.204459-1-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e..77b50c56c124 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); From 9438f970296f9c3a6dd340ae0ad01d2f056c88e6 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Wed, 31 Jul 2024 07:48:04 +0200 Subject: [PATCH 0403/1523] arm64: dts: ti: k3-am62-verdin-dahlia: Keep CTRL_SLEEP_MOCI# regulator on This reverts commit 3935fbc87ddebea5439f3ab6a78b1e83e976bf88. CTRL_SLEEP_MOCI# is a signal that is defined for all the SoM implementing the Verdin family specification, this signal is supposed to control the power enable in the carrier board when the system is in deep sleep mode. However this is not possible with Texas Instruments AM62 SoC, IOs output buffer is disabled in deep sleep and IOs are in tri-state mode. Given that we cannot properly control this pin, force it to be always high to minimize potential issues. Fixes: 3935fbc87dde ("arm64: dts: ti: k3-am62-verdin-dahlia: support sleep-moci") Cc: Link: https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1361669/am625-gpio-output-state-in-deep-sleep/5244802 Signed-off-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240731054804.6061-1-francesco@dolcini.it Signed-off-by: Nishanth Menon --- .../boot/dts/ti/k3-am62-verdin-dahlia.dtsi | 22 ------------------- arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 6 ----- 2 files changed, 28 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi index e8f4d136e5df..9202181fbd65 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi @@ -43,15 +43,6 @@ sound-dai = <&mcasp0>; }; }; - - reg_usb_hub: regulator-usb-hub { - compatible = "regulator-fixed"; - enable-active-high; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; - regulator-boot-on; - regulator-name = "HUB_PWR_EN"; - }; }; /* Verdin ETHs */ @@ -193,11 +184,6 @@ status = "okay"; }; -/* Do not force CTRL_SLEEP_MOCI# always enabled */ -®_force_sleep_moci { - status = "disabled"; -}; - /* Verdin SD_1 */ &sdhci1 { status = "okay"; @@ -218,15 +204,7 @@ }; &usb1 { - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - usb-hub@1 { - compatible = "usb424,2744"; - reg = <1>; - vdd-supply = <®_usb_hub>; - }; }; /* Verdin CTRL_WAKE1_MICO# */ diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index 359f53f3e019..5bef31b8577b 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -138,12 +138,6 @@ vin-supply = <®_1v8>; }; - /* - * By default we enable CTRL_SLEEP_MOCI#, this is required to have - * peripherals on the carrier board powered. - * If more granularity or power saving is required this can be disabled - * in the carrier board device tree files. - */ reg_force_sleep_moci: regulator-force-sleep-moci { compatible = "regulator-fixed"; enable-active-high; From 87d571d6fb77ec342a985afa8744bb9bb75b3622 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 17 May 2024 20:22:44 +0000 Subject: [PATCH 0404/1523] ntp: Clamp maxerror and esterror to operating range Using syzkaller alongside the newly reintroduced signed integer overflow sanitizer spits out this report: UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:461:16 9223372036854775807 + 500 cannot be represented in type 'long' Call Trace: handle_overflow+0x171/0x1b0 second_overflow+0x2d6/0x500 accumulate_nsecs_to_secs+0x60/0x160 timekeeping_advance+0x1fe/0x890 update_wall_time+0x10/0x30 time_maxerror is unconditionally incremented and the result is checked against NTP_PHASE_LIMIT, but the increment itself can overflow, resulting in wrap-around to negative space. Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user supplied value was sanity checked to be in the operating range. That change removed the sanity check and relied on clamping in handle_overflow() which does not work correctly when the user supplied value is in the overflow zone of the '+ 500' operation. The operation requires CAP_SYS_TIME and the side effect of the overflow is NTP getting out of sync. Miroslav confirmed that the input value should be clamped to the operating range and the same applies to time_esterror. The latter is not used by the kernel, but the value still should be in the operating range as it was before the sanity check got removed. Clamp them to the operating range. [ tglx: Changed it to clamping and included time_esterror ] Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") Signed-off-by: Justin Stitt Signed-off-by: Thomas Gleixner Cc: Miroslav Lichvar Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-usec-v2-1-d539180f2b79@google.com Closes: https://github.com/KSPP/linux/issues/354 --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 406dccb79c2b..502e1e5b7f7f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -727,10 +727,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, } if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; + time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { time_constant = txc->constant; From 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 17 May 2024 00:47:10 +0000 Subject: [PATCH 0405/1523] ntp: Safeguard against time_constant overflow Using syzkaller with the recently reintroduced signed integer overflow sanitizer produces this UBSAN report: UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:738:18 9223372036854775806 + 4 cannot be represented in type 'long' Call Trace: handle_overflow+0x171/0x1b0 __do_adjtimex+0x1236/0x1440 do_adjtimex+0x2be/0x740 The user supplied time_constant value is incremented by four and then clamped to the operating range. Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user supplied value was sanity checked to be in the operating range. That change removed the sanity check and relied on clamping after incrementing which does not work correctly when the user supplied value is in the overflow zone of the '+ 4' operation. The operation requires CAP_SYS_TIME and the side effect of the overflow is NTP getting out of sync. Similar to the fixups for time_maxerror and time_esterror, clamp the user space supplied value to the operating range. [ tglx: Switch to clamping ] Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") Signed-off-by: Justin Stitt Signed-off-by: Thomas Gleixner Cc: Miroslav Lichvar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-c-v2-1-f3a80096f36f@google.com Closes: https://github.com/KSPP/linux/issues/352 --- kernel/time/ntp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 502e1e5b7f7f..8d2dd214ec68 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -733,11 +733,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; + time_constant = clamp(txc->constant, 0, MAXTC); if (!(time_status & STA_NANO)) time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); + time_constant = clamp(time_constant, 0, MAXTC); } if (txc->modes & ADJ_TAI && From 5916be8a53de6401871bdd953f6c60237b47d6d3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 3 Aug 2024 17:07:51 +0200 Subject: [PATCH 0406/1523] timekeeping: Fix bogus clock_was_set() invocation in do_adjtimex() The addition of the bases argument to clock_was_set() fixed up all call sites correctly except for do_adjtimex(). This uses CLOCK_REALTIME instead of CLOCK_SET_WALL as argument. CLOCK_REALTIME is 0. As a result the effect of that clock_was_set() notification is incomplete and might result in timers expiring late because the hrtimer code does not re-evaluate the affected clock bases. Use CLOCK_SET_WALL instead of CLOCK_REALTIME to tell the hrtimers code which clock bases need to be re-evaluated. Fixes: 17a1b8826b45 ("hrtimer: Add bases argument to clock_was_set()") Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/877ccx7igo.ffs@tglx --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2fa87dcfeda9..5391e4167d60 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2606,7 +2606,7 @@ int do_adjtimex(struct __kernel_timex *txc) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) - clock_was_set(CLOCK_REALTIME); + clock_was_set(CLOCK_SET_WALL); ntp_notify_cmos_timer(); From 1fb0847392e220890c9cf8908e3ab8e7e1227ff6 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Thu, 11 Jul 2024 14:26:55 +0300 Subject: [PATCH 0407/1523] drm/bridge-connector: Fix double free in error handling paths The recent switch to drmm allocation in drm_bridge_connector_init() may cause double free on bridge_connector in some of the error handling paths. Drop the explicit kfree() calls on bridge_connector. Fixes: c12907be57b1 ("drm/bridge-connector: switch to using drmm allocations") Signed-off-by: Cristian Ciocaltea Signed-off-by: default avatarRobert Foss Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240711-bridge-connector-fix-dbl-free-v1-1-d558b2d0eb93@collabora.com --- drivers/gpu/drm/drm_bridge_connector.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 0869b663f17e..a4fbf1eb7ac5 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -443,10 +443,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, panel_bridge = bridge; } - if (connector_type == DRM_MODE_CONNECTOR_Unknown) { - kfree(bridge_connector); + if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - } if (bridge_connector->bridge_hdmi) ret = drmm_connector_hdmi_init(drm, connector, @@ -461,10 +459,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) { - kfree(bridge_connector); + if (ret) return ERR_PTR(ret); - } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); From b93d16bee557302d4e588375ececd833cc048acc Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 3 Aug 2024 14:10:41 +0800 Subject: [PATCH 0408/1523] i2c: qcom-geni: Add missing clk_disable_unprepare in geni_i2c_runtime_resume Add the missing clk_disable_unprepare() before return in geni_i2c_runtime_resume(). Fixes: 14d02fbadb5d ("i2c: qcom-geni: add desc struct to prepare support for I2C Master Hub variant") Signed-off-by: Gaosheng Cui Reviewed-by: Vladimir Zapolskiy Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0a8b95ce35f7..78f43648e9f3 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -990,8 +990,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = geni_se_resources_on(&gi2c->se); - if (ret) + if (ret) { + clk_disable_unprepare(gi2c->core_clk); return ret; + } enable_irq(gi2c->irq); gi2c->suspended = 0; From 9a1af1e218779724ff29ca75f2b9397dc3ed11e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 29 Jul 2024 15:13:51 +0200 Subject: [PATCH 0409/1523] ASoC: codecs: lpass-macro: fix missing codec version Recent changes that started checking the codec version broke audio on the Lenovo ThinkPad X13s: wsa_macro 3240000.codec: Unsupported Codec version (0) wsa_macro 3240000.codec: probe with driver wsa_macro failed with error -22 rx_macro 3200000.rxmacro: Unsupported Codec version (0) rx_macro 3200000.rxmacro: probe with driver rx_macro failed with error -22 Add the missing codec version to the lookup table so that the codec drivers probe successfully. Note that I'm just assuming that this is a 2.0 codec based on the fact that this device uses the older register layout. Fixes: 378918d59181 ("ASoC: codecs: lpass-macro: add helpers to get codec version") Fixes: dbacef05898d ("ASoC: codec: lpass-rx-macro: prepare driver to accomdate new codec versions") Fixes: 727de4fbc546 ("ASoC: codecs: lpass-wsa-macro: Correct support for newer v2.5 version") Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240729131351.27886-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-va-macro.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index b852cc7ffad9..a62ccd09bacd 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1472,6 +1472,8 @@ static void va_macro_set_lpass_codec_version(struct va_macro *va) if ((core_id_0 == 0x01) && (core_id_1 == 0x0F)) version = LPASS_CODEC_VERSION_2_0; + if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && core_id_2 == 0x01) + version = LPASS_CODEC_VERSION_2_0; if ((core_id_0 == 0x02) && (core_id_1 == 0x0E)) version = LPASS_CODEC_VERSION_2_1; if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && (core_id_2 == 0x50 || core_id_2 == 0x51)) From e42066df07c0fcedebb32ed56f8bc39b4bf86337 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 5 Aug 2024 15:08:39 +0100 Subject: [PATCH 0410/1523] ASoC: cs35l56: Handle OTP read latency over SoundWire Use the late-read buffer in the CS35L56 SoundWire interface to read OTP memory. The OTP memory has a longer access latency than chip registers and cannot guarantee to return the data value in the SoundWire control response if the bus clock is >4.8 MHz. The Cirrus SoundWire peripheral IP exposes the bridge-to-bus read buffer and status bits. For a read from OTP the bridge status bits are polled to wait for the OTP data to be loaded into the read buffer and the data is then read from there. Signed-off-by: Richard Fitzgerald Fixes: e1830f66f6c6 ("ASoC: cs35l56: Add helper functions for amp calibration") Link: https://patch.msgid.link/20240805140839.26042-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 5 +++ sound/soc/codecs/cs35l56-sdw.c | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e5741..a51acefa785f 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index fc03bb7ecae1..7c9a17fe2195 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -23,6 +23,79 @@ /* Register addresses are offset when sent over SoundWire */ #define CS35L56_SDW_ADDR_OFFSET 0x8000 +/* Cirrus bus bridge registers */ +#define CS35L56_SDW_MEM_ACCESS_STATUS 0xd0 +#define CS35L56_SDW_MEM_READ_DATA 0xd8 + +#define CS35L56_SDW_LAST_LATE BIT(3) +#define CS35L56_SDW_CMD_IN_PROGRESS BIT(2) +#define CS35L56_SDW_RDATA_RDY BIT(0) + +#define CS35L56_LATE_READ_POLL_US 10 +#define CS35L56_LATE_READ_TIMEOUT_US 1000 + +static int cs35l56_sdw_poll_mem_status(struct sdw_slave *peripheral, + unsigned int mask, + unsigned int match) +{ + int ret, val; + + ret = read_poll_timeout(sdw_read_no_pm, val, + (val < 0) || ((val & mask) == match), + CS35L56_LATE_READ_POLL_US, CS35L56_LATE_READ_TIMEOUT_US, + false, peripheral, CS35L56_SDW_MEM_ACCESS_STATUS); + if (ret < 0) + return ret; + + if (val < 0) + return val; + + return 0; +} + +static int cs35l56_sdw_slow_read(struct sdw_slave *peripheral, unsigned int reg, + u8 *buf, size_t val_size) +{ + int ret, i; + + reg += CS35L56_SDW_ADDR_OFFSET; + + for (i = 0; i < val_size; i += sizeof(u32)) { + /* Poll for bus bridge idle */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_CMD_IN_PROGRESS, + 0); + if (ret < 0) { + dev_err(&peripheral->dev, "!CMD_IN_PROGRESS fail: %d\n", ret); + return ret; + } + + /* Reading LSByte triggers read of register to holding buffer */ + sdw_read_no_pm(peripheral, reg + i); + + /* Wait for data available */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_RDATA_RDY, + CS35L56_SDW_RDATA_RDY); + if (ret < 0) { + dev_err(&peripheral->dev, "RDATA_RDY fail: %d\n", ret); + return ret; + } + + /* Read data from buffer */ + ret = sdw_nread_no_pm(peripheral, CS35L56_SDW_MEM_READ_DATA, + sizeof(u32), &buf[i]); + if (ret) { + dev_err(&peripheral->dev, "Late read @%#x failed: %d\n", reg + i, ret); + return ret; + } + + swab32s((u32 *)&buf[i]); + } + + return 0; +} + static int cs35l56_sdw_read_one(struct sdw_slave *peripheral, unsigned int reg, void *buf) { int ret; @@ -48,6 +121,10 @@ static int cs35l56_sdw_read(void *context, const void *reg_buf, int ret; reg = le32_to_cpu(*(const __le32 *)reg_buf); + + if (cs35l56_is_otp_register(reg)) + return cs35l56_sdw_slow_read(peripheral, reg, buf8, val_size); + reg += CS35L56_SDW_ADDR_OFFSET; if (val_size == 4) From be1dec570b6f5a29ce9c99334c52bea94c28914b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 4 Aug 2024 23:20:57 -0700 Subject: [PATCH 0411/1523] drm/xe/observation: Drop empty sysctl table entry An empty sysctl table entry was inadvertently left behind for observation sysctl. The breaks on 6.11 with the following errors: [ 219.654850] sysctl table check failed: dev/xe/(null) procname is null [ 219.654862] sysctl table check failed: dev/xe/(null) No proc_handler Drop the empty entry. Fixes: 8169b2097d88 ("drm/xe/uapi: Rename xe perf layer as xe observation layer") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2419 Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240805062057.3547560-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_observation.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** From 4e436f6fb95e507131df78c0d98052237db60ecc Mon Sep 17 00:00:00 2001 From: Jared McArthur Date: Thu, 1 Aug 2024 16:04:12 -0500 Subject: [PATCH 0412/1523] arm64: dts: ti: k3-am62p: Add gpio-ranges for mcu_gpio0 Commit d72d73a44c3c ("arm64: dts: ti: k3-am62p: Add gpio-ranges properties") introduced pinmux range definition for gpio-ranges, however missed introducing the range description for the mcu_gpio node. As a result, automatic mapping of GPIO to pin control for mcu gpios is broken. Fix this by introducing the proper ranges. Fixes: d72d73a44c3c ("arm64: dts: ti: k3-am62p: Add gpio-ranges properties") Signed-off-by: Jared McArthur Link: https://lore.kernel.org/r/20240801210414.715306-2-j-mcarthur@ti.com Signed-off-by: Nishanth Menon --- arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi index e65db6ce02bf..df7945156397 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi @@ -146,6 +146,8 @@ power-domains = <&k3_pds 79 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 79 0>; clock-names = "gpio"; + gpio-ranges = <&mcu_pmx0 0 0 21>, <&mcu_pmx0 21 23 1>, + <&mcu_pmx0 22 32 2>; }; mcu_rti0: watchdog@4880000 { From 98897a300859dca62f834a5d1f60267032a9fe54 Mon Sep 17 00:00:00 2001 From: Jared McArthur Date: Thu, 1 Aug 2024 16:04:13 -0500 Subject: [PATCH 0413/1523] arm64: dts: ti: k3-am62p: Fix gpio-range for main_pmx0 Commit d72d73a44c3c ("arm64: dts: ti: k3-am62p: Add gpio-ranges properties") introduced pinmux range definition for gpio-ranges, however missed a hole within gpio-range for main_pmx0. As a result, automatic mapping of GPIO to pin control for gpios within the main_pmx0 domain is broken. Fix this by correcting the gpio-range. Fixes: d72d73a44c3c ("arm64: dts: ti: k3-am62p: Add gpio-ranges properties") Signed-off-by: Jared McArthur Link: https://lore.kernel.org/r/20240801210414.715306-3-j-mcarthur@ti.com Signed-off-by: Nishanth Menon --- arch/arm64/boot/dts/ti/k3-am62p-main.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi index 57383bd2eaeb..0ce9721b4176 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi @@ -45,7 +45,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 92 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 22 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 149 2 PIN_GPIO_RANGE_IOPAD>; From 04c90681144c40619524367c69e40736a6fa690c Mon Sep 17 00:00:00 2001 From: Jared McArthur Date: Thu, 1 Aug 2024 16:04:14 -0500 Subject: [PATCH 0414/1523] arm64: dts: ti: k3-j722s: Fix gpio-range for main_pmx0 Commit 5e5c50964e2e ("arm64: dts: ti: k3-j722s: Add gpio-ranges properties") introduced pinmux range definition for gpio-ranges, however missed a hole within gpio-range for main_pmx0. As a result, automatic mapping of GPIO to pin control for gpios within the main_pmx0 domain is broken. Fix this by correcting the gpio-range. Fixes: 5e5c50964e2e ("arm64: dts: ti: k3-j722s: Add gpio-ranges properties") Signed-off-by: Jared McArthur Link: https://lore.kernel.org/r/20240801210414.715306-4-j-mcarthur@ti.com Signed-off-by: Nishanth Menon --- arch/arm64/boot/dts/ti/k3-j722s-main.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index c797980528ec..dde4bd5c6645 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -193,7 +193,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 55 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 17 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 101 25 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, From 7fef1eb0b013eaa42019a95a08f71368e5a22dba Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Tue, 6 Jun 2023 16:46:28 +0100 Subject: [PATCH 0415/1523] docs: KVM: Fix register ID of SPSR_FIQ Fixes the register ID of SPSR_FIQ. SPSR_FIQ is a 64-bit register and the 64-bit register size mask is 0x0030000000000000ULL. Fixes: fd3bc912d3d1 ("KVM: Documentation: Document arm64 core registers in detail") Signed-off-by: Takahiro Itazuri Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230606154628.95498-1-itazur@amazon.com Signed-off-by: Oliver Upton --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe722c5dada9..87865c8897ca 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2592,7 +2592,7 @@ Specifically: 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT] 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] - 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] + 0x6030 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] [1]_ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] [1]_ ... From 26fef9d0bbeba6bf5d18386bd20aff2c83caa0ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 5 Aug 2024 22:35:43 +0200 Subject: [PATCH 0416/1523] syscalls: fix fstat() entry again The previous patch to fix the newfstatat() syscall entry ended up breaking fstat() instead. Unfortunately these two are not handled the same way, so I messed this one up the exact opposite way. Fixes: 343416f0c11c ("syscalls: fix syscall macros for newfstat/newfstatat") Signed-off-by: Arnd Bergmann --- scripts/syscall.tbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index 4586a18dfe9b..b93d43561a2c 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -100,7 +100,7 @@ 79 stat64 fstatat64 sys_fstatat64 79 64 newfstatat sys_newfstatat 80 stat64 fstat64 sys_fstat64 -80 64 newfstat sys_newfstat +80 64 fstat sys_newfstat 81 common sync sys_sync 82 common fsync sys_fsync 83 common fdatasync sys_fdatasync From df24373435f5899a2a98b7d377479c8d4376613b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 2 Aug 2024 22:47:34 +0300 Subject: [PATCH 0417/1523] drm/msm/dpu: don't play tricks with debug macros DPU debugging macros need to be converted to a proper drm_debug_* macros, however this is a going an intrusive patch, not suitable for a fix. Wire DPU_DEBUG and DPU_DEBUG_DRIVER to always use DRM_DEBUG_DRIVER to make sure that DPU debugging messages always end up in the drm debug messages and are controlled via the usual drm.debug mask. I don't think that it is a good idea for a generic DPU_DEBUG macro to be tied to DRM_UT_KMS. It is used to report a debug message from driver, so by default it should go to the DRM_UT_DRIVER channel. While refactoring debug macros later on we might end up with particular messages going to ATOMIC or KMS, but DRIVER should be the default. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/606932/ Link: https://lore.kernel.org/r/20240802-dpu-fix-wb-v2-2-7eac9eb8e895@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index e2adc937ea63..935ff6fd172c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -31,24 +31,14 @@ * @fmt: Pointer to format string */ #define DPU_DEBUG(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /** * DPU_DEBUG_DRIVER - macro for hardware driver logging * @fmt: Pointer to format string */ #define DPU_DEBUG_DRIVER(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) #define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) From d19d5b8d8f6dab942ce5ddbcf34bf7275e778250 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 5 Aug 2024 13:20:08 -0700 Subject: [PATCH 0418/1523] drm/msm/dp: fix the max supported bpp logic Fix the dp_panel_get_supported_bpp() API to return the minimum supported bpp correctly for relevant cases and use this API to correct the behavior of DP driver which hard-codes the max supported bpp to 30. This is incorrect because the number of lanes and max data rate supported by the lanes need to be taken into account. Replace the hardcoded limit with the appropriate math which accounts for the accurate number of lanes and max data rate. changes in v2: - Fix the dp_panel_get_supported_bpp() and use it - Drop the max_t usage as dp_panel_get_supported_bpp() already returns the min_bpp correctly now changes in v3: - replace min_t with just min as all params are u32 Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Reported-by: Dmitry Baryshkov Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/43 Tested-by: Dmitry Baryshkov # SM8350-HDK Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/607073/ Link: https://lore.kernel.org/r/20240805202009.1120981-1-quic_abhinavk@quicinc.com Signed-off-by: Stephen Boyd Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_panel.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index a916b5f3b317..6ff6c9ef351f 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel) static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { - struct dp_link_info *link_info; + const struct dp_link_info *link_info; const u32 max_supported_bpp = 30, min_supported_bpp = 18; - u32 bpp = 0, data_rate_khz = 0; + u32 bpp, data_rate_khz; - bpp = min_t(u32, mode_edid_bpp, max_supported_bpp); + bpp = min(mode_edid_bpp, max_supported_bpp); link_info = &dp_panel->link_info; data_rate_khz = link_info->num_lanes * link_info->rate * 8; - while (bpp > min_supported_bpp) { + do { if (mode_pclk_khz * bpp <= data_rate_khz) - break; + return bpp; bpp -= 6; - } + } while (bpp > min_supported_bpp); - return bpp; + return min_supported_bpp; } int dp_panel_read_sink_caps(struct dp_panel *dp_panel, @@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel) drm_mode->clock); drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp); - dp_panel->dp_mode.bpp = max_t(u32, 18, - min_t(u32, dp_panel->dp_mode.bpp, 30)); + dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp, + dp_panel->dp_mode.drm_mode.clock); + drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n", dp_panel->dp_mode.bpp); From 959ab6350add903e352890af53e86663739fcb9a Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Sun, 4 Aug 2024 21:30:15 -0400 Subject: [PATCH 0419/1523] cgroup/cpuset: fix panic caused by partcmd_update We find a bug as below: BUG: unable to handle page fault for address: 00000003 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 358 Comm: bash Tainted: G W I 6.6.0-10893-g60d6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/4 RIP: 0010:partition_sched_domains_locked+0x483/0x600 Code: 01 48 85 d2 74 0d 48 83 05 29 3f f8 03 01 f3 48 0f bc c2 89 c0 48 9 RSP: 0018:ffffc90000fdbc58 EFLAGS: 00000202 RAX: 0000000100000003 RBX: ffff888100b3dfa0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000002fe80 RBP: ffff888100b3dfb0 R08: 0000000000000001 R09: 0000000000000000 R10: ffffc90000fdbcb0 R11: 0000000000000004 R12: 0000000000000002 R13: ffff888100a92b48 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f44a5425740(0000) GS:ffff888237d80000(0000) knlGS:0000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000100030973 CR3: 000000010722c000 CR4: 00000000000006e0 Call Trace: ? show_regs+0x8c/0xa0 ? __die_body+0x23/0xa0 ? __die+0x3a/0x50 ? page_fault_oops+0x1d2/0x5c0 ? partition_sched_domains_locked+0x483/0x600 ? search_module_extables+0x2a/0xb0 ? search_exception_tables+0x67/0x90 ? kernelmode_fixup_or_oops+0x144/0x1b0 ? __bad_area_nosemaphore+0x211/0x360 ? up_read+0x3b/0x50 ? bad_area_nosemaphore+0x1a/0x30 ? exc_page_fault+0x890/0xd90 ? __lock_acquire.constprop.0+0x24f/0x8d0 ? __lock_acquire.constprop.0+0x24f/0x8d0 ? asm_exc_page_fault+0x26/0x30 ? partition_sched_domains_locked+0x483/0x600 ? partition_sched_domains_locked+0xf0/0x600 rebuild_sched_domains_locked+0x806/0xdc0 update_partition_sd_lb+0x118/0x130 cpuset_write_resmask+0xffc/0x1420 cgroup_file_write+0xb2/0x290 kernfs_fop_write_iter+0x194/0x290 new_sync_write+0xeb/0x160 vfs_write+0x16f/0x1d0 ksys_write+0x81/0x180 __x64_sys_write+0x21/0x30 x64_sys_call+0x2f25/0x4630 do_syscall_64+0x44/0xb0 entry_SYSCALL_64_after_hwframe+0x78/0xe2 RIP: 0033:0x7f44a553c887 It can be reproduced with cammands: cd /sys/fs/cgroup/ mkdir test cd test/ echo +cpuset > ../cgroup.subtree_control echo root > cpuset.cpus.partition cat /sys/fs/cgroup/cpuset.cpus.effective 0-3 echo 0-3 > cpuset.cpus // taking away all cpus from root This issue is caused by the incorrect rebuilding of scheduling domains. In this scenario, test/cpuset.cpus.partition should be an invalid root and should not trigger the rebuilding of scheduling domains. When calling update_parent_effective_cpumask with partcmd_update, if newmask is not null, it should recheck newmask whether there are cpus is available for parect/cs that has tasks. Fixes: 0c7f293efc87 ("cgroup/cpuset: Add cpuset.cpus.exclusive.effective for v2") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 40ec4abaf440..a9b6d56eeffa 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1991,6 +1991,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ + nocpu |= tasks_nocpu_error(parent, cs, newmask); /* * partcmd_update with newmask: From 311a1bdc44a8e06024df4fd3392be0dfc8298655 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 4 Aug 2024 21:30:16 -0400 Subject: [PATCH 0420/1523] cgroup/cpuset: Clear effective_xcpus on cpus_allowed clearing only if cpus.exclusive not set Commit e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") adds a user writable cpuset.cpus.exclusive file for setting exclusive CPUs to be used for the creation of partitions. Since then effective_xcpus depends on both the cpuset.cpus and cpuset.cpus.exclusive setting. If cpuset.cpus.exclusive is set, effective_xcpus will depend only on cpuset.cpus.exclusive. When it is not set, effective_xcpus will be set according to the cpuset.cpus value when the cpuset becomes a valid partition root. When cpuset.cpus is being cleared by the user, effective_xcpus should only be cleared when cpuset.cpus.exclusive is not set. However, that is not currently the case. # cd /sys/fs/cgroup/ # mkdir test # echo +cpuset > cgroup.subtree_control # cd test # echo 3 > cpuset.cpus.exclusive # cat cpuset.cpus.exclusive.effective 3 # echo > cpuset.cpus # cat cpuset.cpus.exclusive.effective // was cleared Fix it by clearing effective_xcpus only if cpuset.cpus.exclusive is not set. Fixes: e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") Cc: stable@vger.kernel.org # v6.7+ Reported-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a9b6d56eeffa..97d02612b3a6 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2525,7 +2525,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); - cpumask_clear(trialcs->effective_xcpus); + if (cpumask_empty(trialcs->exclusive_cpus)) + cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) From ff0ce721ec213499ec5a532041fb3a1db2dc5ecb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 4 Aug 2024 21:30:17 -0400 Subject: [PATCH 0421/1523] cgroup/cpuset: Eliminate unncessary sched domains rebuilds in hotplug It was found that some hotplug operations may cause multiple rebuild_sched_domains_locked() calls. Some of those intermediate calls may use cpuset states not in the final correct form leading to incorrect sched domain setting. Fix this problem by using the existing force_rebuild flag to inhibit immediate rebuild_sched_domains_locked() calls if set and only doing one final call at the end. Also renaming the force_rebuild flag to force_sd_rebuild to make its meaning for clear. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 97d02612b3a6..4bd9e50bcc8e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -232,6 +232,13 @@ static cpumask_var_t isolated_cpus; /* List of remote partition root children */ static struct list_head remote_children; +/* + * A flag to force sched domain rebuild at the end of an operation while + * inhibiting it in the intermediate stages when set. Currently it is only + * set in hotplug code. + */ +static bool force_sd_rebuild; + /* * Partition root states: * @@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } - if (rebuild_domains) + if (rebuild_domains && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(child, tmp); disable_cnt++; } - if (disable_cnt) + if (disable_cnt && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -2442,7 +2449,8 @@ get_css: } rcu_read_unlock(); - if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD)) + if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) && + !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -3104,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); - if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) + if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed && + !force_sd_rebuild) rebuild_sched_domains_locked(); if (spread_flag_changed) @@ -4501,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs, update_tasks_nodemask(cs); } -static bool force_rebuild; - void cpuset_force_rebuild(void) { - force_rebuild = true; + force_sd_rebuild = true; } /** @@ -4653,15 +4660,9 @@ static void cpuset_handle_hotplug(void) !cpumask_empty(subpartitions_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); - /* - * In the rare case that hotplug removes all the cpus in - * subpartitions_cpus, we assumed that cpus are updated. - */ - if (!cpus_updated && !cpumask_empty(subpartitions_cpus)) - cpus_updated = true; - /* For v1, synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { + cpuset_force_rebuild(); spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); @@ -4717,8 +4718,8 @@ static void cpuset_handle_hotplug(void) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; + if (force_sd_rebuild) { + force_sd_rebuild = false; rebuild_sched_domains_cpuslocked(); } From aedf02e46eb549dac8db4821a6b9f0c6bf6e3990 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 31 Jul 2024 12:17:22 -0700 Subject: [PATCH 0422/1523] drm/msm/dpu: move dpu_encoder's connector assignment to atomic_enable() For cases where the crtc's connectors_changed was set without enable/active getting toggled , there is an atomic_enable() call followed by an atomic_disable() but without an atomic_mode_set(). This results in a NULL ptr access for the dpu_encoder_get_drm_fmt() call in the atomic_enable() as the dpu_encoder's connector was cleared in the atomic_disable() but not re-assigned as there was no atomic_mode_set() call. Fix the NULL ptr access by moving the assignment for atomic_enable() and also use drm_atomic_get_new_connector_for_encoder() to get the connector from the atomic_state. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Reported-by: Dmitry Baryshkov Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/59 Suggested-by: Dmitry Baryshkov Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # SM8350-HDK Patchwork: https://patchwork.freedesktop.org/patch/606729/ Link: https://lore.kernel.org/r/20240731191723.3050932-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 34c56e855af7..3b171bf227d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, cstate->num_mixers = num_lm; - dpu_enc->connector = conn_state->connector; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; @@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->commit_done_timedout = false; + dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc); + cur_mode = &dpu_enc->base.crtc->state->adjusted_mode; dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc); From 319aca883bfa1b85ee08411541b51b9a934ac858 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 25 Jul 2024 15:04:50 -0700 Subject: [PATCH 0423/1523] drm/msm/dp: reset the link phy params before link training Before re-starting link training reset the link phy params namely the pre-emphasis and voltage swing levels otherwise the next link training begins at the previously cached levels which can result in link training failures. Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # SM8350-HDK Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/605946/ Link: https://lore.kernel.org/r/20240725220450.131245-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 7bc8a9f0657a..f342fc5ae41e 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, link_info.rate = ctrl->link->link_params.rate; link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING; + dp_link_reset_phy_params_vx_px(ctrl->link); + dp_aux_link_configure(ctrl->aux, &link_info); if (drm_dp_max_downspread(dpcd)) From bfa1a6283be390947d3649c482e5167186a37016 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 25 Jun 2024 00:13:41 +0300 Subject: [PATCH 0424/1523] drm/msm/dpu: cleanup FB if dpu_format_populate_layout fails If the dpu_format_populate_layout() fails, then FB is prepared, but not cleaned up. This ends up leaking the pin_count on the GEM object and causes a splat during DRM file closure: msm_obj->pin_count WARNING: CPU: 2 PID: 569 at drivers/gpu/drm/msm/msm_gem.c:121 update_lru_locked+0xc4/0xcc [...] Call trace: update_lru_locked+0xc4/0xcc put_pages+0xac/0x100 msm_gem_free_object+0x138/0x180 drm_gem_object_free+0x1c/0x30 drm_gem_object_handle_put_unlocked+0x108/0x10c drm_gem_object_release_handle+0x58/0x70 idr_for_each+0x68/0xec drm_gem_release+0x28/0x40 drm_file_free+0x174/0x234 drm_release+0xb0/0x160 __fput+0xc0/0x2c8 __fput_sync+0x50/0x5c __arm64_sys_close+0x38/0x7c invoke_syscall+0x48/0x118 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x4c/0x120 el0t_64_sync_handler+0x100/0x12c el0t_64_sync+0x190/0x194 irq event stamp: 129818 hardirqs last enabled at (129817): [] console_unlock+0x118/0x124 hardirqs last disabled at (129818): [] el1_dbg+0x24/0x8c softirqs last enabled at (129808): [] handle_softirqs+0x4c8/0x4e8 softirqs last disabled at (129785): [] __do_softirq+0x14/0x20 Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/600714/ Link: https://lore.kernel.org/r/20240625-dpu-mode-config-width-v5-1-501d984d634f@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 40c4dd2c3139..a62ac0c0c06a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane, new_state->fb, &layout); if (ret) { DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret); + if (pstate->aspace) + msm_framebuffer_cleanup(new_state->fb, pstate->aspace, + pstate->needs_dirtyfb); return ret; } From 2db13c4a631505029ada9404e09a2b06a268c1c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:55 +0300 Subject: [PATCH 0425/1523] drm/msm/dpu: limit QCM2290 to RGB formats only The QCM2290 doesn't have CSC blocks, so it can not support YUV formats even on ViG blocks. Fix the formats declared by _VIG_SBLK_NOSCALE(). Fixes: 5334087ee743 ("drm/msm: add support for QCM2290 MDSS") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/601048/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-1-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index fc178ec73907..648c8d0a4c36 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = { { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } From cb18195914e353ece0e789e365a5a16872169805 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:56 +0300 Subject: [PATCH 0426/1523] drm/msm/dpu: relax YUV requirements YUV formats require only CSC to be enabled. Even decimated formats should not require scaler. Relax the requirement and don't check for the scaler block while checking if YUV format can be enabled. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/601049/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-2-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index a62ac0c0c06a..dc1fd95e767b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -747,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; if (MSM_FORMAT_IS_YUV(fmt) && - (!pipe->sspp->cap->sblk->scaler_blk.len || - !pipe->sspp->cap->sblk->csc_blk.len)) { + !pipe->sspp->cap->sblk->csc_blk.len) { DPU_DEBUG_PLANE(pdpu, - "plane doesn't have scaler/csc for yuv\n"); + "plane doesn't have csc for yuv\n"); return -EINVAL; } From d3a785e4f983f523380e023d8a05fb6d04402957 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:57 +0300 Subject: [PATCH 0427/1523] drm/msm/dpu: take plane rotation into account for wide planes Take into account the plane rotation and flipping when calculating src positions for the wide plane parts. This is not an issue yet, because rotation is only supported for the UBWC planes and wide UBWC planes are rejected anyway because in parallel multirect case only the half of the usual width is supported for tiled formats. However it's better to fix this now rather than stumbling upon it later. Fixes: 80e8ae3b38ab ("drm/msm/dpu: add support for wide planes") Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/601059/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-3-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index dc1fd95e767b..29298e066163 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -866,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; + drm_rect_rotate(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* @@ -915,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } + drm_rect_rotate_inv(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if (r_pipe->sspp) + drm_rect_rotate_inv(&r_pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; From 8d5309b7f67571bc0c95d430f4722a99d38f3b79 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 5 Aug 2024 13:02:33 -0700 Subject: [PATCH 0428/1523] drm/xe: Only check last fence on user binds We only set the last fence on user binds, so no need to check last fence kernel issued binds. Will avoid blowing up last fence lockdep asserts. Cc: Francois Dugast Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240805200233.3050325-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 97a6a0b0b8ba..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1149,10 +1149,12 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { + if (job) + err = xe_sched_job_last_fence_add_dep(job, vm); + else + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + } for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); From 843f10ce6539cd5e4989415e18cb809f4cff8b91 Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Wed, 31 Jul 2024 21:26:14 +0530 Subject: [PATCH 0429/1523] drm/i915/gt: Add Wa_14019789679 Wa_14019789679 implementation for MTL, ARL and DG2. v2: Corrected condition v3: - Fix indentation (Jani Nikula) - dword size should be 0x1 and initialize dword to 0 instead of MI_NOOP (Tejas) - Use IS_GFX_GT_IP_RANGE() (Tejas) v4: - 3DSTATE_MESH_CONTROL instruction is 3 dwords long Align with dword size. (Roper, Matthew D) - Add RCS engine check. (Tejas) Bspec: 47083 Signed-off-by: Nitin Gote Reviewed-by: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240731155614.3460645-1-nitin.r.gote@intel.com --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2bd8d98d2110..5394bc7d4daf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -220,6 +220,7 @@ #define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3)) #define XY_CTRL_SURF_INSTR_SIZE 5 #define MI_FLUSH_DW_SIZE 3 diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 09a287c1aedd..bfe6d8fc820f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -974,7 +974,12 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) if (ret) return ret; - cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) + cs = intel_ring_begin(rq, (wal->count * 2 + 6)); + else + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1004,6 +1009,15 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } *cs++ = MI_NOOP; + /* Wa_14019789679 */ + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) { + *cs++ = CMD_3DSTATE_MESH_CONTROL; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + } + intel_uncore_forcewake_put__locked(uncore, fw); spin_unlock(&uncore->lock); intel_gt_mcr_unlock(wal->gt, flags); From f91f7ac900e7342e0fd66093dfbf7cb8cb585a99 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 17 Jul 2024 15:00:23 +0200 Subject: [PATCH 0430/1523] refcount: Report UAF for refcount_sub_and_test(0) when counter==0 When a reference counter is at zero and refcount_sub_and_test() is invoked to subtract zero, the function accepts this request without any warning and returns true. This behavior does not seem ideal because the counter being already at zero indicates a use-after-free. Furthermore, returning true by refcount_sub_and_test() in this case potentially results in a double-free done by its caller. Modify the underlying function __refcount_sub_and_test() to warn about this case as a use-after-free and have it return false to avoid the potential double-free. Signed-off-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240717130023.5675-1-petr.pavlu@suse.com Signed-off-by: Kees Cook --- drivers/misc/lkdtm/refcount.c | 16 ++++++++++++++++ include/linux/refcount.h | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 5cd488f54cfa..8f744bee6fbd 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { @@ -400,6 +415,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(REFCOUNT_DEC_NEGATIVE), CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_ZERO), CRASHTYPE(REFCOUNT_INC_ZERO), CRASHTYPE(REFCOUNT_ADD_ZERO), CRASHTYPE(REFCOUNT_INC_SATURATED), diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d..35f039ecb272 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; From f32e90c0688a3d1f8079ac18ed39b752d22e92bd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 23 Jul 2024 18:53:31 +0200 Subject: [PATCH 0431/1523] gcc-plugins: randstruct: Remove GCC 4.7 or newer requirement Since the kernel currently requires GCC 5.1 as a minimum, remove the unnecessary GCC version >= 4.7 check. Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20240723165332.1947-1-thorsten.blum@toblux.com Signed-off-by: Kees Cook --- scripts/gcc-plugins/randomize_layout_plugin.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 746ff2d272f2..5694df3da2e9 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -19,10 +19,6 @@ #include "gcc-common.h" #include "randomize_layout_seed.h" -#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7) -#error "The RANDSTRUCT plugin requires GCC 4.7 or newer." -#endif - #define ORIG_TYPE_NAME(node) \ (TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous") From fceff12e52985e49c464f402e11b2f97bce3cc24 Mon Sep 17 00:00:00 2001 From: Krzysztof Niemiec Date: Thu, 1 Aug 2024 17:40:48 +0200 Subject: [PATCH 0432/1523] drm/i915/gt: Empty uabi engines list during intel_engines_release() While the uabi_engines_llist is populated in intel_engines_init() during driver load, the corresponding function intel_engines_release() does not correctly get rid of it. This can lead to a UAF if, after failed initialization (for example when gt is set wedged on init), we try to access the engines. Suggested-by: Chris Wilson Signed-off-by: Krzysztof Niemiec Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240801154047.115176-2-krzysztof.niemiec@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3b740ca25000..4d30a86016f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -693,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt) memset(&engine->reset, 0, sizeof(engine->reset)); } + + llist_del_all(>->i915->uabi_engines_llist); } void intel_engine_free_request_pool(struct intel_engine_cs *engine) From 9a2fa1472083580b6c66bdaf291f591e1170123a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Aug 2024 18:02:00 -0400 Subject: [PATCH 0433/1523] fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE copy_fd_bitmaps(new, old, count) is expected to copy the first count/BITS_PER_LONG bits from old->full_fds_bits[] and fill the rest with zeroes. What it does is copying enough words (BITS_TO_LONGS(count/BITS_PER_LONG)), then memsets the rest. That works fine, *if* all bits past the cutoff point are clear. Otherwise we are risking garbage from the last word we'd copied. For most of the callers that is true - expand_fdtable() has count equal to old->max_fds, so there's no open descriptors past count, let alone fully occupied words in ->open_fds[], which is what bits in ->full_fds_bits[] correspond to. The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds), which is the smallest multiple of BITS_PER_LONG that covers all opened descriptors below max_fds. In the common case (copying on fork()) max_fds is ~0U, so all opened descriptors will be below it and we are fine, by the same reasons why the call in expand_fdtable() is safe. Unfortunately, there is a case where max_fds is less than that and where we might, indeed, end up with junk in ->full_fds_bits[] - close_range(from, to, CLOSE_RANGE_UNSHARE) with * descriptor table being currently shared * 'to' being above the current capacity of descriptor table * 'from' being just under some chunk of opened descriptors. In that case we end up with observably wrong behaviour - e.g. spawn a child with CLONE_FILES, get all descriptors in range 0..127 open, then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending up with descriptor #128, despite #64 being observably not open. The minimally invasive fix would be to deal with that in dup_fd(). If this proves to add measurable overhead, we can go that way, but let's try to fix copy_fd_bitmaps() first. * new helper: bitmap_copy_and_expand(to, from, bits_to_copy, size). * make copy_fd_bitmaps() take the bitmap size in words, rather than bits; it's 'count' argument is always a multiple of BITS_PER_LONG, so we are not losing any information, and that way we can use the same helper for all three bitmaps - compiler will see that count is a multiple of BITS_PER_LONG for the large ones, so it'll generate plain memcpy()+memset(). Reproducer added to tools/testing/selftests/core/close_range_test.c Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/file.c | 28 +++++++-------- include/linux/bitmap.h | 12 +++++++ .../testing/selftests/core/close_range_test.c | 35 +++++++++++++++++++ 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/fs/file.c b/fs/file.c index a11e59b5d602..655338effe9c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu) #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) +#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ -static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, - unsigned int count) +static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, + unsigned int copy_words) { - unsigned int cpy, set; + unsigned int nwords = fdt_words(nfdt); - cpy = count / BITS_PER_BYTE; - set = (nfdt->max_fds - count) / BITS_PER_BYTE; - memcpy(nfdt->open_fds, ofdt->open_fds, cpy); - memset((char *)nfdt->open_fds + cpy, 0, set); - memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); - memset((char *)nfdt->close_on_exec + cpy, 0, set); - - cpy = BITBIT_SIZE(count); - set = BITBIT_SIZE(nfdt->max_fds) - cpy; - memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); - memset((char *)nfdt->full_fds_bits + cpy, 0, set); + bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits, + copy_words, nwords); } /* @@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); + copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt)); } /* @@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int open_files = sane_fdtable_size(old_fdt, max_fds); } - copy_fd_bitmaps(new_fdt, old_fdt, open_files); + copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG); old_fds = old_fdt->fd; new_fds = new_fdt->fd; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..d3b66d77df7a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 991c473e3859..12b4eb9d0434 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot) EXPECT_EQ(close(fd3), 0); } +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + TEST_HARNESS_MAIN From 92c4ee25208d0f35dafc3213cdf355fbe449e078 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 2 Aug 2024 11:07:30 +0300 Subject: [PATCH 0434/1523] net: bridge: mcast: wait for previous gc cycles when removing port syzbot hit a use-after-free[1] which is caused because the bridge doesn't make sure that all previous garbage has been collected when removing a port. What happens is: CPU 1 CPU 2 start gc cycle remove port acquire gc lock first wait for lock call br_multicasg_gc() directly acquire lock now but free port the port can be freed while grp timers still running Make sure all previous gc cycles have finished by using flush_work before freeing the port. [1] BUG: KASAN: slab-use-after-free in br_multicast_port_group_expired+0x4c0/0x550 net/bridge/br_multicast.c:861 Read of size 8 at addr ffff888071d6d000 by task syz.5.1232/9699 CPU: 1 PID: 9699 Comm: syz.5.1232 Not tainted 6.10.0-rc5-syzkaller-00021-g24ca36a562d6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 br_multicast_port_group_expired+0x4c0/0x550 net/bridge/br_multicast.c:861 call_timer_fn+0x1a3/0x610 kernel/time/timer.c:1792 expire_timers kernel/time/timer.c:1843 [inline] __run_timers+0x74b/0xaf0 kernel/time/timer.c:2417 __run_timer_base kernel/time/timer.c:2428 [inline] __run_timer_base kernel/time/timer.c:2421 [inline] run_timer_base+0x111/0x190 kernel/time/timer.c:2437 Reported-by: syzbot+263426984509be19c9a0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=263426984509be19c9a0 Fixes: e12cec65b554 ("net: bridge: mcast: destroy all entries via gc") Signed-off-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20240802080730.3206303-1-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9a1cb5079a7a..b2ae0d2434d2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); - hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); - br_multicast_gc(&deleted_head); + flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } From 44732f1dad20457d64c525549cd63dcef2563c23 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Fri, 19 Jul 2024 17:30:16 +0300 Subject: [PATCH 0435/1523] workqueue: doc: Fix function name, remove markers - s/alloc_ordered_queue()/alloc_ordered_workqueue()/ - remove markers to convert it into a link. Signed-off-by: Nikita Shubin Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index bcc370c876be..16f861c9791e 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -260,7 +260,7 @@ Some users depend on strict execution ordering where only one work item is in flight at any given time and the work items are processed in queueing order. While the combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to achieve this behavior, this is no longer the -case. Use ``alloc_ordered_queue()`` instead. +case. Use alloc_ordered_workqueue() instead. Example Execution Scenarios From 38f7e14519d39cf524ddc02d4caee9b337dad703 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Jul 2024 12:44:31 +0100 Subject: [PATCH 0436/1523] workqueue: Fix UBSAN 'subtraction overflow' error in shift_and_mask() UBSAN reports the following 'subtraction overflow' error when booting in a virtual machine on Android: | Internal error: UBSAN: integer subtraction overflow: 00000000f2005515 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.10.0-00006-g3cbe9e5abd46-dirty #4 | Hardware name: linux,dummy-virt (DT) | pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cancel_delayed_work+0x34/0x44 | lr : cancel_delayed_work+0x2c/0x44 | sp : ffff80008002ba60 | x29: ffff80008002ba60 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: ffff1f65014cd3c0 | x20: ffffc0e84c9d0da0 x19: ffffc0e84cab3558 x18: ffff800080009058 | x17: 00000000247ee1f8 x16: 00000000247ee1f8 x15: 00000000bdcb279d | x14: 0000000000000001 x13: 0000000000000075 x12: 00000a0000000000 | x11: ffff1f6501499018 x10: 00984901651fffff x9 : ffff5e7cc35af000 | x8 : 0000000000000001 x7 : 3d4d455453595342 x6 : 000000004e514553 | x5 : ffff1f6501499265 x4 : ffff1f650ff60b10 x3 : 0000000000000620 | x2 : ffff80008002ba78 x1 : 0000000000000000 x0 : 0000000000000000 | Call trace: | cancel_delayed_work+0x34/0x44 | deferred_probe_extend_timeout+0x20/0x70 | driver_register+0xa8/0x110 | __platform_driver_register+0x28/0x3c | syscon_init+0x24/0x38 | do_one_initcall+0xe4/0x338 | do_initcall_level+0xac/0x178 | do_initcalls+0x5c/0xa0 | do_basic_setup+0x20/0x30 | kernel_init_freeable+0x8c/0xf8 | kernel_init+0x28/0x1b4 | ret_from_fork+0x10/0x20 | Code: f9000fbf 97fffa2f 39400268 37100048 (d42aa2a0) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: UBSAN: integer subtraction overflow: Fatal exception This is due to shift_and_mask() using a signed immediate to construct the mask and being called with a shift of 31 (WORK_OFFQ_POOL_SHIFT) so that it ends up decrementing from INT_MIN. Use an unsigned constant '1U' to generate the mask in shift_and_mask(). Cc: Tejun Heo Cc: Lai Jiangshan Fixes: 1211f3b21c2a ("workqueue: Preserve OFFQ bits in cancel[_sync] paths") Signed-off-by: Will Deacon Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1745ca788ede..b35f8ce80bc7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { - return (v >> shift) & ((1 << bits) - 1); + return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) From 98cc1730c89467fc26e2dc2ceb2a014f332daa97 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 25 Jul 2024 09:04:37 +0800 Subject: [PATCH 0437/1523] workqueue: Remove incorrect "WARN_ON_ONCE(!list_empty(&worker->entry));" from dying worker The commit 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") changes the procedure of destroying workers; the dying workers are kept in the cull_list in wake_dying_workers() with the pool lock held and removed from the cull_list by the newly added reap_dying_workers() without the pool lock. This can cause a warning if the dying worker is wokenup earlier than reaped as reported by Marc: 2024/07/23 18:01:21 [M83LP63]: [ 157.267727] ------------[ cut here ]------------ 2024/07/23 18:01:21 [M83LP63]: [ 157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267746] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc dm_service_time s390_trng vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel 2024/07/23 18:01:21 [M83LP63]: loop dm_multipath configfs nfnetlink lcs ctcm fsm zfcp scsi_transport_fc ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common scm_block eadm_sch scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt rng_core autofs4 2024/07/23 18:01:21 [M83LP63]: [ 157.267792] CPU: 21 PID: 725 Comm: kworker/dying Not tainted 6.10.0-rc2-00239-g68f83057b913 #95 2024/07/23 18:01:21 [M83LP63]: [ 157.267796] Hardware name: IBM 3906 M04 704 (LPAR) 2024/07/23 18:01:21 [M83LP63]: [ 157.267802] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 2024/07/23 18:01:21 [M83LP63]: [ 157.267797] Krnl PSW : 0704d00180000000 000003d600fcd9fa (worker_thread+0x552/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267806] Krnl GPRS: 6479696e6700776f 000002c901b62780 000003d602493ec8 000002c914954600 2024/07/23 18:01:21 [M83LP63]: [ 157.267809] 0000000000000000 0000000000000008 000002c901a85400 000002c90719e840 2024/07/23 18:01:21 [M83LP63]: [ 157.267811] 000002c90719e880 000002c901a85420 000002c91127adf0 000002c901a85400 2024/07/23 18:01:21 [M83LP63]: [ 157.267813] 000002c914954600 0000000000000000 000003d600fcd772 000003560452bd98 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] Krnl Code: 000003d600fcd9ec: c0e500674262 brasl %r14,000003d601cb5eb0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9f2: a7f4ffc8 brc 15,000003d600fcd982 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] #000003d600fcd9f6: af000000 mc 0,0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] >000003d600fcd9fa: a7f4fec2 brc 15,000003d600fcd77e 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9fe: 0707 bcr 0,%r7 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda00: c00400682e10 brcl 0,000003d601cd3620 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda06: eb7ff0500024 stmg %r7,%r15,80(%r15) 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda0c: b90400ef lgr %r14,%r15 2024/07/23 18:01:21 [M83LP63]: [ 157.267853] Call Trace: 2024/07/23 18:01:21 [M83LP63]: [ 157.267855] [<000003d600fcd9fa>] worker_thread+0x552/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267859] ([<000003d600fcd772>] worker_thread+0x2ca/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267862] [<000003d600fd6c80>] kthread+0x120/0x128 2024/07/23 18:01:21 [M83LP63]: [ 157.267865] [<000003d600f5305c>] __ret_from_fork+0x3c/0x58 2024/07/23 18:01:21 [M83LP63]: [ 157.267868] [<000003d601cc746a>] ret_from_fork+0xa/0x30 2024/07/23 18:01:21 [M83LP63]: [ 157.267873] Last Breaking-Event-Address: 2024/07/23 18:01:21 [M83LP63]: [ 157.267874] [<000003d600fcd778>] worker_thread+0x2d0/0x558 Since the procedure of destroying workers is changed, the WARN_ON_ONCE() becomes incorrect and should be removed. Cc: Marc Hartmayer Link: https://lore.kernel.org/lkml/87le1sjd2e.fsf@linux.ibm.com/ Reported-by: Marc Hartmayer Fixes: 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b35f8ce80bc7..d56bd2277e58 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3351,7 +3351,6 @@ woke_up: set_pf_worker(false); ida_free(&pool->worker_ida, worker->id); - WARN_ON_ONCE(!list_empty(&worker->entry)); return 0; } From 8bc35475ef1a23b0e224f3242eb11c76cab0ea88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Aug 2024 09:37:25 -1000 Subject: [PATCH 0438/1523] workqueue: Fix spruious data race in __flush_work() When flushing a work item for cancellation, __flush_work() knows that it exclusively owns the work item through its PENDING bit. 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") added a read of @work->data to determine whether to use busy wait for BH work items that are being canceled. While the read is safe when @from_cancel, @work->data was read before testing @from_cancel to simplify code structure: data = *work_data_bits(work); if (from_cancel && !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { While the read data was never used if !@from_cancel, this could trigger KCSAN data race detection spuriously: ================================================================== BUG: KCSAN: data-race in __flush_work / __flush_work write to 0xffff8881223aa3e8 of 8 bytes by task 3998 on cpu 0: instrument_write include/linux/instrumented.h:41 [inline] ___set_bit include/asm-generic/bitops/instrumented-non-atomic.h:28 [inline] insert_wq_barrier kernel/workqueue.c:3790 [inline] start_flush_work kernel/workqueue.c:4142 [inline] __flush_work+0x30b/0x570 kernel/workqueue.c:4178 flush_work kernel/workqueue.c:4229 [inline] ... read to 0xffff8881223aa3e8 of 8 bytes by task 50 on cpu 1: __flush_work+0x42a/0x570 kernel/workqueue.c:4188 flush_work kernel/workqueue.c:4229 [inline] flush_delayed_work+0x66/0x70 kernel/workqueue.c:4251 ... value changed: 0x0000000000400000 -> 0xffff88810006c00d Reorganize the code so that @from_cancel is tested before @work->data is accessed. The only problem is triggering KCSAN detection spuriously. This shouldn't need READ_ONCE() or other access qualifiers. No functional changes. Signed-off-by: Tejun Heo Reported-by: syzbot+b3e4f2f51ed645fd5df2@syzkaller.appspotmail.com Fixes: 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") Link: http://lkml.kernel.org/r/000000000000ae429e061eea2157@google.com Cc: Jens Axboe --- kernel/workqueue.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d56bd2277e58..ef174d8c1f63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4166,7 +4166,6 @@ already_gone: static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; - unsigned long data; if (WARN_ON(!wq_online)) return false; @@ -4184,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) * was queued on a BH workqueue, we also know that it was running in the * BH context and thus can be busy-waited. */ - data = *work_data_bits(work); - if (from_cancel && - !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { - /* - * On RT, prevent a live lock when %current preempted soft - * interrupt processing or prevents ksoftirqd from running by - * keeping flipping BH. If the BH work item runs on a different - * CPU then this has no effect other than doing the BH - * disable/enable dance for nothing. This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). - */ - while (!try_wait_for_completion(&barr.done)) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); - } else { - cpu_relax(); + if (from_cancel) { + unsigned long data = *work_data_bits(work); + + if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && + (data & WORK_OFFQ_BH)) { + /* + * On RT, prevent a live lock when %current preempted + * soft interrupt processing or prevents ksoftirqd from + * running by keeping flipping BH. If the BH work item + * runs on a different CPU then this has no effect other + * than doing the BH disable/enable dance for nothing. + * This is copied from + * kernel/softirq.c::tasklet_unlock_spin_wait(). + */ + while (!try_wait_for_completion(&barr.done)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } } + goto out_destroy; } - } else { - wait_for_completion(&barr.done); } + wait_for_completion(&barr.done); + +out_destroy: destroy_work_on_stack(&barr.work); return true; } From c4c8f369b6a6d21ce27286de1501137771e01dc3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 5 Aug 2024 09:30:29 +0200 Subject: [PATCH 0439/1523] workqueue: Correct declaration of cpu_pwq in struct workqueue_struct cpu_pwq is used in various percpu functions that expect variable in __percpu address space. Correct the declaration of cpu_pwq to struct pool_workqueue __rcu * __percpu *cpu_pwq to declare the variable as __percpu pointer. The patch also fixes following sparse errors: workqueue.c:380:37: warning: duplicate [noderef] workqueue.c:380:37: error: multiple address spaces given: __rcu & __percpu workqueue.c:2271:15: error: incompatible types in comparison expression (different address spaces): workqueue.c:2271:15: struct pool_workqueue [noderef] __rcu * workqueue.c:2271:15: struct pool_workqueue [noderef] __percpu * and uncovers a couple of exisiting "incorrect type in assignment" warnings (from __rcu address space), which this patch does not address. Found by GCC's named address space checks. There were no changes in the resulting object files. Signed-off-by: Uros Bizjak Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ef174d8c1f63..e7b005ff3750 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -377,7 +377,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; From 8fcd8d1e63c05c48b3ac16d0c3e2cd6a7a5c8ec4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Jul 2024 04:23:14 +0900 Subject: [PATCH 0440/1523] kbuild: clean up code duplication in cmd_fdtoverlay When resolving a merge conflict, Linus noticed the fdtoverlay command duplication introduced by commit 49636c5680b9 ("kbuild: verify dtoverlay files against schema"). He suggested a clean-up. I eliminated the duplication and refactored the code a little further. No functional changes are intended, except for the short logs. The log will look as follows: $ make ARCH=arm64 defconfig dtbs_check [ snip ] DTC [C] arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxca.dtb DTC [C] arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dtb DTC [C] arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dtb DTC [C] arch/arm64/boot/dts/freescale/imx95-19x19-evk.dtb DTC arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-imx219.dtbo OVL [C] arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-imx219.dtb The tag [C] indicates that the schema check is executed. Link: https://lore.kernel.org/lkml/CAHk-=wiF3yeWehcvqY-4X7WNb8n4yw_5t0H1CpEpKi7JMjaMfw@mail.gmail.com/#t Requested-by: Linus Torvalds Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/Makefile.lib | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index fe3668dc4954..207325eaf1d1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -400,26 +400,23 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE $(obj)/%.dtbo.S: $(obj)/%.dtbo FORCE $(call if_changed,wrap_S_dtb) -quiet_cmd_dtc = DTC $@ +quiet_dtb_check_tag = $(if $(dtb-check-enabled),[C], ) +cmd_dtb_check = $(if $(dtb-check-enabled),; $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ || true) + +quiet_cmd_dtc = DTC $(quiet_dtb_check_tag) $@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ $(DTC) -o $@ -b 0 \ $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \ -d $(depfile).dtc.tmp $(dtc-tmp) ; \ - cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) - -DT_CHECK_CMD = $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) + cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) \ + $(cmd_dtb_check) # NOTE: # Do not replace $(filter %.dtb %.dtbo, $^) with $(real-prereqs). When a single # DTB is turned into a multi-blob DTB, $^ will contain header file dependencies # recorded in the .*.cmd file. -ifneq ($(CHECK_DTBS),) -quiet_cmd_fdtoverlay = DTOVLCH $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_fdtoverlay = DTOVL $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) -endif +quiet_cmd_fdtoverlay = OVL $(quiet_dtb_check_tag) $@ + cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) $(cmd_dtb_check) $(multi-dtb-y): FORCE $(call if_changed,fdtoverlay) @@ -430,16 +427,11 @@ DT_CHECKER ?= dt-validate DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),-l $(DT_SCHEMA_FILES),-m) DT_BINDING_DIR := Documentation/devicetree/bindings DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.json - -quiet_cmd_dtb = DTC_CHK $@ - cmd_dtb = $(cmd_dtc) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_dtb = $(quiet_cmd_dtc) - cmd_dtb = $(cmd_dtc) +dtb-check-enabled = $(if $(filter %.dtb, $@),y) endif $(obj)/%.dtb: $(obj)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_dep,dtb) + $(call if_changed_dep,dtc) $(obj)/%.dtbo: $(src)/%.dtso $(DTC) FORCE $(call if_changed_dep,dtc) From 6fc9aacad49e3fbecd270c266850d50c453d52ef Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 4 Aug 2024 14:50:57 +0900 Subject: [PATCH 0441/1523] Makefile: add $(srctree) to dependency of compile_commands.json target When trying to build compile_commands.json for an external module against the kernel built in a separate output directory, the following error is displayed: make[1]: *** No rule to make target 'scripts/clang-tools/gen_compile_commands.py', needed by 'compile_commands.json'. Stop. This is because gen_compile_commands.py was previously looked up using a relative path to $(srctree), but commit b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") stopped defining VPATH for external module builds. Prefixing gen_compile_commands.py with $(srctree) fixes the problem. Fixes: b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") Signed-off-by: Alexandre Courbot Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 44c02a6f60a1..44c33de28e93 100644 --- a/Makefile +++ b/Makefile @@ -1980,7 +1980,7 @@ nsdeps: modules quiet_cmd_gen_compile_commands = GEN $@ cmd_gen_compile_commands = $(PYTHON3) $< -a $(AR) -o $@ $(filter-out $<, $(real-prereqs)) -$(extmod_prefix)compile_commands.json: scripts/clang-tools/gen_compile_commands.py \ +$(extmod_prefix)compile_commands.json: $(srctree)/scripts/clang-tools/gen_compile_commands.py \ $(if $(KBUILD_EXTMOD),, vmlinux.a $(KBUILD_VMLINUX_LIBS)) \ $(if $(CONFIG_MODULES), $(MODORDER)) FORCE $(call if_changed,gen_compile_commands) From e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 13 Jun 2024 15:05:03 +0300 Subject: [PATCH 0442/1523] thunderbolt: Mark XDomain as unplugged when router is removed I noticed that when we do discrete host router NVM upgrade and it gets hot-removed from the PCIe side as a result of NVM firmware authentication, if there is another host connected with enabled paths we hang in tearing them down. This is due to fact that the Thunderbolt networking driver also tries to cleanup the paths and ends up blocking in tb_disconnect_xdomain_paths() waiting for the domain lock. However, at this point we already cleaned the paths in tb_stop() so there is really no need for tb_disconnect_xdomain_paths() to do that anymore. Furthermore it already checks if the XDomain is unplugged and bails out early so take advantage of that and mark the XDomain as unplugged when we remove the parent router. Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880..6a2116cbb06f 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } From 33330bcf031818e60a816db0cfd3add9eecc3b28 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 5 Aug 2024 11:22:34 +0200 Subject: [PATCH 0443/1523] scripts: kconfig: merge_config: config files: add a trailing newline When merging files without trailing newlines at the end of the file, two config fragments end up at the same row if file1.config doens't have a trailing newline at the end of the file. file1.config "CONFIG_1=y" file2.config "CONFIG_2=y" ./scripts/kconfig/merge_config.sh -m .config file1.config file2.config This will generate a .config looking like this. cat .config ... CONFIG_1=yCONFIG_2=y" Making sure so we add a newline at the end of every config file that is passed into the script. Signed-off-by: Anders Roxell Signed-off-by: Masahiro Yamada --- scripts/kconfig/merge_config.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 902eb429b9db..0b7952471c18 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -167,6 +167,8 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do sed -i "/$CFG[ =]/d" $MERGE_FILE fi done + # In case the previous file lacks a new line at the end + echo >> $TMP_FILE cat $MERGE_FILE >> $TMP_FILE done From 5a44bb061d04b0306f2aa8add761d86d152b9377 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 1 Aug 2024 14:31:09 +0200 Subject: [PATCH 0444/1523] KVM: s390: fix validity interception issue when gisa is switched off We might run into a SIE validity if gisa has been disabled either via using kernel parameter "kvm.use_gisa=0" or by setting the related sysfs attribute to N (echo N >/sys/module/kvm/parameters/use_gisa). The validity is caused by an invalid value in the SIE control block's gisa designation. That happens because we pass the uninitialized gisa origin to virt_to_phys() before writing it to the gisa designation. To fix this we return 0 in kvm_s390_get_gisa_desc() if the origin is 0. kvm_s390_get_gisa_desc() is used to determine which gisa designation to set in the SIE control block. A value of 0 in the gisa designation disables gisa usage. The issue surfaces in the host kernel with the following kernel message as soon a new kvm guest start is attemted. kvm: unhandled validity intercept 0x1011 WARNING: CPU: 0 PID: 781237 at arch/s390/kvm/intercept.c:101 kvm_handle_sie_intercept+0x42e/0x4d0 [kvm] Modules linked in: vhost_net tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT xt_tcpudp nft_compat x_tables nf_nat_tftp nf_conntrack_tftp vfio_pci_core irqbypass vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc mlx5_ib ib_uverbs ib_core mlx5_core uvdevice s390_trng eadm_sch vfio_ccw zcrypt_cex4 mdev vfio_iommu_type1 vfio sch_fq_codel drm i2c_core loop drm_panel_orientation_quirks configfs nfnetlink lcs ctcm fsm dm_service_time ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common dm_mirror dm_region_hash dm_log zfcp scsi_transport_fc scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt dm_multipath rng_core autofs4 [last unloaded: vfio_pci] CPU: 0 PID: 781237 Comm: CPU 0/KVM Not tainted 6.10.0-08682-gcad9f11498ea #6 Hardware name: IBM 3931 A01 701 (LPAR) Krnl PSW : 0704c00180000000 000003d93deb0122 (kvm_handle_sie_intercept+0x432/0x4d0 [kvm]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 000003d900000027 000003d900000023 0000000000000028 000002cd00000000 000002d063a00900 00000359c6daf708 00000000000bebb5 0000000000001eff 000002cfd82e9000 000002cfd80bc000 0000000000001011 000003d93deda412 000003ff8962df98 000003d93de77ce0 000003d93deb011e 00000359c6daf960 Krnl Code: 000003d93deb0112: c020fffe7259 larl %r2,000003d93de7e5c4 000003d93deb0118: c0e53fa8beac brasl %r14,000003d9bd3c7e70 #000003d93deb011e: af000000 mc 0,0 >000003d93deb0122: a728ffea lhi %r2,-22 000003d93deb0126: a7f4fe24 brc 15,000003d93deafd6e 000003d93deb012a: 9101f0b0 tm 176(%r15),1 000003d93deb012e: a774fe48 brc 7,000003d93deafdbe 000003d93deb0132: 40a0f0ae sth %r10,174(%r15) Call Trace: [<000003d93deb0122>] kvm_handle_sie_intercept+0x432/0x4d0 [kvm] ([<000003d93deb011e>] kvm_handle_sie_intercept+0x42e/0x4d0 [kvm]) [<000003d93deacc10>] vcpu_post_run+0x1d0/0x3b0 [kvm] [<000003d93deaceda>] __vcpu_run+0xea/0x2d0 [kvm] [<000003d93dead9da>] kvm_arch_vcpu_ioctl_run+0x16a/0x430 [kvm] [<000003d93de93ee0>] kvm_vcpu_ioctl+0x190/0x7c0 [kvm] [<000003d9bd728b4e>] vfs_ioctl+0x2e/0x70 [<000003d9bd72a092>] __s390x_sys_ioctl+0xc2/0xd0 [<000003d9be0e9222>] __do_syscall+0x1f2/0x2e0 [<000003d9be0f9a90>] system_call+0x70/0x98 Last Breaking-Event-Address: [<000003d9bd3c7f58>] __warn_printk+0xe8/0xf0 Cc: stable@vger.kernel.org Reported-by: Christian Borntraeger Fixes: fe0ef0030463 ("KVM: s390: sort out physical vs virtual pointers usage") Signed-off-by: Michael Mueller Tested-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240801123109.2782155-1-mimu@linux.ibm.com Message-ID: <20240801123109.2782155-1-mimu@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bf8534218af3..e680c6bf0c9d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); + u32 gd; + + if (!kvm->arch.gisa_int.origin) + return 0; + + gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; From 7e1e206b99f4b3345aeb49d94584a420b7887f1d Mon Sep 17 00:00:00 2001 From: Steven 'Steve' Kendall Date: Tue, 6 Aug 2024 00:08:24 +0000 Subject: [PATCH 0445/1523] ALSA: hda: Add HP MP9 G4 Retail System AMS to force connect list In recent HP UEFI firmware (likely v2.15 and above, tested on 2.27), these pins are incorrectly set for HDMI/DP audio. Tested on HP MP9 G4 Retail System AMS. Tested audio with two monitors connected via DisplayPort. Link: https://forum.manjaro.org/t/intel-cannon-lake-pch-cavs-conexant-cx20632-no-sound-at-hdmi-or-displayport/133494 Link: https://bbs.archlinux.org/viewtopic.php?id=270523 Signed-off-by: Steven 'Steve' Kendall Cc: Link: https://patch.msgid.link/20240806-hdmi-audio-hp-wrongpins-v2-1-d9eb4ad41043@chromium.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 707d203ba652..4e7361d1d518 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1989,6 +1989,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), From b82c1d235a30622177ce10dcb94dfd691a49922f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 5 Aug 2024 22:38:29 +0200 Subject: [PATCH 0446/1523] syscalls: add back legacy __NR_nfsservctl macro The conversion from the old unistd.h file to syscall.tbl dropped the nfsservctl macro. This one was handled inconsistently across architectures in the original introduction of the syscall.tbl format, and I went the other way on this. The syscall was already gone in linux-3.1 before the current users of the generic table (other than openrisc) first appeared, so nobody could actally use it, but putting the number back helps for consistency since there are build scripts that check the presence of all these macros. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2301919 Signed-off-by: Arnd Bergmann --- scripts/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index b93d43561a2c..845e24eb372e 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -53,6 +53,7 @@ 39 common umount2 sys_umount 40 common mount sys_mount 41 common pivot_root sys_pivot_root +42 common nfsservctl sys_ni_syscall 43 32 statfs64 sys_statfs64 compat_sys_statfs64 43 64 statfs sys_statfs 44 32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 From 176fd1511dd9086ab4fa9323cb232177c6235288 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2024 08:49:16 +0200 Subject: [PATCH 0447/1523] ALSA: hda/hdmi: Yet more pin fix for HP EliteDesk 800 G4 HP EliteDesk 800 G4 (PCI SSID 103c:83e2) is another Kabylake machine where BIOS misses the HDMI pin initializations. Add the quirk entry. Cc: Link: https://patch.msgid.link/20240806064918.11132-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4e7361d1d518..78042ac2b71f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1989,6 +1989,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1), SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), From 3196763851b5fb9f7c8c488e233e947292cd71a6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:50 +0300 Subject: [PATCH 0448/1523] drm/i915: Replace to_bpp_x16() with fxp_q4_from_int() Replace the to_bpp_x16() helper defined by the driver with the equivalent fxp_q4_from_int() helper defined by DRM core. v2: Rebase on the s/drm_x16/fxp_q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 5 +++-- .../gpu/drm/i915/display/intel_display_types.h | 5 ----- drivers/gpu/drm/i915/display/intel_dp.c | 17 +++++++++-------- drivers/gpu/drm/i915/display/intel_dp_mst.c | 14 +++++++------- drivers/gpu/drm/i915/display/intel_fdi.c | 4 +++- drivers/gpu/drm/i915/display/intel_link_bw.c | 4 +++- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 460e83f0d5a5..83fa093ccf8b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "i915_drv.h" #include "i915_reg.h" @@ -3521,8 +3522,8 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, crtc_state->pipe_bpp = bpc * 3; - crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(min(crtc_state->pipe_bpp, - VBT_DSC_MAX_BPP(dsc->max_bpp))); + crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(min(crtc_state->pipe_bpp, + VBT_DSC_MAX_BPP(dsc->max_bpp))); /* * FIXME: This is ugly, and slice count should take DSC engine diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 327a4133030f..3df6a058a352 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2218,11 +2218,6 @@ static inline int to_bpp_int_roundup(int bpp_x16) return (bpp_x16 + 0xf) >> 4; } -static inline int to_bpp_x16(int bpp) -{ - return bpp << 4; -} - /* * Conversion functions/macros from various pointer types to struct * intel_display pointer. diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 09789d62001f..c63fb59fa03c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include "g4x_dp.h" @@ -2022,7 +2023,7 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, timeslots); if (ret == 0) { pipe_config->dsc.compressed_bpp_x16 = - to_bpp_x16(valid_dsc_bpp[i]); + fxp_q4_from_int(valid_dsc_bpp[i]); return 0; } } @@ -2275,7 +2276,7 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); pipe_config->dsc.compressed_bpp_x16 = - to_bpp_x16(max(dsc_min_bpp, dsc_max_bpp)); + fxp_q4_from_int(max(dsc_min_bpp, dsc_max_bpp)); pipe_config->pipe_bpp = pipe_bpp; @@ -2407,15 +2408,15 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, int max_link_bpp_x16; max_link_bpp_x16 = min(crtc_state->max_link_bpp_x16, - to_bpp_x16(limits->pipe.max_bpp)); + fxp_q4_from_int(limits->pipe.max_bpp)); if (!dsc) { - max_link_bpp_x16 = rounddown(max_link_bpp_x16, to_bpp_x16(2 * 3)); + max_link_bpp_x16 = rounddown(max_link_bpp_x16, fxp_q4_from_int(2 * 3)); - if (max_link_bpp_x16 < to_bpp_x16(limits->pipe.min_bpp)) + if (max_link_bpp_x16 < fxp_q4_from_int(limits->pipe.min_bpp)) return false; - limits->link.min_bpp_x16 = to_bpp_x16(limits->pipe.min_bpp); + limits->link.min_bpp_x16 = fxp_q4_from_int(limits->pipe.min_bpp); } else { /* * TODO: set the DSC link limits already here, atm these are @@ -3061,8 +3062,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (pipe_config->dsc.compression_enable) link_bpp_x16 = pipe_config->dsc.compressed_bpp_x16; else - link_bpp_x16 = to_bpp_x16(intel_dp_output_bpp(pipe_config->output_format, - pipe_config->pipe_bpp)); + link_bpp_x16 = fxp_q4_from_int(intel_dp_output_bpp(pipe_config->output_format, + pipe_config->pipe_bpp)); if (intel_dp->mso_link_count) { int n = intel_dp->mso_link_count; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index faee7af0a8a4..c861a4263a72 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -212,8 +212,8 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp); - link_bpp_x16 = to_bpp_x16(dsc ? bpp : - intel_dp_output_bpp(crtc_state->output_format, bpp)); + link_bpp_x16 = fxp_q4_from_int(dsc ? bpp : + intel_dp_output_bpp(crtc_state->output_format, bpp)); local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, false, dsc, link_bpp_x16); @@ -290,7 +290,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, if (!dsc) crtc_state->pipe_bpp = bpp; else - crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(bpp); + crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(bpp); drm_dbg_kms(&i915->drm, "Got %d slots for pipe bpp %d dsc %d\n", slots, bpp, dsc); } @@ -479,10 +479,10 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name); - if (limits->link.max_bpp_x16 < to_bpp_x16(24)) + if (limits->link.max_bpp_x16 < fxp_q4_from_int(24)) return false; - limits->link.min_bpp_x16 = to_bpp_x16(24); + limits->link.min_bpp_x16 = fxp_q4_from_int(24); return true; } @@ -490,9 +490,9 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne drm_WARN_ON(&i915->drm, limits->min_rate != limits->max_rate); if (limits->max_rate < 540000) - min_bpp_x16 = to_bpp_x16(13); + min_bpp_x16 = fxp_q4_from_int(13); else if (limits->max_rate < 810000) - min_bpp_x16 = to_bpp_x16(10); + min_bpp_x16 = fxp_q4_from_int(10); if (limits->link.min_bpp_x16 >= min_bpp_x16) return true; diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index d33befd7994d..d08331805d75 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -5,6 +5,8 @@ #include +#include + #include "i915_reg.h" #include "intel_atomic.h" #include "intel_crtc.h" @@ -340,7 +342,7 @@ int ilk_fdi_compute_config(struct intel_crtc *crtc, pipe_config->fdi_lanes = lane; - intel_link_compute_m_n(to_bpp_x16(pipe_config->pipe_bpp), + intel_link_compute_m_n(fxp_q4_from_int(pipe_config->pipe_bpp), lane, fdi_dotclock, link_bw, intel_dp_bw_fec_overhead(false), diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index dfd7d5e23f3f..5db0724b6520 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include + #include "i915_drv.h" #include "intel_atomic.h" @@ -93,7 +95,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, * is based on the pipe bpp value, set the actual link bpp * limit here once the MST BW allocation is fixed. */ - link_bpp_x16 = to_bpp_x16(crtc_state->pipe_bpp); + link_bpp_x16 = fxp_q4_from_int(crtc_state->pipe_bpp); if (link_bpp_x16 > max_bpp_x16) { max_bpp_x16 = link_bpp_x16; From 8466a14173e5ff7c2a52d8700a13f4b841d0e17c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:51 +0300 Subject: [PATCH 0449/1523] drm/i915: Replace to_bpp_int() with fxp_q4_to_int() Replace the to_bpp_int() helper defined by the driver with the equivalent fxp_q4_to_int() helper defined by DRM core. v2: Rebase on the s/drm_x16/fxp_q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-3-imre.deak@intel.com --- drivers/gpu/drm/i915/display/icl_dsi.c | 9 +++++---- drivers/gpu/drm/i915/display/intel_display.c | 3 ++- drivers/gpu/drm/i915/display/intel_display_types.h | 7 +------ drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++---- drivers/gpu/drm/i915/display/intel_dp_mst.c | 6 +++--- drivers/gpu/drm/i915/display/intel_fdi.c | 2 +- drivers/gpu/drm/i915/display/intel_vdsc.c | 5 +++-- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ae8f6617aa70..55dd57d1bf94 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -27,6 +27,7 @@ #include #include +#include #include #include "i915_reg.h" @@ -330,7 +331,7 @@ static int afe_clk(struct intel_encoder *encoder, int bpp; if (crtc_state->dsc.compression_enable) - bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + bpp = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -863,7 +864,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * compressed and non-compressed bpp. */ if (crtc_state->dsc.compression_enable) { - mul = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + mul = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); } @@ -887,7 +888,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, int bpp, line_time_us, byte_clk_period_ns; if (crtc_state->dsc.compression_enable) - bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + bpp = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -1470,7 +1471,7 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder, &pipe_config->hw.adjusted_mode; if (pipe_config->dsc.compressed_bpp_x16) { - int div = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); + int div = fxp_q4_to_int(pipe_config->dsc.compressed_bpp_x16); int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); adjusted_mode->crtc_htotal = diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 01a5faa3fea5..8172e8e70005 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -4679,7 +4680,7 @@ intel_modeset_pipe_config(struct intel_atomic_state *state, crtc_state->fec_enable = limits->force_fec_pipes & BIT(crtc->pipe); crtc_state->max_link_bpp_x16 = limits->max_bpp_x16[crtc->pipe]; - if (crtc_state->pipe_bpp > to_bpp_int(crtc_state->max_link_bpp_x16)) { + if (crtc_state->pipe_bpp > fxp_q4_to_int(crtc_state->max_link_bpp_x16)) { drm_dbg_kms(&i915->drm, "[CRTC:%d:%s] Link bpp limited to " BPP_X16_FMT "\n", crtc->base.base.id, crtc->base.name, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 3df6a058a352..5f1f3e1a9e08 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2200,18 +2200,13 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL; } -static inline int to_bpp_int(int bpp_x16) -{ - return bpp_x16 >> 4; -} - static inline int to_bpp_frac(int bpp_x16) { return bpp_x16 & 0xf; } #define BPP_X16_FMT "%d.%04d" -#define BPP_X16_ARGS(bpp_x16) to_bpp_int(bpp_x16), (to_bpp_frac(bpp_x16) * 625) +#define BPP_X16_ARGS(bpp_x16) fxp_q4_to_int(bpp_x16), (to_bpp_frac(bpp_x16) * 625) static inline int to_bpp_int_roundup(int bpp_x16) { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c63fb59fa03c..7355b1fd7329 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1694,8 +1694,8 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, int bpp, i, lane_count, clock = intel_dp_mode_clock(pipe_config, conn_state); int mode_rate, link_rate, link_avail; - for (bpp = to_bpp_int(limits->link.max_bpp_x16); - bpp >= to_bpp_int(limits->link.min_bpp_x16); + for (bpp = fxp_q4_to_int(limits->link.max_bpp_x16); + bpp >= fxp_q4_to_int(limits->link.min_bpp_x16); bpp -= 2 * 3) { int link_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp); @@ -2113,7 +2113,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, adjusted_mode->hdisplay, pipe_config->joiner_pipes); dsc_max_bpp = min(dsc_max_bpp, dsc_joiner_max_bpp); - dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); + dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); if (DISPLAY_VER(i915) >= 13) return xelpd_dsc_compute_link_config(intel_dp, connector, pipe_config, limits, @@ -2270,7 +2270,7 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, pipe_config, pipe_bpp / 3); dsc_max_bpp = dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; - dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); + dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index c861a4263a72..a4fbcae3139f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -309,8 +309,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, * YUV420 is only half of the pipe bpp value. */ slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, - to_bpp_int(limits->link.max_bpp_x16), - to_bpp_int(limits->link.min_bpp_x16), + fxp_q4_to_int(limits->link.max_bpp_x16), + fxp_q4_to_int(limits->link.min_bpp_x16), limits, conn_state, 2 * 3, false); @@ -375,7 +375,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, crtc_state, max_bpp / 3); max_compressed_bpp = min(max_compressed_bpp, - to_bpp_int(limits->link.max_bpp_x16)); + fxp_q4_to_int(limits->link.max_bpp_x16)); min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); min_compressed_bpp = max(min_compressed_bpp, diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index d08331805d75..222cd0e1a2bc 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -306,7 +306,7 @@ int intel_fdi_link_freq(struct drm_i915_private *i915, bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state) { int pipe_bpp = min(crtc_state->pipe_bpp, - to_bpp_int(crtc_state->max_link_bpp_x16)); + fxp_q4_to_int(crtc_state->max_link_bpp_x16)); pipe_bpp = rounddown(pipe_bpp, 2 * 3); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index b9687b7692b8..99902fbfeec4 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -8,6 +8,7 @@ #include #include +#include #include "i915_drv.h" #include "intel_crtc.h" @@ -76,7 +77,7 @@ intel_vdsc_set_min_max_qp(struct drm_dsc_config *vdsc_cfg, int buf, static void calculate_rc_params(struct drm_dsc_config *vdsc_cfg) { - int bpp = to_bpp_int(vdsc_cfg->bits_per_pixel); + int bpp = fxp_q4_to_int(vdsc_cfg->bits_per_pixel); int bpc = vdsc_cfg->bits_per_component; int qp_bpc_modifier = (bpc - 8) * 2; int uncompressed_bpg_rate; @@ -263,7 +264,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config; - u16 compressed_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); + u16 compressed_bpp = fxp_q4_to_int(pipe_config->dsc.compressed_bpp_x16); int err; int ret; From ce9b1466f5a9a1b2acc80d29f12d69559fa97174 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:52 +0300 Subject: [PATCH 0450/1523] drm/i915: Replace to_bpp_int_roundup() with fxp_q4_to_int_roundup() Replace the to_bpp_int_roundup() helper defined by the driver with the equivalent fxp_q4_to_int_roundup() helper defined by DRM core. v2: Rebase on s/drm_x16/fxp_q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-4-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 5 ++++- drivers/gpu/drm/i915/display/intel_display_types.h | 5 ----- drivers/gpu/drm/i915/display/intel_dp.c | 6 +++--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 16d5550f7e5e..aa3ba66c5307 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -23,7 +23,10 @@ #include +#include + #include "soc/intel_dram.h" + #include "hsw_ips.h" #include "i915_reg.h" #include "intel_atomic.h" @@ -2750,7 +2753,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) */ int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24; int min_cdclk_bj = - (to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) * + (fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) * pixel_clock) / (2 * bigjoiner_interface_bits); min_cdclk = max(min_cdclk, min_cdclk_bj); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 5f1f3e1a9e08..0f57065ea226 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2208,11 +2208,6 @@ static inline int to_bpp_frac(int bpp_x16) #define BPP_X16_FMT "%d.%04d" #define BPP_X16_ARGS(bpp_x16) fxp_q4_to_int(bpp_x16), (to_bpp_frac(bpp_x16) * 625) -static inline int to_bpp_int_roundup(int bpp_x16) -{ - return (bpp_x16 + 0xf) >> 4; -} - /* * Conversion functions/macros from various pointer types to struct * intel_display pointer. diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 7355b1fd7329..d132b8d5aaa9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2101,7 +2101,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, dsc_src_min_bpp = dsc_src_min_compressed_bpp(); dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, @@ -2263,7 +2263,7 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, dsc_src_min_bpp = dsc_src_min_compressed_bpp(); dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, @@ -2489,7 +2489,7 @@ int intel_dp_config_required_rate(const struct intel_crtc_state *crtc_state) const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int bpp = crtc_state->dsc.compression_enable ? - to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) : + fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) : crtc_state->pipe_bpp; return intel_dp_link_required(adjusted_mode->crtc_clock, bpp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a4fbcae3139f..8c5f783abdc2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -379,7 +379,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); min_compressed_bpp = max(min_compressed_bpp, - to_bpp_int_roundup(limits->link.min_bpp_x16)); + fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); drm_dbg_kms(&i915->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n", min_compressed_bpp, max_compressed_bpp); From e60244554ca98e87ef731739f608eac54e478e9a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:53 +0300 Subject: [PATCH 0451/1523] drm/i915: Replace to_bpp_frac() with fxp_q4_to_frac() Replace the to_bpp_frac() helper defined by the driver with the equivalent fxp_q4_to_frac() helper defined by DRM core. v2: Rebase on the s/drm_x16/fxp_q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 7 +------ drivers/gpu/drm/i915/display/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/display/intel_vdsc.c | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 0f57065ea226..423e7ce187f6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2200,13 +2200,8 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL; } -static inline int to_bpp_frac(int bpp_x16) -{ - return bpp_x16 & 0xf; -} - #define BPP_X16_FMT "%d.%04d" -#define BPP_X16_ARGS(bpp_x16) fxp_q4_to_int(bpp_x16), (to_bpp_frac(bpp_x16) * 625) +#define BPP_X16_ARGS(bpp_x16) fxp_q4_to_int(bpp_x16), (fxp_q4_to_frac(bpp_x16) * 625) /* * Conversion functions/macros from various pointer types to struct diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d132b8d5aaa9..3ed2b74d5eb5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2066,7 +2066,7 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, compressed_bppx16 >= dsc_min_bpp; compressed_bppx16 -= bppx16_step) { if (intel_dp->force_dsc_fractional_bpp_en && - !to_bpp_frac(compressed_bppx16)) + !fxp_q4_to_frac(compressed_bppx16)) continue; ret = dsc_compute_link_config(intel_dp, pipe_config, @@ -2076,7 +2076,7 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, if (ret == 0) { pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16; if (intel_dp->force_dsc_fractional_bpp_en && - to_bpp_frac(compressed_bppx16)) + fxp_q4_to_frac(compressed_bppx16)) drm_dbg_kms(&i915->drm, "Forcing DSC fractional bpp\n"); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 99902fbfeec4..ff717dc1a2ce 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -185,7 +185,7 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg) } } else { /* fractional bpp part * 10000 (for precision up to 4 decimal places) */ - int fractional_bits = to_bpp_frac(vdsc_cfg->bits_per_pixel); + int fractional_bits = fxp_q4_to_frac(vdsc_cfg->bits_per_pixel); static const s8 ofs_und6[] = { 0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12 From 2796b7ceec95bd5a7069cd27af7e4cf01a692db4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:54 +0300 Subject: [PATCH 0452/1523] drm/i915: Replace BPP_X16_FMT()/ARGS() with FXP_Q4_FMT()/ARGS() Replace the BPP_X16_FMT()/ARGS() helpers defined by the driver with the equivalent FXP_Q4_FMT()/ARGS() helpers defined by DRM core. v2: Rebase on the s/DRM_X16/FXP_Q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-6-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 5 +++-- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- .../gpu/drm/i915/display/intel_display_types.h | 3 --- drivers/gpu/drm/i915/display/intel_dp.c | 16 ++++++++-------- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++-- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index b9bafec06fb8..9b8508a503f7 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -26,6 +26,7 @@ #include #include +#include #include #include "i915_drv.h" @@ -452,8 +453,8 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, lanes = crtc_state->lane_count; drm_dbg_kms(&i915->drm, - "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " BPP_X16_FMT " cdclk = %u\n", - h_active, link_clk, lanes, BPP_X16_ARGS(vdsc_bppx16), cdclk); + "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " FXP_Q4_FMT " cdclk = %u\n", + h_active, link_clk, lanes, FXP_Q4_ARGS(vdsc_bppx16), cdclk); if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) return 0; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8172e8e70005..2755ebbbb9d2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4682,9 +4682,9 @@ intel_modeset_pipe_config(struct intel_atomic_state *state, if (crtc_state->pipe_bpp > fxp_q4_to_int(crtc_state->max_link_bpp_x16)) { drm_dbg_kms(&i915->drm, - "[CRTC:%d:%s] Link bpp limited to " BPP_X16_FMT "\n", + "[CRTC:%d:%s] Link bpp limited to " FXP_Q4_FMT "\n", crtc->base.base.id, crtc->base.name, - BPP_X16_ARGS(crtc_state->max_link_bpp_x16)); + FXP_Q4_ARGS(crtc_state->max_link_bpp_x16)); crtc_state->bw_constrained = true; } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 423e7ce187f6..ea6548ceab2f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2200,9 +2200,6 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL; } -#define BPP_X16_FMT "%d.%04d" -#define BPP_X16_ARGS(bpp_x16) fxp_q4_to_int(bpp_x16), (fxp_q4_to_frac(bpp_x16) * 625) - /* * Conversion functions/macros from various pointer types to struct * intel_display pointer. diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3ed2b74d5eb5..49a37b996530 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2366,17 +2366,17 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, if (ret < 0) { drm_dbg_kms(&dev_priv->drm, "Cannot compute valid DSC parameters for Input Bpp = %d" - "Compressed BPP = " BPP_X16_FMT "\n", + "Compressed BPP = " FXP_Q4_FMT "\n", pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16)); + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16)); return ret; } pipe_config->dsc.compression_enable = true; drm_dbg_kms(&dev_priv->drm, "DP DSC computed with Input Bpp = %d " - "Compressed Bpp = " BPP_X16_FMT " Slice Count = %d\n", + "Compressed Bpp = " FXP_Q4_FMT " Slice Count = %d\n", pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16), + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16), pipe_config->dsc.slice_count); return 0; @@ -2429,7 +2429,7 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, limits->link.max_bpp_x16 = max_link_bpp_x16; drm_dbg_kms(&i915->drm, - "[ENCODER:%d:%s][CRTC:%d:%s] DP link limits: pixel clock %d kHz DSC %s max lanes %d max rate %d max pipe_bpp %d max link_bpp " BPP_X16_FMT "\n", + "[ENCODER:%d:%s][CRTC:%d:%s] DP link limits: pixel clock %d kHz DSC %s max lanes %d max rate %d max pipe_bpp %d max link_bpp " FXP_Q4_FMT "\n", encoder->base.base.id, encoder->base.name, crtc->base.base.id, crtc->base.name, adjusted_mode->crtc_clock, @@ -2437,7 +2437,7 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, limits->max_lane_count, limits->max_rate, limits->pipe.max_bpp, - BPP_X16_ARGS(limits->link.max_bpp_x16)); + FXP_Q4_ARGS(limits->link.max_bpp_x16)); return true; } @@ -2568,10 +2568,10 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, } drm_dbg_kms(&i915->drm, - "DP lane count %d clock %d bpp input %d compressed " BPP_X16_FMT " link rate required %d available %d\n", + "DP lane count %d clock %d bpp input %d compressed " FXP_Q4_FMT " link rate required %d available %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16), + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16), intel_dp_config_required_rate(pipe_config), intel_dp_max_link_data_rate(intel_dp, pipe_config->port_clock, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8c5f783abdc2..45d2230d1801 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -498,10 +498,10 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne return true; drm_dbg_kms(&i915->drm, - "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " BPP_X16_FMT " in DSC mode due to hblank expansion quirk\n", + "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " FXP_Q4_FMT " in DSC mode due to hblank expansion quirk\n", crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name, - BPP_X16_ARGS(min_bpp_x16)); + FXP_Q4_ARGS(min_bpp_x16)); if (limits->link.max_bpp_x16 < min_bpp_x16) return false; From 1de99ff7371bc6ce2187081bf5bfbbb4a7de28d7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:55 +0300 Subject: [PATCH 0453/1523] drm/i915: Dump DSC state to dmesg and debugfs/i915_display_info Dump the DSC state to dmesg during HW readout and state computation as well as the i915_display_info debugfs entry. v2: Rebase on the s/DRM_X16/FXP_Q4 change. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-7-imre.deak@intel.com --- .../drm/i915/display/intel_crtc_state_dump.c | 3 +++ .../drm/i915/display/intel_display_debugfs.c | 4 ++++ drivers/gpu/drm/i915/display/intel_vdsc.c | 20 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_vdsc.h | 4 ++++ 4 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 6df526e189b5..705ec5ad385c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -10,6 +10,7 @@ #include "intel_crtc_state_dump.h" #include "intel_display_types.h" #include "intel_hdmi.h" +#include "intel_vdsc.h" #include "intel_vrr.h" static void intel_dump_crtc_timings(struct drm_printer *p, @@ -369,6 +370,8 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, else if (IS_VALLEYVIEW(i915)) vlv_dump_csc(&p, "wgc csc", &pipe_config->csc); + intel_vdsc_state_dump(&p, 0, pipe_config); + dump_planes: if (!state) return; diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 5cf9b4af9adf..9c5a3f5beda2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -36,6 +36,7 @@ #include "intel_pps.h" #include "intel_psr.h" #include "intel_psr_regs.h" +#include "intel_vdsc.h" #include "intel_wm.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) @@ -551,6 +552,7 @@ static void crtc_updates_add(struct intel_crtc *crtc) static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_encoder *encoder; @@ -581,6 +583,8 @@ static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) crtc_state->joiner_pipes, intel_crtc_is_joiner_secondary(crtc_state) ? "slave" : "master"); + intel_vdsc_state_dump(&p, 1, crtc_state); + for_each_intel_encoder_mask(&dev_priv->drm, encoder, crtc_state->uapi.encoder_mask) intel_encoder_info(m, crtc, encoder); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index ff717dc1a2ce..404ed05371cb 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -990,3 +990,23 @@ void intel_dsc_get_config(struct intel_crtc_state *crtc_state) out: intel_display_power_put(dev_priv, power_domain, wakeref); } + +static void intel_vdsc_dump_state(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state) +{ + drm_printf_indent(p, indent, + "dsc-dss: compressed-bpp:" FXP_Q4_FMT ", slice-count: %d, split: %s\n", + FXP_Q4_ARGS(crtc_state->dsc.compressed_bpp_x16), + crtc_state->dsc.slice_count, + str_yes_no(crtc_state->dsc.dsc_split)); +} + +void intel_vdsc_state_dump(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->dsc.compression_enable) + return; + + intel_vdsc_dump_state(p, indent, crtc_state); + drm_dsc_dump_config(p, indent, &crtc_state->dsc.config); +} diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 2cc41ff08909..290b2e9b3482 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -8,6 +8,8 @@ #include +struct drm_printer; + enum transcoder; struct intel_crtc; struct intel_crtc_state; @@ -27,5 +29,7 @@ void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); +void intel_vdsc_state_dump(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VDSC_H__ */ From 34d0472ce1087fbf8e2ba504a4625e4c74286c63 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Aug 2024 18:07:56 +0300 Subject: [PATCH 0454/1523] drm/i915: Remove DSC register dump The Display Engine's DSC register values are deducted from the DSC configuration stored in intel_crtc_state::dsc. The latter one is dumped in a human-readable format, so dumping the register values is redundant, remove it. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240805150802.3568970-8-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_vdsc.c | 24 ++--------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 404ed05371cb..2e849b015e74 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -457,36 +457,30 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) pps_val |= DSC_PPS0_422_ENABLE; if (vdsc_cfg->vbr_enable) pps_val |= DSC_PPS0_VBR_ENABLE; - drm_dbg_kms(&dev_priv->drm, "PPS0 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 0, pps_val); /* PPS 1 */ pps_val = DSC_PPS1_BPP(vdsc_cfg->bits_per_pixel); - drm_dbg_kms(&dev_priv->drm, "PPS1 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 1, pps_val); /* PPS 2 */ pps_val = DSC_PPS2_PIC_HEIGHT(vdsc_cfg->pic_height) | DSC_PPS2_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances); - drm_dbg_kms(&dev_priv->drm, "PPS2 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 2, pps_val); /* PPS 3 */ pps_val = DSC_PPS3_SLICE_HEIGHT(vdsc_cfg->slice_height) | DSC_PPS3_SLICE_WIDTH(vdsc_cfg->slice_width); - drm_dbg_kms(&dev_priv->drm, "PPS3 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 3, pps_val); /* PPS 4 */ pps_val = DSC_PPS4_INITIAL_XMIT_DELAY(vdsc_cfg->initial_xmit_delay) | DSC_PPS4_INITIAL_DEC_DELAY(vdsc_cfg->initial_dec_delay); - drm_dbg_kms(&dev_priv->drm, "PPS4 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 4, pps_val); /* PPS 5 */ pps_val = DSC_PPS5_SCALE_INC_INT(vdsc_cfg->scale_increment_interval) | DSC_PPS5_SCALE_DEC_INT(vdsc_cfg->scale_decrement_interval); - drm_dbg_kms(&dev_priv->drm, "PPS5 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 5, pps_val); /* PPS 6 */ @@ -494,25 +488,21 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) DSC_PPS6_FIRST_LINE_BPG_OFFSET(vdsc_cfg->first_line_bpg_offset) | DSC_PPS6_FLATNESS_MIN_QP(vdsc_cfg->flatness_min_qp) | DSC_PPS6_FLATNESS_MAX_QP(vdsc_cfg->flatness_max_qp); - drm_dbg_kms(&dev_priv->drm, "PPS6 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 6, pps_val); /* PPS 7 */ pps_val = DSC_PPS7_SLICE_BPG_OFFSET(vdsc_cfg->slice_bpg_offset) | DSC_PPS7_NFL_BPG_OFFSET(vdsc_cfg->nfl_bpg_offset); - drm_dbg_kms(&dev_priv->drm, "PPS7 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 7, pps_val); /* PPS 8 */ pps_val = DSC_PPS8_FINAL_OFFSET(vdsc_cfg->final_offset) | DSC_PPS8_INITIAL_OFFSET(vdsc_cfg->initial_offset); - drm_dbg_kms(&dev_priv->drm, "PPS8 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 8, pps_val); /* PPS 9 */ pps_val = DSC_PPS9_RC_MODEL_SIZE(vdsc_cfg->rc_model_size) | DSC_PPS9_RC_EDGE_FACTOR(DSC_RC_EDGE_FACTOR_CONST); - drm_dbg_kms(&dev_priv->drm, "PPS9 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 9, pps_val); /* PPS 10 */ @@ -520,7 +510,6 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) DSC_PPS10_RC_QUANT_INC_LIMIT1(vdsc_cfg->rc_quant_incr_limit1) | DSC_PPS10_RC_TARGET_OFF_HIGH(DSC_RC_TGT_OFFSET_HI_CONST) | DSC_PPS10_RC_TARGET_OFF_LOW(DSC_RC_TGT_OFFSET_LO_CONST); - drm_dbg_kms(&dev_priv->drm, "PPS10 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 10, pps_val); /* PPS 16 */ @@ -529,31 +518,25 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) vdsc_cfg->slice_width) | DSC_PPS16_SLICE_ROW_PER_FRAME(vdsc_cfg->pic_height / vdsc_cfg->slice_height); - drm_dbg_kms(&dev_priv->drm, "PPS16 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 16, pps_val); if (DISPLAY_VER(dev_priv) >= 14) { /* PPS 17 */ pps_val = DSC_PPS17_SL_BPG_OFFSET(vdsc_cfg->second_line_bpg_offset); - drm_dbg_kms(&dev_priv->drm, "PPS17 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 17, pps_val); /* PPS 18 */ pps_val = DSC_PPS18_NSL_BPG_OFFSET(vdsc_cfg->nsl_bpg_offset) | DSC_PPS18_SL_OFFSET_ADJ(vdsc_cfg->second_line_offset_adj); - drm_dbg_kms(&dev_priv->drm, "PPS18 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 18, pps_val); } /* Populate the RC_BUF_THRESH registers */ memset(rc_buf_thresh_dword, 0, sizeof(rc_buf_thresh_dword)); - for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++) { + for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++) rc_buf_thresh_dword[i / 4] |= (u32)(vdsc_cfg->rc_buf_thresh[i] << BITS_PER_BYTE * (i % 4)); - drm_dbg_kms(&dev_priv->drm, "RC_BUF_THRESH_%d = 0x%08x\n", i, - rc_buf_thresh_dword[i / 4]); - } if (!is_pipe_dsc(crtc, cpu_transcoder)) { intel_de_write(dev_priv, DSCA_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); @@ -600,7 +583,7 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) /* Populate the RC_RANGE_PARAMETERS registers */ memset(rc_range_params_dword, 0, sizeof(rc_range_params_dword)); - for (i = 0; i < DSC_NUM_BUF_RANGES; i++) { + for (i = 0; i < DSC_NUM_BUF_RANGES; i++) rc_range_params_dword[i / 2] |= (u32)(((vdsc_cfg->rc_range_params[i].range_bpg_offset << RC_BPG_OFFSET_SHIFT) | @@ -608,9 +591,6 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) RC_MAX_QP_SHIFT) | (vdsc_cfg->rc_range_params[i].range_min_qp << RC_MIN_QP_SHIFT)) << 16 * (i % 2)); - drm_dbg_kms(&dev_priv->drm, "RC_RANGE_PARAM_%d = 0x%08x\n", i, - rc_range_params_dword[i / 2]); - } if (!is_pipe_dsc(crtc, cpu_transcoder)) { intel_de_write(dev_priv, DSCA_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); From 402d336053a5d827c70ec11109e079811e86e0e8 Mon Sep 17 00:00:00 2001 From: Parth Pancholi Date: Tue, 30 Jul 2024 11:37:54 +0200 Subject: [PATCH 0455/1523] arm64: dts: ti: k3-j784s4-main: Correct McASP DMAs Correct the McASP nodes - mcasp3 and mcasp4 with the right DMAs thread IDs as per TISCI documentation [1] for J784s4. This fixes the related McASPs probe failure due to incorrect DMA IDs. Link: http://downloads.ti.com/tisci/esd/latest/5_soc_doc/j784s4/psil_cfg.html#psi-l-source-and-destination-thread-ids/ [1] Fixes: 5095ec4aa1ea ("arm64: dts: ti: k3-j784s4-main: Add McASP nodes") Signed-off-by: Parth Pancholi Reviewed-by: Jayesh Choudhary Link: https://lore.kernel.org/r/20240730093754.1659782-1-parth105105@gmail.com Signed-off-by: Nishanth Menon --- arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi index f170f80f00c1..d4ac1c9872a5 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi @@ -2755,7 +2755,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc500>, <&main_udmap 0x4500>; + dmas = <&main_udmap 0xc403>, <&main_udmap 0x4403>; dma-names = "tx", "rx"; clocks = <&k3_clks 268 0>; clock-names = "fck"; @@ -2773,7 +2773,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc501>, <&main_udmap 0x4501>; + dmas = <&main_udmap 0xc404>, <&main_udmap 0x4404>; dma-names = "tx", "rx"; clocks = <&k3_clks 269 0>; clock-names = "fck"; From 60a2066c50058086510c91f404eb582029650970 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 2 Aug 2024 10:38:49 +0200 Subject: [PATCH 0456/1523] drm/i915/gem: Adjust vma offset for framebuffer mmap offset When mapping a framebuffer object, the virtual memory area (VMA) offset ('vm_pgoff') should be adjusted by the start of the 'vma_node' associated with the object. This ensures that the VMA offset is correctly aligned with the corresponding offset within the GGTT aperture. Increment vm_pgoff by the start of the vma_node with the offset= provided by the user. Suggested-by: Chris Wilson Signed-off-by: Andi Shyti Reviewed-by: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Cc: # v4.9+ [Joonas: Add Cc: stable] Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625..ce10dd259812 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -1084,6 +1084,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* From 97b6784753da06d9d40232328efc5c5367e53417 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 2 Aug 2024 10:38:50 +0200 Subject: [PATCH 0457/1523] drm/i915/gem: Fix Virtual Memory mapping boundaries calculation Calculating the size of the mapped area as the lesser value between the requested size and the actual size does not consider the partial mapping offset. This can cause page fault access. Fix the calculation of the starting and ending addresses, the total size is now deduced from the difference between the end and start addresses. Additionally, the calculations have been rewritten in a clearer and more understandable form. Fixes: c58305af1835 ("drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass") Reported-by: Jann Horn Co-developed-by: Chris Wilson Signed-off-by: Chris Wilson Signed-off-by: Andi Shyti Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Rodrigo Vivi Cc: # v4.9+ Reviewed-by: Jann Horn Reviewed-by: Jonathan Cavitt [Joonas: Add Requires: tag] Requires: 60a2066c5005 ("drm/i915/gem: Adjust vma offset for framebuffer mmap offset") Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 53 +++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index ce10dd259812..cac6d4184506 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; From 1ac5167b3a90c9820daa64cc65e319b2d958d686 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 2 Aug 2024 10:38:49 +0200 Subject: [PATCH 0458/1523] drm/i915/gem: Adjust vma offset for framebuffer mmap offset When mapping a framebuffer object, the virtual memory area (VMA) offset ('vm_pgoff') should be adjusted by the start of the 'vma_node' associated with the object. This ensures that the VMA offset is correctly aligned with the corresponding offset within the GGTT aperture. Increment vm_pgoff by the start of the vma_node with the offset= provided by the user. Suggested-by: Chris Wilson Signed-off-by: Andi Shyti Reviewed-by: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Cc: # v4.9+ [Joonas: Add Cc: stable] Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-2-andi.shyti@linux.intel.com (cherry picked from commit 60a2066c50058086510c91f404eb582029650970) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625..ce10dd259812 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -1084,6 +1084,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* From 8bdd9ef7e9b1b2a73e394712b72b22055e0e26c3 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 2 Aug 2024 10:38:50 +0200 Subject: [PATCH 0459/1523] drm/i915/gem: Fix Virtual Memory mapping boundaries calculation Calculating the size of the mapped area as the lesser value between the requested size and the actual size does not consider the partial mapping offset. This can cause page fault access. Fix the calculation of the starting and ending addresses, the total size is now deduced from the difference between the end and start addresses. Additionally, the calculations have been rewritten in a clearer and more understandable form. Fixes: c58305af1835 ("drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass") Reported-by: Jann Horn Co-developed-by: Chris Wilson Signed-off-by: Chris Wilson Signed-off-by: Andi Shyti Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Rodrigo Vivi Cc: # v4.9+ Reviewed-by: Jann Horn Reviewed-by: Jonathan Cavitt [Joonas: Add Requires: tag] Requires: 60a2066c5005 ("drm/i915/gem: Adjust vma offset for framebuffer mmap offset") Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-3-andi.shyti@linux.intel.com (cherry picked from commit 97b6784753da06d9d40232328efc5c5367e53417) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 53 +++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index ce10dd259812..cac6d4184506 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; From 54bf0af90844fbf18f5be3272eda69198dfdb622 Mon Sep 17 00:00:00 2001 From: David Gow Date: Sun, 4 Aug 2024 17:18:47 +0800 Subject: [PATCH 0460/1523] drm/i915: Allow evicting to use the requested placement In commit a78a8da51b36 ("drm/ttm: replace busy placement with flags v6"), the old system of having a separate placement list (for placements which should be used without eviction) and a 'busy' placement list (for placements which should be attempted if eviction is required) was replaced with a new one where placements could be marked 'FALLBACK' (to be attempted if eviction is required) or 'DESIRED' (to be attempted first, but not if eviction is required). i915 had always included the requested placement in the list of 'busy' placements: i.e., the placement could be used either if eviction is required or not. But when the new system was put in place, the requested (first) placement was marked 'DESIRED', so would never be used if eviction became necessary. While a bug in the original commit prevented this flag from working, when this was fixed in 4a0e7b3c ("drm/i915: fix applying placement flag"), it caused long hangs on DG2 systems with small BAR. Don't mark the requested placement DESIRED (or FALLBACK), allowing it to be used in both situations. This matches the old behaviour, and resolves the hangs. Thanks to Justin Brewer for bisecting the issue. Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6") Fixes: 4a0e7b3c3753 ("drm/i915: fix applying placement flag") Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11255 Signed-off-by: David Gow Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240804091851.122186-2-david@davidgow.net --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0f..fb848fd8ba15 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { From 92653f2a572505adaf7f13f695c1907e71a1dc84 Mon Sep 17 00:00:00 2001 From: David Gow Date: Sun, 4 Aug 2024 17:18:48 +0800 Subject: [PATCH 0461/1523] drm/i915: Attempt to get pages without eviction first In commit a78a8da51b36 ("drm/ttm: replace busy placement with flags v6"), __i915_ttm_get_pages was updated to use flags instead of the separate 'busy' placement list. However, the behaviour was subtly changed. Originally, the function would attempt to use the preferred placement without eviction, and give an opportunity to restart the operation before falling back to allowing eviction. This was unintentionally changed, as the preferred placement was not given the TTM_PL_FLAG_DESIRED flag, and so eviction could be triggered in that first pass. This caused thrashing, and a significant performance regression on DG2 systems with small BAR. For example, Minecraft and Team Fortress 2 would drop to single-digit framerates. Restore the original behaviour by marking the initial placement as desired on that first attempt. Also, rework this to use a separate struct ttm_palcement, as the individual placements are marked 'const', so hot-patching the flags is even more dodgy than before. Thanks to Justin Brewer for bisecting this. Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6") Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11255 Signed-off-by: David Gow Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240804091851.122186-3-david@davidgow.net --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fb848fd8ba15..5c72462d1f57 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -778,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -799,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); From ee9a68394b4bea8b9044ec4bfdbaacf45297ecef Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 1 Aug 2024 21:14:04 +0200 Subject: [PATCH 0462/1523] riscv: Re-introduce global icache flush in patch_text_XXX() commit edf2d546bfd6 ("riscv: patch: Flush the icache right after patching to avoid illegal insns") mistakenly removed the global icache flush in patch_text_nosync() and patch_text_set_nosync() functions, so reintroduce them. Fixes: edf2d546bfd6 ("riscv: patch: Flush the icache right after patching to avoid illegal insns") Reported-by: Samuel Holland Closes: https://lore.kernel.org/linux-riscv/a28ddc26-d77a-470a-a33f-88144f717e86@sifive.com/ Signed-off-by: Alexandre Ghiti Reviewed-by: Samuel Holland Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240801191404.55181-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/patch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51f..34ef522f07a8 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } From 9a72570491b524c9dc4c1caa7323b2297c27b0b7 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Wed, 10 Jul 2024 12:58:20 -0400 Subject: [PATCH 0463/1523] drm/amd/display: Enable aux transfer path via dmub for dp tunneling [Why] Aux transfer retries path does not support dp tunneling. [How] Based on ddc pin check, aux will be issued in legacy path or DMUB. Signed-off-by: Meenakshikumar Somasundaram Reviewed-by: Eric Yang Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index b8996d285f00..bb4ac5042c80 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -735,7 +735,15 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); - ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + + /* Check whether aux to be processed via dmub or dcn directly */ + if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc + || ddc->ddc_pin == NULL) { + ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result); + } else { + ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", From ca0fb243c3bb53dbbd71d16c76f319bf923ee3d4 Mon Sep 17 00:00:00 2001 From: Daniel Sa Date: Fri, 19 Jul 2024 13:39:09 -0400 Subject: [PATCH 0464/1523] drm/amd/display: Underflow Seen on DCN401 eGPU [WHY] In dcn401 we read clock values before FW is loaded. These incorrect values cause the driver to believe that we are running higher clocks than what we actually have. This then causes corruption/underflow for the eGPU. [HOW] When new values are read from HW, update internal structures to propagate the new/correct value. Fixes issue Signed-off-by: Daniel Sa Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 8e1ca709d304..ceaaa8df3641 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -221,6 +221,7 @@ void dcn401_init_hw(struct dc *dc) int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; + int current_dchub_ref_freq = 0; if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -264,6 +265,8 @@ void dcn401_init_hw(struct dc *dc) dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency, &res_pool->ref_clocks.dccg_ref_clock_inKhz); + current_dchub_ref_freq = res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, res_pool->ref_clocks.dccg_ref_clock_inKhz, &res_pool->ref_clocks.dchub_ref_clock_inKhz); @@ -433,8 +436,9 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; - if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { - /* update bounding box if FAMS2 disabled */ + if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) + || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { + /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ if (dc->clk_mgr) dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); From 9330af0af3d54df71b6b752a260dadef05a4fc44 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 16 Jul 2024 14:05:12 -0600 Subject: [PATCH 0465/1523] drm/amd/display: Check UnboundedRequestEnabled's value CalculateSwathAndDETConfiguration_params_st's UnboundedRequestEnabled is a pointer (i.e. dml_bool_t *UnboundedRequestEnabled), and thus p->UnboundedRequestEnabled checks its address, not bool value. To check value, *p->UnboundedRequestEnabled is used instead. This fixes 1 REVERSE_INULL issue reported by Coverity. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 13f2c80bad4c..c54f1af1845c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -3851,7 +3851,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->hw_debug5 = false; for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(p->mrq_present) && (!p->UnboundedRequestEnabled) && (TotalActiveDPP == 1) + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) && p->display_cfg->plane_descriptors[k].surface.dcc.enable && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) From 29d0732f8f61ed028d642034e5323b8cdf6a1905 Mon Sep 17 00:00:00 2001 From: Cruise Date: Mon, 22 Jul 2024 19:15:53 +0800 Subject: [PATCH 0466/1523] drm/amd/display: Get link index for AUX reply notification The link index wasn't updated for the AUX reply notification. Get link index based on DPIA instance for AUX reply notification. Signed-off-by: Cruise Reviewed-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index cd6570a1e20e..fe9f99f1bdf9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -61,6 +61,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification /* For HPD/HPD RX, convert dpia port index into link index */ if (notify->type == DMUB_NOTIFICATION_HPD || notify->type == DMUB_NOTIFICATION_HPD_IRQ || + notify->type == DMUB_NOTIFICATION_AUX_REPLY || notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = From 85ecfdda063b6f148335c354c8b7200a49640510 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 16 Jul 2024 13:24:48 -0600 Subject: [PATCH 0467/1523] drm/amd/display: Re-order enum in a header file Move the lb_memory_config close to the pixel format enums to improve the code readability. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index caaa9ced2ec4..36d10b0f2eed 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -10,22 +10,6 @@ #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -enum lb_memory_config { - /* Enable all 3 pieces of memory */ - LB_MEMORY_CONFIG_0 = 0, - - /* Enable only the first piece of memory */ - LB_MEMORY_CONFIG_1 = 1, - - /* Enable only the second piece of memory */ - LB_MEMORY_CONFIG_2 = 2, - - /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the - * last piece of chroma memory used for the luma storage - */ - LB_MEMORY_CONFIG_3 = 3 -}; - struct spl_size { uint32_t width; uint32_t height; @@ -87,6 +71,22 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_UNKNOWN }; +enum lb_memory_config { + /* Enable all 3 pieces of memory */ + LB_MEMORY_CONFIG_0 = 0, + + /* Enable only the first piece of memory */ + LB_MEMORY_CONFIG_1 = 1, + + /* Enable only the second piece of memory */ + LB_MEMORY_CONFIG_2 = 2, + + /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the + * last piece of chroma memory used for the luma storage + */ + LB_MEMORY_CONFIG_3 = 3 +}; + /* Rotation angle */ enum spl_rotation_angle { SPL_ROTATION_ANGLE_0 = 0, From 5d6a620875a04e70c51d8366eccae74d9cef0308 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 15:32:21 -0600 Subject: [PATCH 0468/1523] drm/amd/display: Setup two pixel per container SPL has a control field for controlling the two pixels per container that is not in use yet. This commit adds a proper initialization for this feature. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_spl_translate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index bcc596724a4f..8f85a1db5eba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,6 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale; // Make spl input basic output info alpha_en field point to plane res scl_data lb_params alpha_en spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en; + spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); // Make spl input basic input info scaling quality field point to plane state scaling_quality populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality); // Translate edge adaptive scaler preference From 6cc213b9aa34bc3213e20f9256345c5cc1495b0b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 4 Dec 2023 16:35:04 -0500 Subject: [PATCH 0469/1523] drm/amd/display: Replace dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd In the commit c2cec7a872b6 ("drm/amd/display: Wake DMCUB before sending a command for replay feature"), replaced dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd in multiple areas, but due to merge issues the replacement of this function in the dmub_replay_copy_settings was missed. This commit replaces the old dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd. Fixes: 3601a35a2e9d ("drm/amd/display: Wake DMCUB before sending a command for replay feature") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 44df9e2351c2..14f935961672 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -187,8 +187,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } From c7b3569b3ebc53e997500be09eb612b6c852525a Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 4 Apr 2024 10:25:21 -0400 Subject: [PATCH 0470/1523] drm/amd/display: Fix Cursor Offset in Scaled Scenarios [WHY] Cursor position code had improper offsets in scaled modes. [HOW] Adjust cursor scaling to account for cursor offsets properly. Reviewed-by: Rodrigo Siqueira Signed-off-by: Sung Lee Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index eb0da6c6b87c..846c183fe3a8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -725,8 +725,8 @@ void hubp401_cursor_set_position( CURSOR_ENABLE, cur_en); REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); + CURSOR_X_POSITION, x_pos, + CURSOR_Y_POSITION, y_pos); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, pos->x_hotspot, From ab799c16c9d537fa2f070283f1ca63a4425502e9 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 26 Apr 2024 15:24:06 -0400 Subject: [PATCH 0471/1523] drm/amd/display: For FAMS2 don't program P-State force from driver P-State force programming is handled entirely by FW in FAMS2. Remove any programming from driver side to prevent incorrect programming from driver side (which may override FW programming) Signed-off-by: Alvin Lee Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 1 - drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 846c183fe3a8..b1ebf5053b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -990,7 +990,6 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_soft_reset = hubp31_soft_reset, .hubp_set_flip_int = hubp401_set_flip_int, .hubp_in_blank = hubp401_in_blank, - .hubp_update_force_pstate_disallow = hubp32_update_force_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp401_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 2533f16510ba..457f4167e848 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -132,7 +132,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .dccg_init = dcn20_dccg_init, .set_mcm_luts = dcn401_set_mcm_luts, .program_mall_pipe_config = dcn32_program_mall_pipe_config, - .update_force_pstate = dcn32_update_force_pstate, .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, From 675d9ac9d0de765531e94f9fdc536989a997a324 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 16:53:42 -0600 Subject: [PATCH 0472/1523] drm/amd/display: Add missing DET segments programming The commit 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") introduced a code refactor for DCHUB, but during the merge process into amd-staging-drm-next, the program det segments were removed. This commit adds the DET segment programming for DCN35. Fixes: 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index a9dc7cf12dac..899e239352aa 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From 24ffa5bb6d363c8164be6af974e318f5752797e1 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:30:21 -0600 Subject: [PATCH 0473/1523] drm/amd/display: Remove duplicated code DCN_MINIMUM_DISPCLK_Khz and DCN_MINIMUM_DPPCLK_Khz is declared twice. This commit removes that duplication. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index c55d7279fe51..2d06067ff36d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -29,9 +29,6 @@ #include "dc.h" #include "dm_pp_smu.h" -#define DCN_MINIMUM_DISPCLK_Khz 100000 -#define DCN_MINIMUM_DPPCLK_Khz 100000 - /* Constants */ #define DDR4_DRAM_WIDTH 64 #define WM_A 0 From a00a177055cced5cd2bb057a1ace9a95a286bc49 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:33:40 -0600 Subject: [PATCH 0474/1523] drm/amd/display: Add missing mcache registers Add missing register programming for mcache in DCN401. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 106008593464..514d1ce20df9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ From 74bad61c5d83f5af8a855c8b7dc8e20377c74d46 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:40:22 -0600 Subject: [PATCH 0475/1523] drm/amd/display: Add dcc propagation value Initialize the field dcc_meta_propagation_delay_us with 10 ms. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 9fcdf06d6aa4..3e76732ac0dc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, From d91f93c7a7fb9589e62814c1e229943e1259b48c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:43:43 -0600 Subject: [PATCH 0476/1523] drm/amd/display: Add missing registers for dcn32 Add missing debug registers for DCN32. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index fee67fbab8e2..7901792afb7b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -505,6 +505,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \ SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -761,6 +763,7 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) @@ -1185,6 +1188,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_TEST_DEBUG_INDEX), \ + SR(DCHUBBUB_TEST_DEBUG_DATA), \ SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ From 946e2c5be80b2cf93be34e28b3a6bdadc8ca419b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:46:25 -0600 Subject: [PATCH 0477/1523] drm/amd/display: Remove unused code Remove function pointers that were never used. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 28da1dddf0a0..45262cba675e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -245,16 +245,6 @@ struct transform_funcs { void (*set_cursor_attributes)( struct transform *xfm_base, const struct dc_cursor_attributes *attr); - - bool (*transform_program_blnd_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_shaper_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_3dlut)( - struct transform *xfm, - struct tetrahedral_params *params); }; const uint16_t *get_filter_2tap_16p(void); From 3e048c8846a658098d935df83050170c8a8fb104 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 21 Jul 2024 21:46:07 -0400 Subject: [PATCH 0478/1523] drm/amd/display: 3.2.294 This version brings along the following: - SPL improvements. - Address coverity issues. - DML2 fixes. - Code cleanup. - DIO and DCCG refactor. - Improve the PSR state. Signed-off-by: Aric Cyr Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4077c1ddb9c1..250d5d48c2d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.293" +#define DC_VER "3.2.294" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 1cb62da0802c8f08e26443a5409edba99b8a1f6e Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 25 Jul 2024 19:10:43 -0400 Subject: [PATCH 0479/1523] drm/amdkfd: Fix compile error if HMM support not enabled Fixes the below if kernel config not enable HMM support >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:107:26: error: implicit declaration of function 'svm_range_from_addr' >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:107:24: error: assignment to 'struct svm_range *' from 'int' makes pointer from integer without a cast [-Wint-conversion] >> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:111:28: error: invalid use of undefined type 'struct svm_range' Fixes: b049504e211e ("drm/amdkfd: Validate user queue svm memory residency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407252127.zvnxaKRA-lkp@intel.com/ Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 63795f0cd55a..e0a073ae4a49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -85,6 +85,8 @@ void uninit_queue(struct queue *q) kfree(q); } +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) { struct kfd_process *p = pdd->process; @@ -178,6 +180,18 @@ static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u mutex_unlock(&p->svms.lock); } +#else + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + return -EINVAL; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ +} + +#endif int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size) From f905d0c328b440fabaaf265350bf4187ccd5f59b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Mon, 29 Jul 2024 09:24:20 +0800 Subject: [PATCH 0480/1523] drm/amd/pm: update powerplay structure on smu v14.0.2/3 update powerplay structure on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../amd/pm/swsmu/inc/smu_v14_0_2_pptable.h | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif From 8141f21b941710ecebe49220b69822cab3abd23d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 26 Jul 2024 19:31:55 +0530 Subject: [PATCH 0481/1523] drm/amd/display: Handle null 'stream_status' in 'planes_changed_for_existing_stream' This commit adds a null check for 'stream_status' in the function 'planes_changed_for_existing_stream'. Previously, the code assumed 'stream_status' could be null, but did not handle the case where it was actually null. This could lead to a null pointer dereference. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:3784 planes_changed_for_existing_stream() error: we previously assumed 'stream_status' could be null (see line 3774) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4f5b23520365..1c379a6b1b4c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3773,8 +3773,10 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, } } - if (!stream_status) + if (!stream_status) { ASSERT(0); + return false; + } for (i = 0; i < set_count; i++) if (set[i].stream == stream) From 9710b84e2a6afde2db20cd33435038eb75b91200 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 30 Jul 2024 09:43:11 +0800 Subject: [PATCH 0482/1523] drm/amd/pm: add overdrive support on smu v14.0.2/3 add overdrive support on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 11 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 886 +++++++++++++++++- 2 files changed, 894 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 09973615f210..865e916fc425 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -452,17 +452,26 @@ int smu_v14_0_init_smc_tables(struct smu_context *smu) ret = -ENOMEM; goto err3_out; } + + smu_table->user_overdrive_table = + kzalloc(tables[SMU_TABLE_OVERDRIVE].size, GFP_KERNEL); + if (!smu_table->user_overdrive_table) { + ret = -ENOMEM; + goto err4_out; + } } smu_table->combo_pptable = kzalloc(tables[SMU_TABLE_COMBO_PPTABLE].size, GFP_KERNEL); if (!smu_table->combo_pptable) { ret = -ENOMEM; - goto err4_out; + goto err5_out; } return 0; +err5_out: + kfree(smu_table->user_overdrive_table); err4_out: kfree(smu_table->boot_overdrive_table); err3_out: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..5913f9c60fe0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -68,6 +68,18 @@ #define DEBUGSMC_MSG_Mode1Reset 2 #define LINK_SPEED_MAX 3 +#define PP_OD_FEATURE_GFXCLK_FMIN 0 +#define PP_OD_FEATURE_GFXCLK_FMAX 1 +#define PP_OD_FEATURE_UCLK_FMIN 2 +#define PP_OD_FEATURE_UCLK_FMAX 3 +#define PP_OD_FEATURE_GFX_VF_CURVE 4 +#define PP_OD_FEATURE_FAN_CURVE_TEMP 5 +#define PP_OD_FEATURE_FAN_CURVE_PWM 6 +#define PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT 7 +#define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 +#define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 +#define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 + static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -204,6 +216,7 @@ static struct cmn2asic_mapping smu_v14_0_2_table_map[SMU_TABLE_COUNT] = { [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), + TAB_MAP(OVERDRIVE), }; static struct cmn2asic_mapping smu_v14_0_2_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1029,16 +1042,97 @@ static int smu_v14_0_2_get_current_clk_freq_by_table(struct smu_context *smu, value); } +static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, + int od_feature_bit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + + return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); +} + +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, + int32_t *max) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + const OverDriveLimits_t * const overdrive_lowerlimits = + &pptable->SkuTable.OverDriveLimitsBasicMin; + int32_t od_min_setting, od_max_setting; + + switch (od_feature_bit) { + case PP_OD_FEATURE_GFXCLK_FMIN: + od_min_setting = overdrive_lowerlimits->GfxclkFmin; + od_max_setting = overdrive_upperlimits->GfxclkFmin; + break; + case PP_OD_FEATURE_GFXCLK_FMAX: + od_min_setting = overdrive_lowerlimits->GfxclkFmax; + od_max_setting = overdrive_upperlimits->GfxclkFmax; + break; + case PP_OD_FEATURE_UCLK_FMIN: + od_min_setting = overdrive_lowerlimits->UclkFmin; + od_max_setting = overdrive_upperlimits->UclkFmin; + break; + case PP_OD_FEATURE_UCLK_FMAX: + od_min_setting = overdrive_lowerlimits->UclkFmax; + od_max_setting = overdrive_upperlimits->UclkFmax; + break; + case PP_OD_FEATURE_GFX_VF_CURVE: + od_min_setting = overdrive_lowerlimits->VoltageOffsetPerZoneBoundary[0]; + od_max_setting = overdrive_upperlimits->VoltageOffsetPerZoneBoundary[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_TEMP: + od_min_setting = overdrive_lowerlimits->FanLinearTempPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearTempPoints[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_PWM: + od_min_setting = overdrive_lowerlimits->FanLinearPwmPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearPwmPoints[0]; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT: + od_min_setting = overdrive_lowerlimits->AcousticLimitRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticLimitRpmThreshold; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_TARGET: + od_min_setting = overdrive_lowerlimits->AcousticTargetRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticTargetRpmThreshold; + break; + case PP_OD_FEATURE_FAN_TARGET_TEMPERATURE: + od_min_setting = overdrive_lowerlimits->FanTargetTemperature; + od_max_setting = overdrive_upperlimits->FanTargetTemperature; + break; + case PP_OD_FEATURE_FAN_MINIMUM_PWM: + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; + } + + if (min) + *min = od_min_setting; + if (max) + *max = od_max_setting; +} + static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct smu_14_0_dpm_context *dpm_context = smu_dpm->dpm_context; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; struct smu_14_0_dpm_table *single_dpm_table; struct smu_14_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; int i, curr_freq, size = 0; + int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); @@ -1159,6 +1253,183 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, "*" : ""); break; + case SMU_OD_SCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFXCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", + od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + break; + + case SMU_OD_MCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_UCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMHz\n", + od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); + break; + + case SMU_OD_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "%dmV\n", + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); + break; + + case SMU_OD_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_FAN_CURVE:\n"); + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) + size += sysfs_emit_at(buf, size, "%d: %dC %d%%\n", + i, + (int)od_table->OverDriveTable.FanLinearTempPoints[i], + (int)od_table->OverDriveTable.FanLinearPwmPoints[i]); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(hotspot temp): %uC %uC\n", + min_value, max_value); + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(fan speed): %u%% %u%%\n", + min_value, max_value); + + break; + + case SMU_OD_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_LIMIT:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticLimitRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_LIMIT: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_TARGET:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticTargetRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_TARGET: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_TARGET_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanTargetTemperature); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "TARGET_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_MINIMUM_PWM:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanMinimumPwm); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "MINIMUM_PWM: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_RANGE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", + min_value, max_value); + } + break; + default: break; } @@ -1400,7 +1671,27 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *max_power_limit, uint32_t *min_power_limit) { - // TODO + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + CustomSkuTable_t *skutable = &pptable->CustomSkuTable; + uint32_t power_limit; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + + if (smu_v14_0_get_current_power_limit(smu, &power_limit)) + power_limit = smu->adev->pm.ac_power ? + skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : + skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; + + if (current_power_limit) + *current_power_limit = power_limit; + if (default_power_limit) + *default_power_limit = power_limit; + + if (max_power_limit) + *max_power_limit = msg_limit; + + if (min_power_limit) + *min_power_limit = 0; return 0; } @@ -1950,6 +2241,594 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static void smu_v14_0_2_dump_od_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + struct amdgpu_device *adev = smu->adev; + + dev_dbg(adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + dev_dbg(adev->dev, "OD: Uclk: (%d, %d)\n", od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); +} + +static int smu_v14_0_2_upload_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + true); + if (ret) + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + + return ret; +} + +static void smu_v14_0_2_set_supported_od_feature_mask(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + adev->pm.od_feature_mask |= OD_OPS_SUPPORT_FAN_CURVE_RETRIEVE | + OD_OPS_SUPPORT_FAN_CURVE_SET | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_SET | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_SET | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; +} + +static int smu_v14_0_2_get_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + false); + if (ret) + dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); + + return ret; +} + +static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) +{ + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; + OverDriveTableExternal_t *boot_od_table = + (OverDriveTableExternal_t *)smu->smu_table.boot_overdrive_table; + OverDriveTableExternal_t *user_od_table = + (OverDriveTableExternal_t *)smu->smu_table.user_overdrive_table; + OverDriveTableExternal_t user_od_table_bak; + int ret; + int i; + + ret = smu_v14_0_2_get_overdrive_table(smu, boot_od_table); + if (ret) + return ret; + + smu_v14_0_2_dump_od_table(smu, boot_od_table); + + memcpy(od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + + /* + * For S3/S4/Runpm resume, we need to setup those overdrive tables again, + * but we have to preserve user defined values in "user_od_table". + */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, + user_od_table, + sizeof(OverDriveTableExternal_t)); + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + user_od_table->OverDriveTable.GfxclkFmin = + user_od_table_bak.OverDriveTable.GfxclkFmin; + user_od_table->OverDriveTable.GfxclkFmax = + user_od_table_bak.OverDriveTable.GfxclkFmax; + user_od_table->OverDriveTable.UclkFmin = + user_od_table_bak.OverDriveTable.UclkFmin; + user_od_table->OverDriveTable.UclkFmax = + user_od_table_bak.OverDriveTable.UclkFmax; + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + user_od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = + user_od_table_bak.OverDriveTable.VoltageOffsetPerZoneBoundary[i]; + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) { + user_od_table->OverDriveTable.FanLinearTempPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearTempPoints[i]; + user_od_table->OverDriveTable.FanLinearPwmPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearPwmPoints[i]; + } + user_od_table->OverDriveTable.AcousticLimitRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticLimitRpmThreshold; + user_od_table->OverDriveTable.AcousticTargetRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticTargetRpmThreshold; + user_od_table->OverDriveTable.FanTargetTemperature = + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; + } + + smu_v14_0_2_set_supported_od_feature_mask(smu); + + return 0; +} + +static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = table_context->overdrive_table; + OverDriveTableExternal_t *user_od_table = table_context->user_overdrive_table; + int res; + + user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | + BIT(PP_OD_FEATURE_UCLK_BIT) | + BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | + BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); + user_od_table->OverDriveTable.FeatureCtrlMask = 0; + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTableExternal_t)); + + return res; +} + +static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long input) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *boot_overdrive_table = + (OverDriveTableExternal_t *)table_context->boot_overdrive_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + int i; + + switch (input) { + case PP_OD_EDIT_FAN_CURVE: + for (i = 0; i < NUM_OD_FAN_MAX_POINTS; i++) { + od_table->OverDriveTable.FanLinearTempPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearTempPoints[i]; + od_table->OverDriveTable.FanLinearPwmPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearPwmPoints[i]; + } + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_LIMIT: + od_table->OverDriveTable.AcousticLimitRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticLimitRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_TARGET: + od_table->OverDriveTable.AcousticTargetRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticTargetRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + od_table->OverDriveTable.FanTargetTemperature = + boot_overdrive_table->OverDriveTable.FanTargetTemperature; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_MINIMUM_PWM: + od_table->OverDriveTable.FanMinimumPwm = + boot_overdrive_table->OverDriveTable.FanMinimumPwm; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; + } + + return 0; +} + +static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], + uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + uint32_t offset_of_voltageoffset; + int32_t minimum, maximum; + uint32_t feature_ctrlmask; + int i, ret = 0; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + dev_warn(adev->dev, "GFXCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.GfxclkFmin > od_table->OverDriveTable.GfxclkFmax) { + dev_err(adev->dev, + "Invalid setting: GfxclkFmin(%u) is bigger than GfxclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.GfxclkFmin, + (uint32_t)od_table->OverDriveTable.GfxclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + dev_warn(adev->dev, "UCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid MCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.UclkFmin > od_table->OverDriveTable.UclkFmax) { + dev_err(adev->dev, + "Invalid setting: UclkFmin(%u) is bigger than UclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.UclkFmin, + (uint32_t)od_table->OverDriveTable.UclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + dev_warn(adev->dev, "Gfx offset setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + if (input[0] >= NUM_OD_FAN_MAX_POINTS - 1 || + input[0] < 0) + return -EINVAL; + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &minimum, + &maximum); + if (input[1] < minimum || + input[1] > maximum) { + dev_info(adev->dev, "Fan curve temp setting(%ld) must be within [%d, %d]!\n", + input[1], minimum, maximum); + return -EINVAL; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &minimum, + &maximum); + if (input[2] < minimum || + input[2] > maximum) { + dev_info(adev->dev, "Fan curve pwm setting(%ld) must be within [%d, %d]!\n", + input[2], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanLinearTempPoints[input[0]] = input[1]; + od_table->OverDriveTable.FanLinearPwmPoints[input[0]] = input[2]; + od_table->OverDriveTable.FanMode = FAN_MODE_MANUAL_LINEAR; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic limit threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticLimitRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic target threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticTargetRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan target temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanTargetTemperature = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan minimum pwm setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanMinimumPwm = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v14_0_2_od_restore_table_single(smu, input[0]); + if (ret) + return ret; + } else { + feature_ctrlmask = od_table->OverDriveTable.FeatureCtrlMask; + memcpy(od_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t)); + od_table->OverDriveTable.FeatureCtrlMask = feature_ctrlmask; + } + fallthrough; + case PP_OD_COMMIT_DPM_TABLE: + /* + * The member below instructs PMFW the settings focused in + * this single operation. + * `uint32_t FeatureCtrlMask;` + * It does not contain actual informations about user's custom + * settings. Thus we do not cache it. + */ + offset_of_voltageoffset = offsetof(OverDriveTable_t, VoltageOffsetPerZoneBoundary); + if (memcmp((u8 *)od_table + offset_of_voltageoffset, + table_context->user_overdrive_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset)) { + smu_v14_0_2_dump_od_table(smu, od_table); + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + od_table->OverDriveTable.FeatureCtrlMask = 0; + memcpy(table_context->user_overdrive_table + offset_of_voltageoffset, + (u8 *)od_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset); + + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t))) + smu->user_dpm_profile.user_od = false; + else + smu->user_dpm_profile.user_od = true; + } + break; + + default: + return -ENOSYS; + } + + return ret; +} + +static int smu_v14_0_2_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v14_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v14_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1988,13 +2867,16 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, + .set_default_od_settings = smu_v14_0_2_set_default_od_settings, + .restore_user_od_settings = smu_v14_0_2_restore_user_od_settings, + .od_edit_dpm_table = smu_v14_0_2_od_edit_dpm_table, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, - .set_power_limit = smu_v14_0_set_power_limit, + .set_power_limit = smu_v14_0_2_set_power_limit, .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, From 17277da26623d4aa8bdda628d0024cf2f2e39ae6 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 11:00:06 +0530 Subject: [PATCH 0483/1523] drm/amdgpu: Remove debugfs amdgpu_reset_dump_register_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some problem with existing amdgpu_reset_dump_register_list debugfs node. It is supposed to read a list of registers but there could be cases when the IP is not in correct power state. Register read in such cases could lead to more problems. We are taking care of all such power states in devcoredump and dumping the registers of need for debugging. So cleaning this code and we dont need this functionality via debugfs anymore. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 96 --------------------- 1 file changed, 96 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e1a11b6b989..cbef720de779 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2026,100 +2026,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, - char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[12]; - int i, ret, len = 0; - - if (*pos) - return 0; - - memset(reg_offset, 0, 12); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - - for (i = 0; i < adev->reset_info.num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); - up_read(&adev->reset_domain->sem); - if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) - return -EFAULT; - - len += strlen(reg_offset); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - } - - up_read(&adev->reset_domain->sem); - *pos += len; - - return len; -} - -static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, - const char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[11]; - uint32_t *new = NULL, *tmp = NULL; - unsigned int len = 0; - int ret, i = 0; - - do { - memset(reg_offset, 0, 11); - if (copy_from_user(reg_offset, buf + len, - min(10, (size-len)))) { - ret = -EFAULT; - goto error_free; - } - - new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - tmp = new; - if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) { - ret = -EINVAL; - goto error_free; - } - - len += ret; - i++; - } while (len < size); - - new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - ret = down_write_killable(&adev->reset_domain->sem); - if (ret) - goto error_free; - - swap(adev->reset_info.reset_dump_reg_list, tmp); - swap(adev->reset_info.reset_dump_reg_value, new); - adev->reset_info.num_regs = i; - up_write(&adev->reset_domain->sem); - ret = size; - -error_free: - if (tmp != new) - kfree(tmp); - kfree(new); - return ret; -} - -static const struct file_operations amdgpu_reset_dump_register_list = { - .owner = THIS_MODULE, - .read = amdgpu_reset_dump_register_list_read, - .write = amdgpu_reset_dump_register_list_write, - .llseek = default_llseek -}; - int amdgpu_debugfs_init(struct amdgpu_device *adev) { struct dentry *root = adev_to_drm(adev)->primary->debugfs_root; @@ -2204,8 +2110,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) &amdgpu_debugfs_vm_info_fops); debugfs_create_file("amdgpu_benchmark", 0200, root, adev, &amdgpu_benchmark_fops); - debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev, - &amdgpu_reset_dump_register_list); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; From 836af5be1b6d8e93d736c252e711a20db7dbde9d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 11:19:53 +0530 Subject: [PATCH 0484/1523] drm/amdgpu: Clean up the register dump via debugfs list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debugfs register list for dump is cleaned as it have some issues related to proper power state of the IP before register read. Since the above mentioned is removed we no longer want this to be dumped part of the devcoredump and hence removed. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ------------- .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 10 +--------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ------------------- 3 files changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 137a88b8de45..c54ddd3e68aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -823,17 +823,6 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; -struct amdgpu_reset_info { - /* reset dump register */ - u32 *reset_dump_reg_list; - u32 *reset_dump_reg_value; - int num_regs; - -#ifdef CONFIG_DEV_COREDUMP - struct amdgpu_coredump_info *coredump_info; -#endif -}; - /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1157,8 +1146,6 @@ struct amdgpu_device { struct mutex benchmark_mutex; - struct amdgpu_reset_info reset_info; - bool scpm_enabled; uint32_t scpm_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f6806ae1c061..cf2b4dd4d865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -203,7 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; - int i, ver; + int ver; iter.data = buffer; iter.offset = 0; @@ -317,14 +317,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } return count - iter.remain; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a2a1a3da17e3..3a43754e7f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5277,23 +5277,6 @@ mode1_reset_failed: return ret; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5359,8 +5342,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State\n"); /* Trigger ip dump before we reset the asic */ for (i = 0; i < tmp_adev->num_ip_blocks; i++) From 4a4c815b08dc774dde67fb90a0286925f98204af Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jul 2024 09:39:14 +0530 Subject: [PATCH 0485/1523] drm/amd/display: Align 'dpp401_dscl_program_isharp' with actual function parameters This commit corrects the function comment for 'dpp401_dscl_program_isharp' in 'dcn401_dpp_dscl.c'. The comment previously included a description for a non-existent parameter 'bs_coeffs_updated'. This parameter description has been removed to reflect the function's actual parameters. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/dpp/dcn401/dcn401_dpp_dscl.c:981: warning: Excess function parameter 'bs_coeffs_updated' description in 'dpp401_dscl_program_isharp' Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 88d24e36fe00..505929800426 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -971,7 +971,6 @@ static void dpp401_dscl_set_isharp_filter( * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info - * @bs_coeffs_updated: coeffs update flag * * This is the primary function to program isharp * From e89d2fec4cde967445e16e02e406481bac380cc4 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Jul 2024 00:24:44 +0530 Subject: [PATCH 0486/1523] drm/amdgpu: optimize the padding for gfx10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 853084a2ce7f..1b88528b512b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9397,6 +9397,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9588,7 +9606,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, @@ -9629,7 +9647,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, From ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 25 Jun 2024 11:22:50 -0400 Subject: [PATCH 0487/1523] drm/amdkfd: support per-queue reset on gfx9 Support per-queue reset for GFX9. The recommendation is for the driver to target reset the HW queue via a SPI MMIO register write. Since this requires pipe and HW queue info and MEC FW is limited to doorbell reports of hung queues after an unmap failure, scan the HW queue slots defined by SET_RESOURCES first to identify the user queue candidates to reset. Only signal reset events to processes that have had a queue reset. If queue reset fails, fall back to GPU reset. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 16 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 9 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 18 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 85 ++++++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 9 + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 184 +++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 22 +++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 + .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 + 16 files changed, 373 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index aff08321e976..8dfdb18197c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -191,4 +191,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3a3f3ce09f00..017e8a3013aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -418,5 +418,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, - .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index a5c7259cf2a3..e2ae714a700f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -541,5 +541,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { kgd_gfx_v9_4_3_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, - .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 3ab6c3aa0ad1..62176d607bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1070,6 +1070,20 @@ static void program_trap_handler_settings(struct amdgpu_device *adev, unlock_srbm(adev); } +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -1097,4 +1111,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 67bcaa3d4226..9efd2dd4fdd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -56,3 +56,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 8c8437a4383f..c718bedda0ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -680,5 +680,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, .set_address_watch = kgd_gfx_v10_set_address_watch, - .clear_address_watch = kgd_gfx_v10_clear_address_watch + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index b61a32d6af4b..a4ba49cb22db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -786,6 +786,20 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -808,5 +822,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode, .set_address_watch = kgd_gfx_v11_set_address_watch, - .clear_address_watch = kgd_gfx_v11_clear_address_watch + .clear_address_watch = kgd_gfx_v11_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v11_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5a35a8ca8922..32f28c12077b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1144,6 +1144,89 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, kgd_gfx_v9_unlock_srbm(adev, inst); } +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + uint32_t low, high; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + uint32_t low, high, temp; + unsigned long end_jiffies; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + + pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n", + inst, pipe_id, queue_id); + + /* assume previous dequeue request issued will take affect after reset */ + WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + + if (time_after(jiffies, end_jiffies)) { + queue_addr = 0; + break; + } + + usleep_range(500, 1000); + } + + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -1172,4 +1255,6 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index ce424615f59b..988c50ac3be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -101,3 +101,12 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f0bfeb35246f..f6e211070299 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -153,6 +153,20 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, static void kfd_hws_hang(struct device_queue_manager *dqm) { + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + /* Mark all device queues as reset. */ + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + pdd->has_reset_queue = true; + } + } + /* * Issue a GPU reset if HWS is unresponsive */ @@ -878,6 +892,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); + /* queue is reset so inaccessable */ + if (pdd->has_reset_queue) { + retval = -EACCES; + goto out_unlock; + } + if (retval) { dev_err(dev, "unmap queue failed\n"); goto out_unlock; @@ -1662,7 +1682,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; - int retval; + int retval, num_hw_queue_slots; retval = 0; @@ -1715,9 +1735,24 @@ static int start_cpsch(struct device_queue_manager *dqm) &dqm->wait_times); } + /* setup per-queue reset detection buffer */ + num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * + dqm->dev->kfd->shared_resources.num_pipe_per_mec * + NUM_XCC(dqm->dev->xcc_mask); + + dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); + dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); + + if (!dqm->detect_hang_info) { + retval = -ENOMEM; + goto fail_detect_hang_buffer; + } + dqm_unlock(dqm); return 0; +fail_detect_hang_buffer: + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); fail_allocate_vidmem: fail_set_sched_resources: if (!dqm->dev->kfd->shared_resources.enable_mes) @@ -1748,6 +1783,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr); + kfree(dqm->detect_hang_info); + dqm->detect_hang_info = NULL; dqm_unlock(dqm); return 0; @@ -1965,6 +2002,135 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) return retval; } +static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n", + q->properties.queue_id, q->process->pasid); + + pdd->has_reset_queue = true; + if (q->properties.is_active) { + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + } +} + +static int detect_queue_hang(struct device_queue_manager *dqm) +{ + int i; + + /* detect should be used only in dqm locked queue reset */ + if (WARN_ON(dqm->detect_hang_count > 0)) + return 0; + + memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); + + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { + uint32_t mec, pipe, queue; + int xcc_id; + + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; + + if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) + continue; + + amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); + + for_each_inst(xcc_id, dqm->dev->xcc_mask) { + uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( + dqm->dev->adev, pipe, queue, xcc_id); + struct dqm_detect_hang_info hang_info; + + if (!queue_addr) + continue; + + hang_info.pipe_id = pipe; + hang_info.queue_id = queue; + hang_info.xcc_id = xcc_id; + hang_info.queue_address = queue_addr; + + dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; + dqm->detect_hang_count++; + } + } + + return dqm->detect_hang_count; +} + +static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) +{ + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + if (queue_address == q->properties.queue_address) + return q; + } + } + + return NULL; +} + +/* only for compute queue */ +static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) +{ + int r = 0, reset_count = 0, i; + + if (!dqm->detect_hang_info || dqm->is_hws_hang) + return -EIO; + + /* assume dqm locked. */ + if (!detect_queue_hang(dqm)) + return -ENOTRECOVERABLE; + + for (i = 0; i < dqm->detect_hang_count; i++) { + struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; + struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); + struct kfd_process_device *pdd; + uint64_t queue_addr = 0; + + if (!q) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + pdd = kfd_get_process_device_data(dqm->dev, q->process); + if (!pdd) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, + hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, + KFD_UNMAP_LATENCY_MS); + + /* either reset failed or we reset an unexpected queue. */ + if (queue_addr != q->properties.queue_address) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + set_queue_as_reset(dqm, q, &pdd->qpd); + reset_count++; + } + + if (reset_count == dqm->detect_hang_count) + kfd_signal_reset_event(dqm->dev); + else + r = -ENOTRECOVERABLE; + +reset_fail: + dqm->detect_hang_count = 0; + + return r; +} + /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -2015,11 +2181,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { - while (halt_if_hws_hang) - schedule(); - kfd_hws_hang(dqm); - retval = -ETIME; - goto out; + if (reset_queues_on_hws_hang(dqm)) { + while (halt_if_hws_hang) + schedule(); + dqm->is_hws_hang = true; + kfd_hws_hang(dqm); + retval = -ETIME; + goto out; + } } /* We need to reset the grace period value for this device */ @@ -2038,8 +2207,7 @@ out: } /* only for compute queue */ -static int reset_queues_cpsch(struct device_queue_manager *dqm, - uint16_t pasid) +static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) { int retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3b9b8eabaacc..dfb36a246637 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -210,6 +210,13 @@ struct device_queue_manager_asic_ops { struct kfd_node *dev); }; +struct dqm_detect_hang_info { + int pipe_id; + int queue_id; + int xcc_id; + uint64_t queue_address; +}; + /** * struct device_queue_manager * @@ -264,6 +271,11 @@ struct device_queue_manager { uint32_t wait_times; wait_queue_head_t destroy_wait; + + /* for per-queue reset support */ + struct dqm_detect_hang_info *detect_hang_info; + size_t detect_hang_info_size; + int detect_hang_count; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 9b33d9d2c9ad..ea3792249209 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -31,6 +31,7 @@ #include #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_device_queue_manager.h" #include /* @@ -1244,12 +1245,33 @@ void kfd_signal_reset_event(struct kfd_node *dev) idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p); if (unlikely(user_gpu_id == -EINVAL)) { WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id); continue; } + if (unlikely(!pdd)) { + WARN_ONCE(1, "Could not get device data from pasid:0x%x\n", p->pasid); + continue; + } + + if (dev->dqm->detect_hang_count && !pdd->has_reset_queue) + continue; + + if (dev->dqm->detect_hang_count) { + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid); + if (ti) { + dev_err(dev->adev->dev, + "Queues reset on process %s tid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 66c73825c0a0..84e8ea3a8a0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -321,8 +321,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v9_mqd *m = (struct v9_mqd *)mqd; + uint32_t doorbell_id = m->queue_doorbell_id0; - return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); + m->queue_doorbell_id0 = 0; + + return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -624,6 +627,7 @@ static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd) m = get_mqd(mqd + hiq_mqd_size * inst); ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, inst); + m->queue_doorbell_id0 = 0; ++inst; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4190fa339913..a5d47048c147 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -846,6 +846,9 @@ struct kfd_process_device { void *proc_ctx_bo; uint64_t proc_ctx_gpu_addr; void *proc_ctx_cpu_ptr; + + /* Tracks queue reset status */ + bool has_reset_queue; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e29b92eb523..a902950cc060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1851,6 +1851,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) goto fail; } n_evicted++; + + pdd->dev->dqm->is_hws_hang = false; } return r; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 6d094cf3587d..7744ca3ef4b1 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -318,6 +318,12 @@ struct kfd2kgd_calls { void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); + uint64_t (*hqd_get_pq_addr)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); + uint64_t (*hqd_reset)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ From 67c4ca9f794951482c54a7006c8b3c367d6c3efc Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 13:36:15 +0530 Subject: [PATCH 0488/1523] drm/amdgpu: do not call insert_nop fn for zero count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not make a function call for zero size NOP as it does not add anything in the ring and is unnecessary function call. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..0d72d2cbb64b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -145,7 +145,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); count %= ring->funcs->align_mask + 1; - ring->funcs->insert_nop(ring, count); + + if (count != 0) + ring->funcs->insert_nop(ring, count); mb(); amdgpu_ring_set_wptr(ring); From b41a382932263b2951bc9e83a22168d579a94865 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 22 Jul 2024 13:26:08 -0400 Subject: [PATCH 0489/1523] drm/amdkfd: fix debug watchpoints for logical devices The number of watchpoints should be set and constrained per logical partition device, not by the socket device. Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 20 ++++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 34a282540c7e..312dfa84f29f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { /* device watchpoint in use so skip */ - if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1) + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) continue; pdd->alloc_watch_ids |= 0x1 << i; - pdd->dev->kfd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; *watch_id = i; - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return 0; } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return -ENOMEM; } static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); /* process owns device watch point so safe to clear */ if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { pdd->alloc_watch_ids &= ~(0x1 << watch_id); - pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); } static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { bool owns_watch_id = false; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return owns_watch_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 6619028dd58b..c2d2598f776c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -884,13 +884,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, dev_err(kfd_device, "Error initializing KFD node\n"); goto node_init_error; } + + spin_lock_init(&node->watch_points_lock); + kfd->nodes[i] = node; } svm_range_set_max_pages(kfd->adev); - spin_lock_init(&kfd->watch_points_lock); - kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, kfd->adev->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a5d47048c147..057d20446c31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -310,6 +310,10 @@ struct kfd_node { struct kfd_local_mem_info local_mem_info; struct kfd_dev *kfd; + + /* Track per device allocated watch points */ + uint32_t alloc_watch_ids; + spinlock_t watch_points_lock; }; struct kfd_dev { @@ -362,10 +366,6 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; - /* Track per device allocated watch points */ - uint32_t alloc_watch_ids; - spinlock_t watch_points_lock; - /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; From 847e387e00547b0cc728a5e61f5beb2ff861ed1d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 13:39:20 +0530 Subject: [PATCH 0490/1523] drm/amdgpu: optimize the padding for gfx11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4a9766635933..e7c160b9d0fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -481,6 +481,24 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, amdgpu_ring_write(ring, inv); /* poll interval */ } +static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6709,7 +6727,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, @@ -6751,7 +6769,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, From 62eefd10ac1c7e976bda47ff311bd87cee40ab8d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jul 2024 10:45:08 +0800 Subject: [PATCH 0491/1523] drm/amdgpu: use CPU for page table update if SDMA is unavailable avoid using SDMA if it is unavailable. Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a060c28f0877..bcb729094521 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2397,6 +2397,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { + struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; @@ -2426,6 +2427,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + /* use CPU for page table update if SDMA is unavailable */ + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); + if (!ip_block || ip_block->status.valid == false) + vm->use_cpu_for_update = true; + DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && From 62ed6f0f198da04e884062264df308277628004f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:09:28 +0530 Subject: [PATCH 0492/1523] drm/amd/display: Add NULL check for function pointer in dcn20_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn20_set_output_transfer_func function. Previously, set_output_gamma was being checked for null at line 1030, but then it was being dereferenced without any null check at line 1048. This could potentially lead to a null pointer dereference error if set_output_gamma is null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma at line 1048. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 5a6064999033..425432ca497f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1045,7 +1045,8 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, /* * if above if is not executed then 'params' equal to 0 and set in bypass */ - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); return true; } From dd340acd42c24a3f28dd22fae6bf38662334264c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:22:06 +0530 Subject: [PATCH 0493/1523] drm/amd/display: Add NULL check for function pointer in dcn401_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn401_set_output_transfer_func function. Previously, set_output_gamma was being checked for null, but then it was being dereferenced without any null check. This could lead to a null pointer dereference if set_output_gamma is null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index ceaaa8df3641..77489bbcda02 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -743,7 +743,9 @@ bool dcn401_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } From 28574b08c70e56d34d6f6379326a860b96749051 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jul 2024 13:15:00 +0530 Subject: [PATCH 0494/1523] drm/amd/display: Add NULL check for function pointer in dcn32_set_output_transfer_func This commit adds a null check for the set_output_gamma function pointer in the dcn32_set_output_transfer_func function. Previously, set_output_gamma was being checked for null, but then it was being dereferenced without any null check. This could lead to a null pointer dereference if set_output_gamma is null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a null check for set_output_gamma before the call to set_output_gamma. Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index fcaabad204a2..c3bbbfd1be94 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -582,7 +582,9 @@ bool dcn32_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } From f59902ffcc43ce7e1db5d0c4dfee37ec2a1bae0c Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:27:28 +0530 Subject: [PATCH 0495/1523] drm/amdgpu: optimize the padding for gfx12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f932c7ff85e3..29b3bf1b29b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5005,6 +5005,24 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v12_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5186,7 +5204,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec, @@ -5224,7 +5242,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, From 9192c7613ca53572908ba23a4c3f39c7f8ba8021 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 31 Jul 2024 11:58:46 +0800 Subject: [PATCH 0496/1523] drm/amdgpu: force to use legacy inv in mmhub MMHUB v4.1.0 only support fixed cache mode, so only use legacy invalidation accordingly. Signed-off-by: Likun Gao Reviewed-by: Frank Min Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); From dfe9d047b162f3a79ab63046608c693ee14c5b7a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 1 Aug 2024 13:45:27 +0800 Subject: [PATCH 0497/1523] drm/amdgpu: Add more types for boot time error reporting Data abort exception and unknown errors are supported. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0fb2d9285834..9cda368ad794 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4748,6 +4748,16 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n", socket_id, aid_id, hbm_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n", + socket_id, aid_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n", + socket_id, aid_id, fw_status); } static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7ddd13d5c06b..0d49b74bfe5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -46,6 +46,8 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) +#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29) +#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA From bb670c31e13a402d30afa3d484403196031d0fbd Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 31 Jul 2024 20:19:15 +0530 Subject: [PATCH 0498/1523] drm/amdpgu: Micro-optimise amdgpu_ring_commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some value of optimisation we can replace the division with an bitwise and. And it even shrinks the code. Before: 6c9: 53 push %rbx 6ca: 4c 8b 47 08 mov 0x8(%rdi),%r8 6ce: 31 d2 xor %edx,%edx 6d0: 48 89 fb mov %rdi,%rbx 6d3: 8b 87 c8 05 00 00 mov 0x5c8(%rdi),%eax 6d9: 41 8b 48 04 mov 0x4(%r8),%ecx 6dd: f7 d0 not %eax 6df: 21 c8 and %ecx,%eax 6e1: 83 c1 01 add $0x1,%ecx 6e4: 83 c0 01 add $0x1,%eax 6e7: f7 f1 div %ecx 6e9: 89 d6 mov %edx,%esi 6eb: 41 ff 90 88 00 00 00 call *0x88(%r8) After: 6c9: 53 push %rbx 6ca: 48 8b 57 08 mov 0x8(%rdi),%rdx 6ce: 48 89 fb mov %rdi,%rbx 6d1: 8b 87 c8 05 00 00 mov 0x5c8(%rdi),%eax 6d7: 8b 72 04 mov 0x4(%rdx),%esi 6da: f7 d0 not %eax 6dc: 21 f0 and %esi,%eax 6de: 83 c0 01 add $0x1,%eax 6e1: 21 c6 and %eax,%esi 6e3: ff 92 88 00 00 00 call *0x88(%rdx) Reviewed-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 0d72d2cbb64b..8c39bf7e1fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -144,7 +144,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); - count %= ring->funcs->align_mask + 1; + count &= ring->funcs->align_mask; if (count != 0) ring->funcs->insert_nop(ring, count); From dee44a7cb577f2aba39e1713a51e9faee2f65534 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:35:14 +0530 Subject: [PATCH 0499/1523] drm/amdgpu: optimize the padding for gfx9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 675a1a8e2515..991f7c2fc1a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7100,6 +7100,24 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7240,7 +7258,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7294,7 +7312,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_sw_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7338,7 +7356,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, From fd69ef05029f9beb7b031ef96e7a36970806a670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 11 Jul 2024 15:25:06 +0200 Subject: [PATCH 0500/1523] drm/radeon: use GEM references instead of TTMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of a TTM reference grab a GEM reference whenever necessary. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 210e8d43bb23..9735f4968b86 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -88,7 +88,7 @@ static void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { radeon_mn_unregister(robj); - radeon_bo_unref(&robj); + ttm_bo_put(&robj->tbo); } } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 450ff7daa46c..d0e4b43d155c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -256,18 +256,15 @@ struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } void radeon_bo_unref(struct radeon_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } From 62341f7bc2ccb7c024eb023a05714d798ba6437d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 31 Jul 2024 18:37:42 +0530 Subject: [PATCH 0501/1523] drm/amdgpu: optimize the padding for gfx_v9_4_3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Reviewed-by: Christian König Cc: Christian König Cc: Pierre-Eric Pelloux-Prayer Cc: Tvrtko Ursulin Cc: Marek Olšák Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 98fe6c40da64..c4832a5725c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4178,6 +4178,24 @@ static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); } +static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4227,7 +4245,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, .test_ring = gfx_v9_4_3_ring_test_ring, .test_ib = gfx_v9_4_3_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_4_3_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, From 089525632d40bbfa507f224c20563529b3f8a4b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 09:29:13 -0600 Subject: [PATCH 0502/1523] drm/amd/display: Add missing DCN314 to the DML Makefile Include display_mode_vba_314 and display_rq_dlg_calc_314 to the dml Makefile. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) From 0964fbd59e7131c1bad4c8549ec13a27104f0b11 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:19:31 -0600 Subject: [PATCH 0503/1523] drm/amd/display: Cleanup dml2 and dc/resource Makefile Remove some useless lines from DC Makefiles. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 -- drivers/gpu/drm/amd/display/dc/resource/Makefile | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index dcd01fe04296..cf979ab172bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -35,8 +35,6 @@ frame_warn_flag := -Wframe-larger-than=2048 endif endif -# DRIVER_BUILD is mostly used in DML2.1 source -subdir-ccflags-y += -DDRIVER_BUILD=1 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index 4860bb2531a1..09320344d8e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -198,8 +198,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN351) ############################################################################### -############################################################################### - RESOURCE_DCN401 = dcn401_resource.o AMD_DAL_RESOURCE_DCN401 = $(addprefix $(AMDDALPATH)/dc/resource/dcn401/,$(RESOURCE_DCN401)) From 0345c8bc22c92f3d5fc0287957a96fd214297aa4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:21:40 -0600 Subject: [PATCH 0504/1523] drm/amd/display: Remove useless defines Remove __cplusplus defines added by accident. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index f1fd3eb92f8a..205e59a2a8ee 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -9,16 +9,8 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 -#ifdef __cplusplus -extern "C" { -#endif - /* SPL interfaces */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); -#ifdef __cplusplus -} -#endif - #endif /* __DC_SPL_H__ */ From e9180253e22596991db426ab5b01111c2a739db4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 15:22:26 -0600 Subject: [PATCH 0505/1523] drm/amd/display: Remove unused fields from dmub_cmd_update_dirty_rect_data Drop coasting_vtotal_high and pad from dmub_cmd_update_dirty_rect_data, since it is not used. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7c3838362c49..c5f99cbff0b6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -3028,14 +3028,6 @@ struct dmub_cmd_update_dirty_rect_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * 16-bit value dicated by driver that indicates the coasting vtotal high byte part. - */ - uint16_t coasting_vtotal_high; - /** - * Explicit padding to 4 byte boundary. - */ - uint8_t pad[2]; }; /** From 4f842ba7cdd83cba1d5c0bbd15ed9d14d882cf89 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 25 Jul 2024 14:35:09 -0600 Subject: [PATCH 0506/1523] drm/amd/display: Remove unused fields from dc_caps [Why & How] Identify few unused fileds in dc_caps. Remove them. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 250d5d48c2d3..95b0413e9f17 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -261,10 +261,7 @@ struct dc_caps { bool zstate_support; bool ips_support; uint32_t num_of_internal_disp; - uint32_t max_dwb_htap; - uint32_t max_dwb_vtap; enum dp_protocol_version max_dp_protocol_version; - bool spdif_aud; unsigned int mall_size_per_mem_channel; unsigned int mall_size_total; unsigned int cursor_cache_size; @@ -1370,7 +1367,6 @@ struct dc_plane_info { int global_alpha_value; bool input_csc_enabled; int layer_index; - bool front_buffer_rendering_active; enum chroma_cositing cositing; }; From e1dbe625d6ac2821eb29e087db46cb539d8079f0 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 25 Jul 2024 16:41:38 -0600 Subject: [PATCH 0507/1523] drm/amd/display: Add missing program DET segment call to pipe init Add a callback that program the DET segment when initializing pipes. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e31249d1dd22..3cd584419b88 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1403,6 +1403,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From db65eb46de135338d6177f8853e0fd208f19d63e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: [PATCH 0508/1523] drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_buddy.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- include/drm/drm_buddy.h | 2 ++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); From 507293b1b207e419fc7ea4ff1d72c2f1db18e33c Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 30 Jul 2019 16:27:24 -0400 Subject: [PATCH 0509/1523] drm/amd/display: Fix overlay with pre-blend color processing [WHY] Overlay works similarly to MPO, but uses global alpha on both planes and sets the desktop as the rear plane instead of the front plane [HOW] Ensure that top plane isn't overlay by checking global alpha before applying the previously added MPO fix Reviewed-by: Rodrigo Siqueira Signed-off-by: Michael Strauss Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 3cd584419b88..a7b5b25e3f34 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2586,8 +2586,11 @@ static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_co while (top->top_pipe) top = top->top_pipe; // Traverse to top pipe_ctx - if (top->plane_state && top->plane_state->layer_index == 0) - return true; // Front MPO plane not hidden + if (top->plane_state && top->plane_state->layer_index == 0 && !top->plane_state->global_alpha) + // Global alpha used by top plane for PIP overlay + // Pre-multiplied/per-pixel alpha used by MPO + // Check top plane's global alpha to ensure layer_index > 0 not caused by PIP + return true; // MPO in use and front plane not hidden } } return false; From 64a905203fd0da3e8a3f649593cc48c9fbab25d0 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 23 Jul 2024 00:26:19 -0400 Subject: [PATCH 0510/1523] drm/amd/display: Add stream and char control callback [why & how] Add new stream and char control functions based on DCCG spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 132 ++++++++++++++++-- 1 file changed, 122 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index bd3757de51c9..13e3d64ee2f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -896,7 +896,7 @@ static void dccg35_disable_symclk32_le_new( dccg35_set_symclk32_le_rcg(dccg, inst, true); } -static void dccg35_enable_dpp_new( +static void dccg35_enable_dpp_clk_new( struct dccg *dccg, int inst, enum dppclk_clock_source src) @@ -915,7 +915,7 @@ static void dccg35_enable_dpp_new( DPPCLK0_DTO_MODULO, 0xFF); } -static void dccg35_disable_dpp_new( +static void dccg35_disable_dpp_clk_new( struct dccg *dccg, int inst) { @@ -956,27 +956,25 @@ static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, } static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, - enum dtbclk_source src, int inst) { dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); dccg35_set_dtbclk_p_rcg(dccg, inst, true); } -static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, - enum dtbclk_source src, +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, int inst) { dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); dccg35_set_dpstreamclk_rcg(dccg, inst, true); } -static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, - enum dtbclk_source src, +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dp_stream_clk_source src, int inst) { dccg35_set_dpstreamclk_rcg(dccg, inst, false); - dccg35_set_dtbclk_p_src_new(dccg, src, inst); + dccg35_set_dpstreamclk_src_new(dccg, src, inst); } static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) @@ -1935,6 +1933,114 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst } } +static void dccg35_set_dpstreamclk_cb( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) +{ + + enum dtbclk_source dtb_clk_src; + enum dp_stream_clk_source dp_stream_clk_src; + + ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); + + switch (src) { + case REFCLK: + dtb_clk_src = DTBCLK_REFCLK; + dp_stream_clk_src = DP_STREAM_REFCLK; + break; + case DPREFCLK: + dtb_clk_src = DTBCLK_DPREFCLK; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + case DTBCLK0: + dtb_clk_src = DTBCLK_DTBCLK0; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + if (dtb_clk_src == DTBCLK_REFCLK && + dp_stream_clk_src == DP_STREAM_REFCLK) { + dccg35_disable_dtbclk_p_new(dccg, otg_inst); + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); + } else { + dccg35_enable_dtbclk_p_new(dccg, dtb_clk_src, otg_inst); + dccg35_enable_dpstreamclk_new(dccg, + dp_stream_clk_src, + dp_hpo_inst); + } +} + +static void dccg35_set_dpstreamclk_root_clock_gating_cb( + struct dccg *dccg, + int dp_hpo_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Instance 0 is implied here since only one streamclock resource + * Redundant as gating when enabled is acheived through set_dpstreamclk + */ + if (power_on) + dccg35_enable_dpstreamclk_new(dccg, + DP_STREAM_REFCLK, + dp_hpo_inst); + else + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); +} + +static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, + int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + /* Enable DPP CLK DTO output */ + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO); + + /* Program DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + } else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); + + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +static void dccg35_dpp_root_clock_control_cb( + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through update_dpp_dto + */ + if (power_on) + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); + else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); +} + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2010,14 +2116,20 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk32_se_new; (void)&dccg35_enable_symclk32_le_new; (void)&dccg35_disable_symclk32_le_new; - (void)&dccg35_enable_dpp_new; - (void)&dccg35_disable_dpp_new; + (void)&dccg35_enable_dpp_clk_new; + (void)&dccg35_enable_dpp_clk_new; (void)&dccg35_disable_dscclk_new; (void)&dccg35_enable_dscclk_new; (void)&dccg35_enable_dtbclk_p_new; (void)&dccg35_disable_dtbclk_p_new; (void)&dccg35_enable_dpstreamclk_new; (void)&dccg35_disable_dpstreamclk_new; + (void)&dccg35_set_dpstreamclk_cb; + (void)&dccg35_dpp_root_clock_control_cb; + (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; + (void)&dccg35_update_dpp_dto_cb; + (void)&dccg35_dpp_root_clock_control_cb; + base = &dccg_dcn->base; base->ctx = ctx; base->funcs = &dccg35_funcs; From 792be2e23ac69821db7860ba4ba94592101f0b07 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 1 Aug 2024 14:26:19 +0800 Subject: [PATCH 0511/1523] drm/amdgpu: create function to check RAS RMA status In the convenience of calling it globally. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9cda368ad794..16da939a8406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2153,7 +2153,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * /* gpu reset is fallback for failed and default cases. * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. */ - if (poison_stat && !con->is_rma) { + if (poison_stat && !amdgpu_ras_is_rma(adev)) { event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "GPU reset for %s RAS poison consumption is issued!\n", @@ -2945,7 +2945,7 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && con->is_rma) + if (err_cnt && amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); amdgpu_ras_schedule_retirement_dwork(con, @@ -3046,7 +3046,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, } /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ - if (reset_flags && !con->is_rma) { + if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) @@ -3192,7 +3192,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) * This calling fails when is_rma is true or * ret != 0. */ - if (con->is_rma || ret) + if (amdgpu_ras_is_rma(adev) || ret) goto free; if (con->eeprom_control.ras_num_recs) { @@ -3241,7 +3241,7 @@ out: * Except error threshold exceeding case, other failure cases in this * function would not fail amdgpu driver init. */ - if (!con->is_rma) + if (!amdgpu_ras_is_rma(adev)) ret = 0; else ret = -EINVAL; @@ -4284,7 +4284,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); /* mode1 is the only selection for RMA status */ - if (ras->is_rma) { + if (amdgpu_ras_is_rma(adev)) { ras->gpu_reset_flags = 0; ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; } @@ -4824,3 +4824,13 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, va_end(args); } + +bool amdgpu_ras_is_rma(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return false; + + return con->is_rma; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0d49b74bfe5e..9625e5c92413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -974,4 +974,5 @@ __printf(3, 4) void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, const char *fmt, ...); +bool amdgpu_ras_is_rma(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2ed55f3c5fa2..bb7b9b2eaac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -196,7 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_umc_handle_bad_pages(adev, ras_error_status); if ((err_data->ue_count || err_data->de_count) && - (reset || (con && con->is_rma))) { + (reset || amdgpu_ras_is_rma(adev))) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 9cd221ed240c..999bb3cc88b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -97,7 +97,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; } - if (con && !con->is_rma) + if (con && !amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); } From eaf3adb8faab611ba57594fa915893fc93a7788c Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 19 Jul 2024 14:10:58 -0400 Subject: [PATCH 0512/1523] drm/amd/display: fix a UBSAN warning in DML2.1 When programming phantom pipe, since cursor_width is explicity set to 0, this causes calculation logic to trigger overflow for an unsigned int triggering the kernel's UBSAN check as below: [ 40.962845] UBSAN: shift-out-of-bounds in /tmp/amd.EfpumTkO/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:3312:34 [ 40.962849] shift exponent 4294967170 is too large for 32-bit type 'unsigned int' [ 40.962852] CPU: 1 PID: 1670 Comm: gnome-shell Tainted: G W OE 6.5.0-41-generic #41~22.04.2-Ubuntu [ 40.962854] Hardware name: Gigabyte Technology Co., Ltd. X670E AORUS PRO X/X670E AORUS PRO X, BIOS F21 01/10/2024 [ 40.962856] Call Trace: [ 40.962857] [ 40.962860] dump_stack_lvl+0x48/0x70 [ 40.962870] dump_stack+0x10/0x20 [ 40.962872] __ubsan_handle_shift_out_of_bounds+0x1ac/0x360 [ 40.962878] calculate_cursor_req_attributes.cold+0x1b/0x28 [amdgpu] [ 40.963099] dml_core_mode_support+0x6b91/0x16bc0 [amdgpu] [ 40.963327] ? srso_alias_return_thunk+0x5/0x7f [ 40.963331] ? CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport+0x18b8/0x2790 [amdgpu] [ 40.963534] ? srso_alias_return_thunk+0x5/0x7f [ 40.963536] ? dml_core_mode_support+0xb3db/0x16bc0 [amdgpu] [ 40.963730] dml2_core_calcs_mode_support_ex+0x2c/0x90 [amdgpu] [ 40.963906] ? srso_alias_return_thunk+0x5/0x7f [ 40.963909] ? dml2_core_calcs_mode_support_ex+0x2c/0x90 [amdgpu] [ 40.964078] core_dcn4_mode_support+0x72/0xbf0 [amdgpu] [ 40.964247] dml2_top_optimization_perform_optimization_phase+0x1d3/0x2a0 [amdgpu] [ 40.964420] dml2_build_mode_programming+0x23d/0x750 [amdgpu] [ 40.964587] dml21_validate+0x274/0x770 [amdgpu] [ 40.964761] ? srso_alias_return_thunk+0x5/0x7f [ 40.964763] ? resource_append_dpp_pipes_for_plane_composition+0x27c/0x3b0 [amdgpu] [ 40.964942] dml2_validate+0x504/0x750 [amdgpu] [ 40.965117] ? dml21_copy+0x95/0xb0 [amdgpu] [ 40.965291] ? srso_alias_return_thunk+0x5/0x7f [ 40.965295] dcn401_validate_bandwidth+0x4e/0x70 [amdgpu] [ 40.965491] update_planes_and_stream_state+0x38d/0x5c0 [amdgpu] [ 40.965672] update_planes_and_stream_v3+0x52/0x1e0 [amdgpu] [ 40.965845] ? srso_alias_return_thunk+0x5/0x7f [ 40.965849] dc_update_planes_and_stream+0x71/0xb0 [amdgpu] Fix this by adding a guard for checking cursor width before triggering the size calculation. Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 89 ++++++++++--------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c54f1af1845c..cbecdc9f253a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7247,10 +7247,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Cursor Support Check */ mode_lib->ms.support.CursorSupport = true; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { - if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) mode_lib->ms.support.CursorSupport = false; - } } } @@ -8111,27 +8110,31 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool cursor_not_enough_urgent_latency_hiding = 0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->ms.UrgLatency, + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } - // output - &mode_lib->ms.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ @@ -10608,31 +10611,33 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us; + double line_time_us = 0.0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, + line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->mp.UrgentLatency, - - // output - &mode_lib->mp.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; CalculateUrgentBurstFactor( From c9875d0a789060facc274dee0d4eb6500d471772 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 1 Aug 2024 10:47:16 +0800 Subject: [PATCH 0513/1523] drm/amdgpu: add golden setting for gc v12 Adding Manual GDB golden setting for gc v12 revision 0 ASIC. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 29b3bf1b29b3..0a71e216a7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3481,6 +3487,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3521,6 +3545,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) From 671af06690e7f79db51b475a35c3b2619f345abc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 2 Aug 2024 10:11:37 +0800 Subject: [PATCH 0514/1523] drm/amdgpu: remove RAS unused paramter 'err_addr' - amdgpu_ras_error_statistic_ue_count() - amdgpu_ras_error_statistic_ce_count() - amdgpu_ras_error_statistic_de_count() The parameter 'err_addr' is no longer used since following patch. Fixes: a7e8467fbeee ("drm/amdgpu: Remove unused code") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 14 +++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 18 +++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 18 ++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 6 +++--- 9 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 19158cc30f31..929095a2e088 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -453,13 +453,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er switch (type) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_DEFERRED: - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 2542bd7aa7c7..18ee60378727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -396,7 +396,6 @@ static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_err_data *err_data) { - struct ras_err_addr err_addr; struct amdgpu_smuio_mcm_config_info mcm_info; struct mca_bank_node *node, *tmp; struct mca_bank_entry *entry; @@ -421,27 +420,20 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_r continue; memset(&mcm_info, 0, sizeof(mcm_info)); - memset(&err_addr, 0, sizeof(err_addr)); mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; - if (blk == AMDGPU_RAS_BLOCK__UMC) { - err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; - err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; - err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; - } - if (type == AMDGPU_MCA_ERROR_TYPE_UE) { amdgpu_ras_error_statistic_ue_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } else { if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) amdgpu_ras_error_statistic_de_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); else amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } amdgpu_mca_bank_set_remove_node(mca_set, node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 16da939a8406..61a2f386d9fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1223,11 +1223,11 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; amdgpu_ras_error_statistic_de_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->de_count); + &err_info->mcm_info, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ce_count); + &err_info->mcm_info, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ue_count); + &err_info->mcm_info, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -4618,8 +4618,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d } int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4640,8 +4640,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4662,8 +4662,8 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9625e5c92413..669720a9c60a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -572,12 +572,6 @@ struct ras_fs_data { char debugfs_name[32]; }; -struct ras_err_addr { - uint64_t err_status; - uint64_t err_ipid; - uint64_t err_addr; -}; - struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; @@ -939,14 +933,14 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, const struct aca_info *aca_info, void *data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 821ba2309dec..7de449fae1e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1389,10 +1389,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c4832a5725c3..8455fda750a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4075,8 +4075,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 621761a17ac7..915203b91c5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -670,8 +670,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index cb7fedb34fa6..c77889040760 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2243,7 +2243,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 0e6c3ce3ea8f..1a8ea834efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -157,9 +157,9 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, &de_count, umc_v12_0_is_deferred_error); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count); return 0; } From 434967aadbbbe3ad9103cc29e9a327de20fdba01 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 7 Mar 2024 19:04:31 +0000 Subject: [PATCH 0515/1523] drm/amdgpu: Forward soft recovery errors to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we discussed before[1], soft recovery should be forwarded to userspace, or we can get into a really bad state where apps will keep submitting hanging command buffers cascading us to a hard reset. 1: https://lore.kernel.org/all/bf23d5ed-9a6b-43e7-84ee-8cbfd0d60f18@froggi.es/ Signed-off-by: Joshua Ashton Reviewed-by: Marek Olšák Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..908e13455152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) From 65f6e9f7e1319119096a6dc93e62894e4375b578 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 19 Jul 2024 20:01:01 -0400 Subject: [PATCH 0516/1523] drm/amd/display: Print Pcon FRL Link BW in Debug Message Under autonomous mode, source reads dpcd DP_PCON_HDMI_POST_FRL_STATUS for the frl link status. Without dsc passthrough, it serves as bw bottleneck on the entire link, compared with the dp link from source to the converter where dsc is available. Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index f3b6d8936f91..59c9dde10885 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1166,6 +1166,8 @@ static void get_active_converter_info( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, hdmi_encoded_link_bw); + DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); } if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) From dd3e296289346109d41c6317124f51aee0269c25 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 1 Aug 2024 16:26:27 +0800 Subject: [PATCH 0517/1523] drm/amdgpu: update bad state check in GPU recovery Return RMA status without message print. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3a43754e7f10..f595ba6ba7c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5498,7 +5498,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, * bad_page_threshold value to fix this once * probing driver again. */ - if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_is_rma(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { From b3a3c9a6b27b68310f1d4d486f47556808c7c855 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 31 Jul 2024 15:54:27 +0800 Subject: [PATCH 0518/1523] drm/amdgpu: report bad status in GPU recovery Instead of printing GPU reset failed. v2: add check for reset_context->src. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f595ba6ba7c7..29a4adee9286 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5876,8 +5876,14 @@ skip_hw_reset: tmp_adev->asic_reset_res = 0; if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + /* bad news, how to tell it to userspace ? + * for ras error, we should report GPU bad status instead of + * reset failure + */ + if (reset_context->src != AMDGPU_RESET_SRC_RAS || + !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", + atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); From 5aacf8917fde5bc2a640f3cd49130c0e2e85e726 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Fri, 2 Aug 2024 11:15:11 +0800 Subject: [PATCH 0519/1523] drm/amdgpu: change non-dcc buffer copy configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without setting cpv bit and 7th ib dw, non-dcc buffer copy will have random corruption So set the cpv bit and clear the 7th ib dw for copy non-dcc buffers Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 62ef4a737a56..e2918318600b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1688,8 +1688,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1703,6 +1702,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** From 7b2363e06c0ff4b868e7d768d605a9e656ff61f3 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 23 Jul 2024 17:26:47 -0400 Subject: [PATCH 0520/1523] drm/amd/display: Disable SubVP if Hardware Rotation is Used [Why and How] SubVP is not supported when hardware rotation is being used Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 9331a8fe77c9..9c6397aafd38 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1058,7 +1058,8 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, /* check recout height covers entire otg vactive, and single plane */ if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || - !plane_descriptor->composition.rect_out_height_spans_vactive) { + !plane_descriptor->composition.rect_out_height_spans_vactive || + plane_descriptor->composition.rotation_angle != dml2_rotation_0) { return false; } } From 51d334d6a49629ea03a2dde562d46846eb7d07a0 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Tue, 23 Jul 2024 16:20:15 -0400 Subject: [PATCH 0521/1523] drm/amd/display: Assume 32 bpp cursor in DML21 [Why] Cursor size can change dynamically at runtime without re-validation, so DML should calculate with the max size cursor to cover all cases. Reviewed-by: Dillon Varone Signed-off-by: Joshua Aberback Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/dml21_translation_helper.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 65776602648d..b0de8920f7e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -725,18 +725,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); struct dc_stream_state *stream = context->streams[stream_index]; - if (stream->cursor_attributes.color_format == CURSOR_MODE_MONO) - plane->cursor.cursor_bpp = 2; - else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_1BIT_AND - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - plane->cursor.cursor_bpp = 32; - } else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED) { - plane->cursor.cursor_bpp = 64; - } else - plane->cursor.cursor_bpp = 32; - + plane->cursor.cursor_bpp = 32; plane->cursor.cursor_width = 256; plane->cursor.num_cursors = 1; From b8dc6ca028d9a39196a3a066b9ef2d4a5eca475d Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 23 Jul 2024 15:54:23 -0400 Subject: [PATCH 0522/1523] drm/amd/display: Force enable 3DLUT DMA check for dcn401 in DML [WHY] Currently TR0 (trip 0) is not properly budgeting for urgent latency in DML2.1. This results in overly aggressive prefetch schedules that are vulnerable to request return jitter, resulting in severe underflow at the start of the frame. [HOW] Forcing 3DLUT DMA check to enable causes urgent latency to be budgeted properly into the prefetch schedule, avoiding the vulnerability. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 6 ++++-- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 1 + .../drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b0de8920f7e7..006667aa961b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -816,6 +816,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { plane->tdlut.setup_for_tdlut = true; + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: @@ -825,6 +826,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; break; } + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; @@ -833,8 +835,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; } - } else - plane->tdlut.setup_for_tdlut = false; + } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; plane->dynamic_meta_data.enable = false; plane->dynamic_meta_data.lines_before_active_required = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 023325e8f6e2..0f944fcfd5a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -236,6 +236,7 @@ struct dml2_configuration_options { bool use_clock_dc_limits; bool gpuvm_enable; + bool force_tdlut_enable; struct dml2_soc_bb *bb_from_dmub; }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 3e76732ac0dc..ec676d269d33 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -2099,6 +2099,7 @@ static bool dcn401_resource_construct( dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.map_dc_pipes_with_callbacks = true; + dc->dml2_options.force_tdlut_enable = true; resource_init_common_dml2_callbacks(dc, &dc->dml2_options); dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; From be64336307a6c3ee71fe1337c1b9f0495aa83c50 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 26 Jun 2024 17:02:23 +0800 Subject: [PATCH 0523/1523] drm/amd/display: Re-enable panel replay feature [Why & How] Fixed the replay issues and now re-enable the panel replay feature. Reported-by: Arthur Borsboom Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3344 Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b4fbccbf2f29..ec6064d40dbf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4850,18 +4850,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) /* Determine whether to enable Replay support by default. */ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { -/* - * Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344 - * case IP_VERSION(3, 1, 4): - * case IP_VERSION(3, 1, 5): - * case IP_VERSION(3, 1, 6): - * case IP_VERSION(3, 2, 0): - * case IP_VERSION(3, 2, 1): - * case IP_VERSION(3, 5, 0): - * case IP_VERSION(3, 5, 1): - * replay_feature_enabled = true; - * break; - */ + case IP_VERSION(3, 1, 4): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): + replay_feature_enabled = true; + break; + default: replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; break; From 78c508a1c162c90c48e12faa62bdab8b90e6f17c Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 25 Jul 2024 11:45:36 -0400 Subject: [PATCH 0524/1523] drm/amd/display: Add clock control callbacks [why & how] Add clock source selection an control functions based on spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 281 ++++++++++++++++-- 1 file changed, 252 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 13e3d64ee2f0..b4f441c405bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,13 +41,22 @@ #define DC_LOGGER \ dccg->ctx->logger -enum physymclk_fe_source { - PHYSYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A - PHYSYMCLK_FE_SYMCLK_B, - PHYSYMCLK_FE_SYMCLK_C, - PHYSYMCLK_FE_SYMCLK_D, - PHYSYMCLK_FE_SYMCLK_E, - PHYSYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +enum symclk_fe_source { + SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + SYMCLK_FE_SYMCLK_B, + SYMCLK_FE_SYMCLK_C, + SYMCLK_FE_SYMCLK_D, + SYMCLK_FE_SYMCLK_E, + SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk_be_source { + SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1 + SYMCLK_BE_DPIACLK_810 = 4, + SYMCLK_BE_DPIACLK_162 = 5, + SYMCLK_BE_DPIACLK_540 = 6, + SYMCLK_BE_DPIACLK_270 = 7, + SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software }; enum physymclk_source { @@ -252,7 +261,7 @@ static void dccg35_set_physymclk_rcg( } } -static void dccg35_set_physymclk_fe_rcg( +static void dccg35_set_symclk_fe_rcg( struct dccg *dccg, int inst, bool enable) @@ -289,6 +298,45 @@ static void dccg35_set_physymclk_fe_rcg( } } +static void dccg35_set_symclk_be_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* TBD add symclk_be in rcg control bits */ + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) { @@ -665,6 +713,42 @@ static void dccg35_set_physymclk_src_new( } } +static void dccg35_set_symclk_be_src_new( + struct dccg *dccg, + enum symclk_be_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + } +} + static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, int symclk_fe_inst, int symclk_be_inst) @@ -699,35 +783,35 @@ static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, return 0; } -static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum physymclk_fe_source src, int inst) +static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); switch (inst) { case 0: REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, - SYMCLKA_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKA_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 1: REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, - SYMCLKB_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKB_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 2: REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, - SYMCLKC_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKC_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 3: REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, - SYMCLKD_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKD_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; case 4: REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, - SYMCLKE_FE_EN, (src == PHYSYMCLK_FE_REFCLK) ? 0 : 1, - SYMCLKE_FE_SRC_SEL, (src == PHYSYMCLK_FE_REFCLK) ? 0 : src); + SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); break; } } @@ -804,9 +888,9 @@ static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) static void dccg35_enable_symclk_fe_new( struct dccg *dccg, int inst, - enum physymclk_fe_source src) + enum symclk_fe_source src) { - dccg35_set_physymclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_rcg(dccg, inst, false); dccg35_set_symclk_fe_src_new(dccg, src, inst); } @@ -814,17 +898,17 @@ static void dccg35_disable_symclk_fe_new( struct dccg *dccg, int inst) { - dccg35_set_symclk_fe_src_new(dccg, PHYSYMCLK_FE_REFCLK, inst); - dccg35_set_physymclk_fe_rcg(dccg, inst, true); + dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst); + dccg35_set_symclk_fe_rcg(dccg, inst, true); } static void dccg35_enable_symclk_be_new( struct dccg *dccg, int inst, - enum physymclk_source src) + enum symclk_be_source src) { - dccg35_set_physymclk_rcg(dccg, inst, false); - dccg35_set_physymclk_src_new(dccg, inst, src); + dccg35_set_symclk_be_rcg(dccg, inst, false); + dccg35_set_symclk_be_src_new(dccg, inst, src); } static void dccg35_disable_symclk_be_new( @@ -834,7 +918,7 @@ static void dccg35_disable_symclk_be_new( int i; /* Switch from functional clock to refclock */ - dccg35_set_physymclk_src_new(dccg, inst, PHYSYMCLK_REFCLK); + dccg35_set_symclk_be_src_new(dccg, inst, SYMCLK_BE_REFCLK); /* Check if any other SE connected LE and disable them */ for (i = 0; i < 4; i++) { @@ -845,7 +929,7 @@ static void dccg35_disable_symclk_be_new( } } /* Safe to RCG SYMCLK*/ - dccg35_set_physymclk_rcg(dccg, inst, true); + dccg35_set_symclk_be_rcg(dccg, inst, true); } static void dccg35_enable_symclk32_se_new( @@ -2041,6 +2125,132 @@ static void dccg35_dpp_root_clock_control_cb( dccg35_disable_dpp_clk_new(dccg, dpp_inst); } +static void dccg35_enable_symclk32_se_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source phyd32clk) +{ + dccg35_enable_symclk32_se_new(dccg, inst, (enum symclk32_se_clk_source)phyd32clk); +} + +static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_se_new(dccg, inst); +} + +static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_dtbclk_p_src_cb( + struct dccg *dccg, + enum streamclk_source src, + uint32_t inst) +{ + if (src == DTBCLK0) + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst); + else + dccg35_disable_dtbclk_p_new(dccg, inst); +} + +static void dccg35_set_dtbclk_dto_cb( + struct dccg *dccg, + const struct dtbclk_dto_params *params) +{ + /* set_dtbclk_p_src typ called earlier to switch to DTBCLK + * if params->ref_dtbclk_khz and req_dtbclk_khz are 0 switch to ref-clock + */ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* DTO Output Rate / Pixel Rate = 1/4 */ + int req_dtbclk_khz = params->pixclk_khz / 4; + + if (params->ref_dtbclk_khz && req_dtbclk_khz) { + uint32_t modulo, phase; + + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst); + + // phase / modulo = dtbclk / dtbclk ref + modulo = params->ref_dtbclk_khz * 1000; + phase = req_dtbclk_khz * 1000; + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 1); + + REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1, + 1, 100); + + /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */ + dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1); + + /* The recommended programming sequence to enable DTBCLK DTO to generate + * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should + * be set only after DTO is enabled + */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + PIPE_DTO_SRC_SEL[params->otg_inst], 2); + } else { + dccg35_disable_dtbclk_p_new(dccg, params->otg_inst); + + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0, + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + } +} + +static void dccg35_disable_dscclk_cb(struct dccg *dccg, + int inst) +{ + dccg35_disable_dscclk_new(dccg, inst); +} + +static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst) +{ + dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3); +} + +static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +{ + /* Switch to functional clock if already not selected */ + dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst); + + dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, (enum symclk_fe_source) link_enc_inst); + +} + +static void dccg35_disable_symclk_se_cb( + struct dccg *dccg, + uint32_t stream_enc_inst, + uint32_t link_enc_inst) +{ + dccg35_disable_symclk_fe_new(dccg, stream_enc_inst); + + /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ +} + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2070,6 +2280,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, + }; struct dccg *dccg35_create( @@ -2091,7 +2302,7 @@ struct dccg *dccg35_create( (void)&dccg35_set_symclk32_se_rcg; (void)&dccg35_set_symclk32_le_rcg; (void)&dccg35_set_physymclk_rcg; - (void)&dccg35_set_physymclk_fe_rcg; + (void)&dccg35_set_symclk_fe_rcg; (void)&dccg35_set_dtbclk_p_rcg; (void)&dccg35_set_dppclk_rcg; (void)&dccg35_set_dpstreamclk_rcg; @@ -2129,6 +2340,18 @@ struct dccg *dccg35_create( (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; (void)&dccg35_update_dpp_dto_cb; (void)&dccg35_dpp_root_clock_control_cb; + (void)&dccg35_disable_symclk_se_cb; + (void)&dccg35_enable_symclk_se_cb; + (void)&dccg35_enable_dscclk_cb; + (void)&dccg35_disable_dscclk_cb; + (void)&dccg35_set_dtbclk_dto_cb; + (void)&dccg35_set_dtbclk_p_src_cb; + (void)&dccg35_set_symclk32_le_root_clock_gating; + (void)&dccg35_disable_symclk32_le_cb; + (void)&dccg35_set_symclk_be_src_new; + (void)&dccg35_set_symclk_be_rcg; + (void)&dccg35_enable_symclk32_se_cb; + (void)&dccg35_disable_symclk32_se_cb; base = &dccg_dcn->base; base->ctx = ctx; From 41cb5a5e8480fa41ee452ee60e620a4c1de3c323 Mon Sep 17 00:00:00 2001 From: Fudong Wang Date: Thu, 25 Jul 2024 16:48:25 +0800 Subject: [PATCH 0525/1523] drm/amd/display: skip crtc power down when ips switch [Why & How] Add a dc debug option to keep crtc on when ips switch. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Fudong Wang Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 95b0413e9f17..b6a5ea93fd45 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -979,6 +979,7 @@ struct dc_debug_options { bool disable_z10; bool enable_z9_disable_interface; bool psr_skip_crtc_disable; + uint32_t ips_skip_crtc_disable_mask; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; uint32_t fixed_vs_aux_delay_config_wa; From 8151a6c13111b465dbabe07c19f572f7cbd16fef Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 12 Jul 2024 16:30:03 -0400 Subject: [PATCH 0526/1523] drm/amd/display: Skip Recompute DSC Params if no Stream on Link [why] Encounter NULL pointer dereference uner mst + dsc setup. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 [how] dsc recompute should be skipped if no mode change detected on the new request. If detected, keep checking whether the stream is already on current state or not. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..915eb2c08ece 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1270,6 +1270,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ From 329ee7087bc9862977fb6e431b8b0ea5e7e261b0 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Thu, 25 Jul 2024 16:09:35 -0400 Subject: [PATCH 0527/1523] drm/amd/display: Address coverity change [Why] Coverity picks up a defect with regards to array underflow. [How] Address coverity issue as recommended. Reviewed-by: Leo Ma Signed-off-by: Chris Park Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index cce425dd62d2..01ea3a31e54d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -517,10 +517,12 @@ static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr if (!use_hpo_encoder) continue; - otg_master->clock_source->funcs->program_pix_clk( + if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED) + otg_master->clock_source->funcs->program_pix_clk( otg_master->clock_source, &otg_master->stream_res.pix_clk_params, - dccg->ctx->dc->link_srv->dp_get_encoding_format(&otg_master->link_config.dp_link_settings), + dccg->ctx->dc->link_srv->dp_get_encoding_format( + &otg_master->link_config.dp_link_settings), &otg_master->pll_settings); } } From 00f06855f6e68954f67616cd5560fca25c755eba Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Fri, 26 Jul 2024 00:24:55 -0400 Subject: [PATCH 0528/1523] drm/amd/display: Add clock control callbacks [why & how] Add clock source selection control functions based on spec Reviewed-by: Muhammad Ahmed Signed-off-by: Hansen Dsouza Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 153 +++++++++++------- 1 file changed, 99 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b4f441c405bb..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -980,6 +980,21 @@ static void dccg35_disable_symclk32_le_new( dccg35_set_symclk32_le_rcg(dccg, inst, true); } +static void dccg35_enable_physymclk_new(struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, src, inst); +} + +static void dccg35_disable_physymclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst); + dccg35_set_physymclk_rcg(dccg, inst, true); +} + static void dccg35_enable_dpp_clk_new( struct dccg *dccg, int inst, @@ -2138,11 +2153,62 @@ static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) dccg35_disable_symclk32_se_new(dccg, inst); } +static void dccg35_enable_symclk32_le_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source src) +{ + dccg35_enable_symclk32_le_new(dccg, inst, (enum symclk32_le_clk_source) src); +} + static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) { dccg35_disable_symclk32_le_new(dccg, inst); } +static void dccg35_set_symclk32_le_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_physymclk_cb( + struct dccg *dccg, + int inst, + enum physymclk_clock_source clk_src, + bool force_enable) +{ + /* force_enable = 0 indicates we can switch to ref clock */ + if (force_enable) + dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_physymclk_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* Redundant RCG already done in disable_physymclk + * power_on = 1 indicates we need to ungate + */ + if (power_on) + dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK); + else + dccg35_disable_physymclk_new(dccg, inst); +} + static void dccg35_set_symclk32_le_root_clock_gating( struct dccg *dccg, int inst, @@ -2251,6 +2317,37 @@ static void dccg35_disable_symclk_se_cb( /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ } +static const struct dccg_funcs dccg35_funcs_new = { + .update_dpp_dto = dccg35_update_dpp_dto_cb, + .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, + .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, + .dccg_init = dccg35_init, + .set_dpstreamclk = dccg35_set_dpstreamclk_cb, + .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, + .enable_symclk32_se = dccg35_enable_symclk32_se_cb, + .disable_symclk32_se = dccg35_disable_symclk32_se_cb, + .enable_symclk32_le = dccg35_enable_symclk32_le_cb, + .disable_symclk32_le = dccg35_disable_symclk32_le_cb, + .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb, + .set_physymclk = dccg35_set_physymclk_cb, + .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb, + .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb, + .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, + .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, + .otg_add_pixel = dccg31_otg_add_pixel, + .otg_drop_pixel = dccg31_otg_drop_pixel, + .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, + .disable_dsc = dccg35_disable_dscclk_cb, + .enable_dsc = dccg35_enable_dscclk_cb, + .set_pixel_rate_div = dccg35_set_pixel_rate_div, + .get_pixel_rate_div = dccg35_get_pixel_rate_div, + .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync, + .set_valid_pixel_rate = dccg35_set_valid_pixel_rate, + .enable_symclk_se = dccg35_enable_symclk_se_cb, + .disable_symclk_se = dccg35_disable_symclk_se_cb, + .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb, +}; + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -2296,62 +2393,10 @@ struct dccg *dccg35_create( BREAK_TO_DEBUGGER(); return NULL; } - - /* Temporary declaration to handle unused static functions */ - (void)&dccg35_set_dsc_clk_rcg; - (void)&dccg35_set_symclk32_se_rcg; - (void)&dccg35_set_symclk32_le_rcg; - (void)&dccg35_set_physymclk_rcg; - (void)&dccg35_set_symclk_fe_rcg; - (void)&dccg35_set_dtbclk_p_rcg; - (void)&dccg35_set_dppclk_rcg; - (void)&dccg35_set_dpstreamclk_rcg; - (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_set_dsc_clk_src_new; - (void)&dccg35_set_symclk32_se_src_new; - (void)&dccg35_is_symclk32_se_src_functional_le_new; - (void)&dccg35_set_symclk32_le_src_new; - (void)&dcn35_set_dppclk_src_new; - (void)&dccg35_set_dtbclk_p_src_new; - (void)&dccg35_set_dpstreamclk_src_new; - (void)&dccg35_set_physymclk_src_new; - (void)&dccg35_is_symclk_fe_src_functional_be; - (void)&dccg35_set_symclk_fe_src_new; - (void)&dccg35_is_fe_rcg; - (void)&dccg35_is_symclk32_se_rcg; - (void)&dccg35_enable_symclk_fe_new; - (void)&dccg35_disable_symclk_fe_new; - (void)&dccg35_enable_symclk_be_new; (void)&dccg35_disable_symclk_be_new; - (void)&dccg35_enable_symclk32_se_new; - (void)&dccg35_disable_symclk32_se_new; - (void)&dccg35_enable_symclk32_le_new; - (void)&dccg35_disable_symclk32_le_new; - (void)&dccg35_enable_dpp_clk_new; - (void)&dccg35_enable_dpp_clk_new; - (void)&dccg35_disable_dscclk_new; - (void)&dccg35_enable_dscclk_new; - (void)&dccg35_enable_dtbclk_p_new; - (void)&dccg35_disable_dtbclk_p_new; - (void)&dccg35_enable_dpstreamclk_new; - (void)&dccg35_disable_dpstreamclk_new; - (void)&dccg35_set_dpstreamclk_cb; - (void)&dccg35_dpp_root_clock_control_cb; - (void)&dccg35_set_dpstreamclk_root_clock_gating_cb; - (void)&dccg35_update_dpp_dto_cb; - (void)&dccg35_dpp_root_clock_control_cb; - (void)&dccg35_disable_symclk_se_cb; - (void)&dccg35_enable_symclk_se_cb; - (void)&dccg35_enable_dscclk_cb; - (void)&dccg35_disable_dscclk_cb; - (void)&dccg35_set_dtbclk_dto_cb; - (void)&dccg35_set_dtbclk_p_src_cb; (void)&dccg35_set_symclk32_le_root_clock_gating; - (void)&dccg35_disable_symclk32_le_cb; - (void)&dccg35_set_symclk_be_src_new; - (void)&dccg35_set_symclk_be_rcg; - (void)&dccg35_enable_symclk32_se_cb; - (void)&dccg35_disable_symclk32_se_cb; + (void)&dccg35_set_smclk32_se_rcg; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; From e80f8f491df873ea2e07c941c747831234814612 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 25 Jul 2024 18:42:21 -0400 Subject: [PATCH 0529/1523] drm/amd/display: Revert Avoid overflow assignment This reverts commit a15268787b79 ("drm/amd/display: Avoid overflow assignment in link_dp_cts") Due to regression causing DPMS hang. Reviewed-by: Alex Hung Signed-off-by: Gabe Teeger Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 2 +- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 +-- drivers/gpu/drm/amd/display/include/dpcd_defs.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 95c275bf649b..519c3df78ee5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -727,7 +727,7 @@ struct dp_audio_test_data_flags { struct dp_audio_test_data { struct dp_audio_test_data_flags flags; - uint32_t sampling_rate; + uint8_t sampling_rate; uint8_t channel_count; uint8_t pattern_type; uint8_t pattern_period[8]; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 32d5a4b14333..df3781081da7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -775,8 +775,7 @@ bool dp_set_test_pattern( core_link_read_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); - if (pattern <= PHY_TEST_PATTERN_END_DP11) - training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; + training_pattern.v1_3.LINK_QUAL_PATTERN_SET = pattern; core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET, &training_pattern.raw, sizeof(training_pattern)); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index c246235e4afe..aee5170f5fb2 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -76,7 +76,6 @@ enum dpcd_phy_test_patterns { PHY_TEST_PATTERN_D10_2, PHY_TEST_PATTERN_SYMBOL_ERROR, PHY_TEST_PATTERN_PRBS7, - PHY_TEST_PATTERN_END_DP11 = PHY_TEST_PATTERN_PRBS7, PHY_TEST_PATTERN_80BIT_CUSTOM,/* For DP1.2 only */ PHY_TEST_PATTERN_CP2520_1, PHY_TEST_PATTERN_CP2520_2, From a0fcd3df4591043d447bb08919eed2ce68fbdb5b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 25 Jul 2024 14:18:10 -0400 Subject: [PATCH 0530/1523] drm/amd/display: Add DML2.1 option to disable DRR clamped P-State Strategies [WHY & HOW] When DRR is active with variable refresh rate, add the ability to block DRR clamped P-State strategies (such as SubVP). Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 4 +++- .../gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h | 1 + .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 7 +++++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b6a5ea93fd45..49725f06a2d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -511,6 +511,7 @@ enum in_game_fams_config { INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams INGAME_FAMS_DISABLE, // disable in-game fams INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display + INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies }; /** diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 41ecf00ed196..d35dd507cb9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -66,7 +66,9 @@ static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_contex disable_fams2; pmo_options->disable_fams2 = disable_fams2; - pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming; + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index a824ce56c54e..1c773bbb9992 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -74,6 +74,7 @@ struct dml2_pmo_options { bool disable_drr_var; bool disable_drr_clamped; bool disable_drr_var_when_var_active; + bool disable_drr_clamped_when_var_active; bool disable_fams2; bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ bool disable_dyn_odm; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 9c6397aafd38..06e786995390 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1441,8 +1441,11 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled || - (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable))) { + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + (pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && From 130376ab8300ef06231ebac4db147f06d601d53c Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 28 Jul 2024 14:55:56 -0400 Subject: [PATCH 0531/1523] drm/amd/display: 3.2.295 This version brings along following fixes: - Clean up some files style problems - Program the DET segment when initializing pipes in dcn10_hwseq - Fix overlay with pre-blend color processing - Disable SubVP if Hardware Rotation is Used - Fix few things in DML - Re-enable panel replay feature - Fix null pointer dereference under mst+dsc setup Acked-by: Tom Chung Signed-off-by: Aric Cyr Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 49725f06a2d5..7873daf72608 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.294" +#define DC_VER "3.2.295" #define MAX_SURFACES 3 #define MAX_PLANES 6 From aa94b623cb9233b91ed342dd87ecd62e56ff4938 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Sat, 3 Aug 2024 21:30:18 +0530 Subject: [PATCH 0532/1523] drm/amdgpu: Add address alignment support to DCC buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add address alignment support to the DCC VRAM buffers. v2: - adjust size based on the max_texture_channel_caches values only for GFX12 DCC buffers. - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only for DCC buffers. - roundup non power of two DCC buffer adjusted size to nearest power of two number as the buddy allocator does not support non power of two alignments. This applies only to the contiguous DCC buffers. v3:(Alex) - rewrite the max texture channel caches comparison code in an algorithmic way to determine the alignment size. v4:(Alex) - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c and add a new gmc func callback for dcc alignment. If the callback is non-NULL, call it to get the alignment, otherwise, use the default. v5:(Alex) - Set the Alignment to a default value if the callback doesn't exist. - Add the callback to amdgpu_gmc_funcs. v6: - Fix checkpatch warning reported by Intel CI. v7:(Christian) - remove the AMDGPU_GEM_CREATE_GFX12_DCC flag and keep a flag that checks the BO pinning and for a specific hw generation. v8:(Christian) - move this check into gmc_v12_0_get_dcc_alignment. v9: - Fix 32bit build errors Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Reviewed-by: Frank Min Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 35 ++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 18 ++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..b2c94f12da9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,18 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +533,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +568,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..26efce9aa410 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) From 86598c3819fdc70e59d28221bfa7bc36e9f5777e Mon Sep 17 00:00:00 2001 From: Frank Min Date: Thu, 1 Aug 2024 12:20:18 +0800 Subject: [PATCH 0533/1523] drm/amdgpu: correct sdma7 max dw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit correct sdma7 max dw into 8 Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index e2918318600b..cfd8e183ad50 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1730,7 +1730,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, From 57b09a168ffe88a4c088e8f7ca5de2ffbb8fefa3 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 11:12:24 +0800 Subject: [PATCH 0534/1523] drm/amd/pm: fix unchecked return value warning for vega10_hwmgr This resolves the unchecked return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 6e717ddbb029..9ace863792d4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -2934,9 +2934,7 @@ static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) } } - vega10_enable_smc_features(hwmgr, false, feature_mask); - - return 0; + return vega10_enable_smc_features(hwmgr, false, feature_mask); } /** From c0277b9d7c2ee9ee5dbc948548984f0fbb861301 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 10:38:37 +0800 Subject: [PATCH 0535/1523] drm/amdgpu: fix unchecked return value warning for amdgpu_gfx This resolves the unchecded return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..5c9f36f01db0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -860,8 +860,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r int r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (r) + return r; + } r = amdgpu_ras_block_late_init(adev, ras_block); if (r) @@ -1005,7 +1008,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ pr_err("critical bug! too many kiq readers\n"); goto failed_unlock; } - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1071,7 +1077,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 } spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_wreg(ring, reg, v); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1107,6 +1116,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 failed_undo: amdgpu_ring_undo(ring); +failed_unlock: spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); From 92549780e32718d64a6d08bbbb3c6fffecb541c7 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 1 Aug 2024 13:47:55 +0800 Subject: [PATCH 0536/1523] drm/amdgpu: fix unchecked return value warning for amdgpu_atombios This resolves the unchecded return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 35 ++++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 7dc102f0bc1d..0c8975ac5af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,8 +1018,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1039,8 +1040,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1058,8 +1060,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1070,8 +1073,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1113,8 +1117,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1211,8 +1216,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1221,8 +1227,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; From 46142cc1b9272d664e0258e105b537735bfeeccc Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 5 Aug 2024 19:17:04 +0530 Subject: [PATCH 0537/1523] drm/amdgpu: Add DCC GFX12 flag to enable address alignment We require this flag AMDGPU_GEM_CREATE_GFX12_DCC or any other kernel level GFX12 DCC flag to differentiate the DCC buffers and other pinned display buffers(which has TTM_PL_FLAG_CONTIGUOUS enabled). If we use the TTM_PL_FLAG_CONTIGUOUS flag for DCC buffers, we may over allocate for all the pinned display buffers unnecessarily that leads to memory allocation failure. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index b2c94f12da9e..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -512,7 +512,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - if (adev->gmc.gmc_funcs->get_dcc_alignment) + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); remaining_size = (u64)vres->base.size; From bd44ca3de49cc1badcff7a96010fa2c64f04868c Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 6 Aug 2024 11:56:45 -0400 Subject: [PATCH 0538/1523] dma-debug: avoid deadlock between dma debug vs printk and netconsole Currently the dma debugging code can end up indirectly calling printk under the radix_lock. This happens when a radix tree node allocation fails. This is a problem because the printk code, when used together with netconsole, can end up inside the dma debugging code while trying to transmit a message over netcons. This creates the possibility of either a circular deadlock on the same CPU, with that CPU trying to grab the radix_lock twice, or an ABBA deadlock between different CPUs, where one CPU grabs the console lock first and then waits for the radix_lock, while the other CPU is holding the radix_lock and is waiting for the console lock. The trace captured by lockdep is of the ABBA variant. -> #2 (&dma_entry_hash[i].lock){-.-.}-{2:2}: _raw_spin_lock_irqsave+0x5a/0x90 debug_dma_map_page+0x79/0x180 dma_map_page_attrs+0x1d2/0x2f0 bnxt_start_xmit+0x8c6/0x1540 netpoll_start_xmit+0x13f/0x180 netpoll_send_skb+0x20d/0x320 netpoll_send_udp+0x453/0x4a0 write_ext_msg+0x1b9/0x460 console_flush_all+0x2ff/0x5a0 console_unlock+0x55/0x180 vprintk_emit+0x2e3/0x3c0 devkmsg_emit+0x5a/0x80 devkmsg_write+0xfd/0x180 do_iter_readv_writev+0x164/0x1b0 vfs_writev+0xf9/0x2b0 do_writev+0x6d/0x110 do_syscall_64+0x80/0x150 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #0 (console_owner){-.-.}-{0:0}: __lock_acquire+0x15d1/0x31a0 lock_acquire+0xe8/0x290 console_flush_all+0x2ea/0x5a0 console_unlock+0x55/0x180 vprintk_emit+0x2e3/0x3c0 _printk+0x59/0x80 warn_alloc+0x122/0x1b0 __alloc_pages_slowpath+0x1101/0x1120 __alloc_pages+0x1eb/0x2c0 alloc_slab_page+0x5f/0x150 new_slab+0x2dc/0x4e0 ___slab_alloc+0xdcb/0x1390 kmem_cache_alloc+0x23d/0x360 radix_tree_node_alloc+0x3c/0xf0 radix_tree_insert+0xf5/0x230 add_dma_entry+0xe9/0x360 dma_map_page_attrs+0x1d2/0x2f0 __bnxt_alloc_rx_frag+0x147/0x180 bnxt_alloc_rx_data+0x79/0x160 bnxt_rx_skb+0x29/0xc0 bnxt_rx_pkt+0xe22/0x1570 __bnxt_poll_work+0x101/0x390 bnxt_poll+0x7e/0x320 __napi_poll+0x29/0x160 net_rx_action+0x1e0/0x3e0 handle_softirqs+0x190/0x510 run_ksoftirqd+0x4e/0x90 smpboot_thread_fn+0x1a8/0x270 kthread+0x102/0x120 ret_from_fork+0x2f/0x40 ret_from_fork_asm+0x11/0x20 This bug is more likely than it seems, because when one CPU has run out of memory, chances are the other has too. The good news is, this bug is hidden behind the CONFIG_DMA_API_DEBUG, so not many users are likely to trigger it. Signed-off-by: Rik van Riel Reported-by: Konstantin Ovsepian Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index a6e3792b15f8..d570535342cb 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -416,8 +416,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) From 1c4a057d01f4432704c4dc8842b6e888a91d95df Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 29 Jul 2024 13:57:24 -0700 Subject: [PATCH 0539/1523] dt-bindings: display: panel: samsung,atna45dc02: Document ATNA45DC02 The Samsung ATNA45DC02 panel is an AMOLED eDP panel, similar to the existing ATNA45AF01 and ATNA33XC20 panel but with a higher resolution. Signed-off-by: Rob Clark Acked-by: Conor Dooley Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240729205726.7923-1-robdclark@gmail.com --- .../bindings/display/panel/samsung,atna33xc20.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 5192c93fbd67..87c601bcf20a 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -17,10 +17,13 @@ properties: oneOf: # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel - const: samsung,atna33xc20 - # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel - items: - - const: samsung,atna45af01 - - const: samsung,atna33xc20 + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 + - const: samsung,atna33xc20 enable-gpios: true port: true From 929725bd7eb4eea1f75197d9847f3f1ea5afdad1 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Wed, 31 Jul 2024 19:10:20 +0000 Subject: [PATCH 0540/1523] drm/atomic: allow no-op FB_ID updates for async flips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-space is allowed to submit any property in an async flip as long as the value doesn't change. However we missed one case: as things stand, the kernel rejects no-op FB_ID changes on non-primary planes. Fix this by changing the conditional and skipping drm_atomic_check_prop_changes() only for FB_ID on the primary plane (instead of skipping for FB_ID on any plane). Fixes: 0e26cc72c71c ("drm: Refuse to async flip with atomic prop changes") Signed-off-by: Simon Ser Reviewed-by: André Almeida Tested-by: Xaver Hugl Cc: Alex Deucher Cc: Christian König Cc: Michel Dänzer Cc: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240731191014.878320-1-contact@emersion.fr --- drivers/gpu/drm/drm_atomic_uapi.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7609c798d73d..7936c2023955 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1071,23 +1071,16 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } if (async_flip && - prop != config->prop_fb_id && - prop != config->prop_in_fence_fd && - prop != config->prop_fb_damage_clips) { + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); From 3e7917c0cdad835a5121520fc5686d954b7a61ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2024 08:58:21 +0000 Subject: [PATCH 0541/1523] net: linkwatch: use system_unbound_wq linkwatch_event() grabs possibly very contended RTNL mutex. system_wq is not suitable for such work. Inspired by many noisy syzbot reports. 3 locks held by kworker/0:7/5266: #0: ffff888015480948 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3206 [inline] #0: ffff888015480948 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x90a/0x1830 kernel/workqueue.c:3312 #1: ffffc90003f6fd00 ((linkwatch_work).work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3207 [inline] , at: process_scheduled_works+0x945/0x1830 kernel/workqueue.c:3312 #2: ffffffff8fa6f208 (rtnl_mutex){+.+.}-{3:3}, at: linkwatch_event+0xe/0x60 net/core/link_watch.c:276 Reported-by: syzbot Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240805085821.1616528-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/link_watch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8ec35194bfcb..ab150641142a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_wq, &linkwatch_work, 0); + mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else - schedule_delayed_work(&linkwatch_work, delay); + queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } From fbc05142ccdd0061f6d0e489608935943d2984a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:01:40 -0700 Subject: [PATCH 0542/1523] perf tools: Add tools/include/uapi/README Write down the reason why we keep a copy of headers to the README file instead of adding it to every commit messages. Suggested-by: Jani Nikula Original-by: Arnaldo Carvalho de Melo Original-by: Ingo Molnar Signed-off-by: Namhyung Kim --- tools/include/uapi/README | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 tools/include/uapi/README diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 000000000000..7147b1b2cb28 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + From aef21f6b6a4aae648c890e74c2322d10ab267249 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 09:59:26 -0700 Subject: [PATCH 0543/1523] tools/include: Sync uapi/drm/i915_drm.h with the kernel sources To pick up changes from: 0f1bb41bf396 drm/i915: Support replaying GPU hangs with captured context image This should be used to beautify DRM syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Namhyung Kim --- tools/include/uapi/drm/i915_drm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. From a625df3995c31a5d8cf46f2337b207e93bef9bdd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0544/1523] tools/include: Sync uapi/linux/kvm.h with the kernel sources And other arch-specific UAPI headers to pick up changes from: 4b23e0c199b2 KVM: Ensure new code that references immediate_exit gets extra scrutiny 85542adb65ec KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag 6fef518594bc KVM: x86: Add a capability to configure bus frequency for APIC timer 34ff65901735 x86/sev: Use kernel provided SVSM Calling Areas 5dcc1e76144f Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD 9a0d2f4995dd KVM: PPC: Book3S HV: Add one-reg interface for HASHPKEYR register e9eb790b2557 KVM: PPC: Book3S HV: Add one-reg interface for HASHKEYR register 1a1e6865f516 KVM: PPC: Book3S HV: Add one-reg interface for DEXCR register This should be used to beautify KVM syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h diff -u tools/arch/powerpc/include/uapi/asm/kvm.h arch/powerpc/include/uapi/asm/kvm.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/arch/powerpc/include/uapi/asm/kvm.h | 3 ++ tools/arch/x86/include/uapi/asm/kvm.h | 49 +++++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 1 + tools/include/uapi/linux/kvm.h | 17 +++++++- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a..eaeda001784e 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..bf57a824f722 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc0..637efc055145 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; From 8ec9497d3ef34fab216e277eca5035811f06b421 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0545/1523] tools/include: Sync uapi/linux/perf.h with the kernel sources To pick up changes from: 608f6976c309 perf/x86/intel: Support new data source for Lunar Lake This should be used to beautify perf syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Ian Rogers Cc: Adrian Hunter Cc: "Liang, Kan" Cc: linux-perf-users@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ From b9735006762677c2cd794bfcb1463a9a6ed558dd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0546/1523] tools/include: Sync uapi/sound/asound.h with the kernel sources To pick up changes from: f05c1ffc2745 ALSA: pcm: reinvent the stream synchronization ID API This should be used to beautify sound syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/sound/asound.h include/uapi/sound/asound.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: linux-sound@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/sound/asound.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92..8bf7e8a0eb6f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { From ecabb5e6ce54711c28706fc794d77adb3ecd0605 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 5 Aug 2024 11:07:10 +0530 Subject: [PATCH 0547/1523] drm/xe/xe2: Add performance turning changes Update performance tuning according to the hardware spec. Bspec: 72161 Signed-off-by: Shekhar Chauhan Reviewed-by: Sai Teja Pottumuttu Reviewed-by: Akshata Jahagirdar Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240805053710.877119-1-shekhar.chauhan@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_tuning.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index c50643ab4c84..2c8c4d4218db 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) +#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) + #define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) @@ -193,6 +196,7 @@ #define GSCPSMI_BASE XE_REG(0x880c) #define CCCHKNREG1 XE_REG_MCR(0x8828) +#define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) /* Fuse readout registers for GT */ diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3817b7743b0c..faa1bf42e50e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -39,7 +39,8 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }, { XE_RTP_NAME("Tuning: Compression Overfetch"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), + SET(CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), @@ -50,6 +51,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: Stateless compression control"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, {} }; From 4eb0aab6e4434ada240286d934651dfdb2e08301 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Fri, 2 Aug 2024 15:21:27 -0700 Subject: [PATCH 0548/1523] drm/xe/guc: Bump minimum required GuC version to v70.29.2 The VF API version for this release is 1.13.4. Bumping the minimum required GuC version just before force-probe removal allows us to set a baseline for what API features are expected to be available. I.e., at this point there is no need for any version checking in the code before using a feature. Of course, if/when the API is extended in future GuC releases, those new features will need API version checks in the code. Bump the recommended GuC versions to match. Also add numerical comparison helpers to simplify the version number checks. v2: Reword commit message and make comparison helpers GuC specific - review feedback from Daniele, done by JohnH Signed-off-by: Julia Filipchuk Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240802222129.3976212-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_guc.h | 10 ++++++++++ drivers/gpu/drm/xe/xe_uc_fw.c | 27 ++++++++++++++------------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e0bbf98f849d..c3e6b51f7a09 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -11,6 +11,16 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" +/* + * GuC version number components are defined to be only 8-bit size, + * so converting to a 32bit 8.8.8 integer allows simple (and safe) + * numerical comparisons. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5b70d23724c4..6bd1962bce36 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" @@ -105,15 +106,15 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ @@ -309,10 +310,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - /* We don't support GuC releases older than 70.19 */ - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { - xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n", - release->major, release->minor); + /* We don't support GuC releases older than 70.29.2 */ + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", + release->major, release->minor, release->patch); return -EINVAL; } From 9cc033e07d025dee1fac178c20ba96c56f8d9a91 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Fri, 2 Aug 2024 15:21:28 -0700 Subject: [PATCH 0549/1523] drm/xe/guc: Define GuC version v70.29.2 for BMG UAPI version 1.13.4 Signed-off-by: Julia Filipchuk Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240802222129.3976212-4-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 6bd1962bce36..4bb2a4a80ddc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -106,6 +106,7 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ From 379d9af3f3da2da1bbfa67baf1820c72a080d1f1 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 6 Aug 2024 14:51:13 +0800 Subject: [PATCH 0550/1523] selinux: fix potential counting error in avc_add_xperms_decision() The count increases only when a node is successfully added to the linked list. Cc: stable@vger.kernel.org Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Signed-off-by: Zhen Lei Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 32eb67fb3e42..7087cd2b802d 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struct avc_node *node, { struct avc_xperms_decision_node *dest_xpd; - node->ae.xp_node->xp.len++; dest_xpd = avc_xperms_decision_alloc(src->used); if (!dest_xpd) return -ENOMEM; avc_copy_xperms_decision(&dest_xpd->xpd, src); list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head); + node->ae.xp_node->xp.len++; return 0; } From fe992163575b187405899c5abaad8ef6fb828ff6 Mon Sep 17 00:00:00 2001 From: Sarthak Singh Date: Wed, 24 Jul 2024 22:57:06 +0530 Subject: [PATCH 0551/1523] rust: Support latest version of `rust-analyzer` Sets the `sysroot` field in rust-project.json which is now needed in newer versions of rust-analyzer instead of the `sysroot_src` field. Till [1] `rust-analyzer` used to guess the `sysroot` based on the `sysroot_src` at [2]. Now `sysroot` is a required parameter for a `rust-project.json` file. It is required because `rust-analyzer` need it to find the proc-macro server [3]. In the current version of `rust-analyzer` the `sysroot_src` is only used to include the inbuilt library crates (std, core, alloc, etc) [4]. Since we already specify the core library to be included in the `rust-project.json` we don't need to define the `sysroot_src`. Code editors like VS Code try to use the latest version of rust-analyzer (which is updated every week) instead of the version of rust-analyzer that comes with the rustup toolchain (which is updated every six weeks along with the rust version). Without this change `rust-analyzer` is breaking for anyone using VS Code. As they are getting the latest version of `rust-analyzer` with the changes made in [1]. `rust-analyzer` will also start breaking for other developers as they update their rust version (assuming that also updates the rust-analyzer version on their system). This patch should work with every setup as there is no more guess work being done by `rust-analyzer`. [ Lukas, who leads the rust-analyzer team, says: `sysroot_src` is required now if you want to have the sysroot source libraries be loaded. I think we used to infer it as `{sysroot}/lib/rustlib/src/rust/library` before when only the `sysroot` field was given but that was since changed to make it possible in having a sysroot without the standard library sources (that is only have the binaries available). So if you want the library sources to be loaded by rust-analyzer you will have to set that field as well now. - Miguel ] Link: https://github.com/rust-lang/rust-analyzer/pull/17287 [1] Link: https://github.com/rust-lang/rust-analyzer/blob/f372a8a1176ff8dd5f45ab2ddd45f3530db0374f/crates/project-model/src/workspace.rs#L367-L374 [2] Link: https://github.com/rust-lang/rust-analyzer/blob/eeb192b79aeac47b40add66347022af17a74fbaf/crates/project-model/src/sysroot.rs#L180-L192 [3] Link: https://github.com/search?q=repo%3AVeykril%2Frust-analyzer%20src_root()&type=code [4] Tested-by: Dirk Behme Signed-off-by: Sarthak Singh Link: https://rust-for-linux.zulipchat.com/#narrow/stream/291565-Help/topic/How.20to.20rust-analyzer.20correctly.20working Link: https://lore.kernel.org/r/20240724172713.899399-1-sarthak.singh99@gmail.com [ Formatted comment, fixed typo and removed spurious empty line. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- scripts/generate_rust_analyzer.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index 1f10f92737f2..6c0644b6090c 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -350,7 +350,7 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ - $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ + $(rustc_sysroot) $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f270c7b0cf34..d2bc63cde8c6 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -145,6 +145,7 @@ def main(): parser.add_argument('--cfgs', action='append', default=[]) parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot", type=pathlib.Path) parser.add_argument("sysroot_src", type=pathlib.Path) parser.add_argument("exttree", type=pathlib.Path, nargs="?") args = parser.parse_args() @@ -154,9 +155,12 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) + # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. + assert args.sysroot in args.sysroot_src.parents + rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), - "sysroot_src": str(args.sysroot_src), + "sysroot": str(args.sysroot), } json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) From 9ba48db9f77ce0001dbb882476fa46e092feb695 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 6 Aug 2024 20:53:31 +0800 Subject: [PATCH 0552/1523] i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume Add the missing geni_icc_disable() before return in geni_i2c_runtime_resume(). Fixes: bf225ed357c6 ("i2c: i2c-qcom-geni: Add interconnect support") Signed-off-by: Gaosheng Cui Reviewed-by: Vladimir Zapolskiy Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 78f43648e9f3..365e37bba0f3 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -992,6 +992,7 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) ret = geni_se_resources_on(&gi2c->se); if (ret) { clk_disable_unprepare(gi2c->core_clk); + geni_icc_disable(&gi2c->se); return ret; } From c7a19018bd557c24072b59088ad2684fd83ea3f4 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 5 Aug 2024 16:52:00 -0700 Subject: [PATCH 0553/1523] net: dsa: microchip: Fix Wake-on-LAN check to not return an error The wol variable in ksz_port_set_mac_address() is declared with random data, but the code in ksz_get_wol call may not be executed so the WAKE_MAGIC check may be invalid resulting in an error message when setting a MAC address after starting the DSA driver. Fixes: 3b454b6390c3 ("net: dsa: microchip: ksz9477: Add Wake on Magic Packet support") Signed-off-by: Tristram Ha Reviewed-by: Oleksij Rempel Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240805235200.24982-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b074b4bb0629..b120e66d5669 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -3764,6 +3764,11 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, return -EBUSY; } + /* Need to initialize variable as the code to fill in settings may + * not be executed. + */ + wol.wolopts = 0; + ksz_get_wol(ds, dp->index, &wol); if (wol.wolopts & WAKE_MAGIC) { dev_err(ds->dev, From 1ca645a2f74a4290527ae27130c8611391b07dbf Mon Sep 17 00:00:00 2001 From: ZHANG Yuntian Date: Sat, 3 Aug 2024 15:46:51 +0800 Subject: [PATCH 0554/1523] net: usb: qmi_wwan: add MeiG Smart SRM825L Add support for MeiG Smart SRM825L which is based on Qualcomm 315 chip. T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2dee ProdID=4d22 Rev= 4.14 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=6f345e48 C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: ZHANG Yuntian Link: https://patch.msgid.link/D1EB81385E405DFE+20240803074656.567061-1-yt@radxa.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cfda32047cff..4823dbdf5465 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1432,6 +1432,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ + {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From dae5d79a3bcc3101e1e02a1c2999ca74d1efde05 Mon Sep 17 00:00:00 2001 From: Dominik Grzegorzek Date: Tue, 6 Aug 2024 18:30:08 +0300 Subject: [PATCH 0555/1523] drm/xe: Export xe_hw_engine's mmio accessors Export hw engine's mmio accessors. This is in preparation to use these from eudebug code. v2: s/hw_engine_mmio/xe_hw_engine_mmio (Matthew) v3: kernel doc (Matthew) Cc: Matthew Brost Signed-off-by: Dominik Grzegorzek Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240806153009.1081382-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 88 +++++++++++++++++++------------ drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++ 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 403eb1d2d20a..c7c44de7c30f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -277,8 +277,18 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, - u32 val) +/** + * xe_hw_engine_mmio_write32() - Write engine register + * @hwe: engine + * @reg: register to write into + * @val: desired 32-bit value to write + * + * This function will write val into an engine specific register. + * Forcewake must be held by the caller. + * + */ +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, + struct xe_reg reg, u32 val) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -288,7 +298,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +/** + * xe_hw_engine_mmio_read32() - Read engine register + * @hwe: engine + * @reg: register to read from + * + * This function will read from an engine specific register. + * Forcewake must be held by the caller. + * + * Return: value of the 32-bit register. + */ +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -307,14 +327,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0), + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, @@ -800,7 +820,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, unsigned int dss; u16 group, instance; - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) return; @@ -896,53 +916,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; snapshot->reg.ring_execlist_status = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); snapshot->reg.ring_execlist_status |= val << 32; snapshot->reg.ring_execlist_sq_contents = - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); snapshot->reg.ring_execlist_sq_contents |= val << 32; - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); snapshot->reg.ring_acthd |= val << 32; - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); snapshot->reg.ring_bbaddr |= val << 32; snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd |= val << 32; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); snapshot->reg.ring_start |= val << 32; } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); } snapshot->reg.ring_head = - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); snapshot->reg.ring_mi_mode = - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index d227ffe557eb..022819a4a8eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -78,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); + #endif From b62ef7e4021f74bb12445987efd8109ccca289c4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 6 Aug 2024 18:30:09 +0300 Subject: [PATCH 0556/1523] drm/xe: Add kernel doc for xe_hw_engine_lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kerneldoc was missing from earlier commit where we exported xe_hw_engine_lookup. Add it. Cc: Dominik Grzegorzek Cc: Mika Kuoppala Cc: Matthew Brost Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: intel-xe@lists.freedesktop.org Signed-off-by: Mika Kuoppala Reviewed-by: Jonathan Cavitt Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240806153009.1081382-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c7c44de7c30f..402dfa748e16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -329,11 +329,11 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), - xe_bo_ggtt_addr(hwe->hwsp)); + xe_bo_ggtt_addr(hwe->hwsp)); xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), - _MASKED_BIT_DISABLE(STOP_RING)); + _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } @@ -1173,6 +1173,16 @@ static const enum xe_engine_class user_to_xe_engine_class[] = { [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, }; +/** + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance + * @xe: xe device + * @eci: engine class and instance + * + * This function will find a hardware engine for given engine + * class and instance. + * + * Return: If found xe_hw_engine pointer, NULL otherwise. + */ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) From eb91c456f3714c336f0812dccab422ec0e72bde4 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Tue, 6 Aug 2024 21:33:51 -0500 Subject: [PATCH 0557/1523] ALSA: hda/realtek: Add Framework Laptop 13 (Intel Core Ultra) to quirks The Framework Laptop 13 (Intel Core Ultra) has an ALC285 that ships in a similar configuration to the ALC295 in previous models. It requires the same quirk for headset detection. Signed-off-by: Dustin L. Howett Cc: Link: https://patch.msgid.link/20240806-alsa-hda-realtek-add-framework-laptop-13-intel-core-ultra-to-quirks-v1-1-42d6ce2dbf14@howett.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1645d21d422f..480e82df7a4c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10678,6 +10678,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. From 264b5b5980061d8c6a6a30c031cdec1179fe2bae Mon Sep 17 00:00:00 2001 From: David Gow Date: Sun, 4 Aug 2024 17:18:47 +0800 Subject: [PATCH 0558/1523] drm/i915: Allow evicting to use the requested placement In commit a78a8da51b36 ("drm/ttm: replace busy placement with flags v6"), the old system of having a separate placement list (for placements which should be used without eviction) and a 'busy' placement list (for placements which should be attempted if eviction is required) was replaced with a new one where placements could be marked 'FALLBACK' (to be attempted if eviction is required) or 'DESIRED' (to be attempted first, but not if eviction is required). i915 had always included the requested placement in the list of 'busy' placements: i.e., the placement could be used either if eviction is required or not. But when the new system was put in place, the requested (first) placement was marked 'DESIRED', so would never be used if eviction became necessary. While a bug in the original commit prevented this flag from working, when this was fixed in 4a0e7b3c ("drm/i915: fix applying placement flag"), it caused long hangs on DG2 systems with small BAR. Don't mark the requested placement DESIRED (or FALLBACK), allowing it to be used in both situations. This matches the old behaviour, and resolves the hangs. Thanks to Justin Brewer for bisecting the issue. Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6") Fixes: 4a0e7b3c3753 ("drm/i915: fix applying placement flag") Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11255 Signed-off-by: David Gow Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240804091851.122186-2-david@davidgow.net (cherry picked from commit 54bf0af90844fbf18f5be3272eda69198dfdb622) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0f..fb848fd8ba15 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { From 787db3bb6ed5cee56fc97fecdd61517d89763f0a Mon Sep 17 00:00:00 2001 From: David Gow Date: Sun, 4 Aug 2024 17:18:48 +0800 Subject: [PATCH 0559/1523] drm/i915: Attempt to get pages without eviction first In commit a78a8da51b36 ("drm/ttm: replace busy placement with flags v6"), __i915_ttm_get_pages was updated to use flags instead of the separate 'busy' placement list. However, the behaviour was subtly changed. Originally, the function would attempt to use the preferred placement without eviction, and give an opportunity to restart the operation before falling back to allowing eviction. This was unintentionally changed, as the preferred placement was not given the TTM_PL_FLAG_DESIRED flag, and so eviction could be triggered in that first pass. This caused thrashing, and a significant performance regression on DG2 systems with small BAR. For example, Minecraft and Team Fortress 2 would drop to single-digit framerates. Restore the original behaviour by marking the initial placement as desired on that first attempt. Also, rework this to use a separate struct ttm_palcement, as the individual placements are marked 'const', so hot-patching the flags is even more dodgy than before. Thanks to Justin Brewer for bisecting this. Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6") Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11255 Signed-off-by: David Gow Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240804091851.122186-3-david@davidgow.net (cherry picked from commit 92653f2a572505adaf7f13f695c1907e71a1dc84) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index fb848fd8ba15..5c72462d1f57 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -778,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -799,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); From e102b5ed6e283a144793cab8fcd95f61d0ddbadb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 6 Aug 2024 13:07:22 +0200 Subject: [PATCH 0560/1523] drm/xe: Fix access_ok check in user_fence_create Check size of the data not size of the pointer. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407300421.IBkAja96-lkp@intel.com/ Fixes: 0fde907da2d5 ("drm/xe: Validate user fence during creation") Cc: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Tejas Upadhyay Reviewed-by: Apoorva Singh Link: https://patchwork.freedesktop.org/patch/msgid/20240806110722.28661-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 3aa6270e5dd7..ca826aeb41ea 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); - if (!access_ok(ptr, sizeof(ptr))) + if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); From e688c220732e518c2eb1639e9ef77d4a9311713c Mon Sep 17 00:00:00 2001 From: Miao Wang Date: Wed, 7 Aug 2024 17:37:11 +0800 Subject: [PATCH 0561/1523] LoongArch: Enable general EFI poweroff method efi_shutdown_init() can register a general sys_off handler named efi_power_off(). Enable this by providing efi_poweroff_required(), like arm and x86. Since EFI poweroff is also supported on LoongArch, and the enablement makes the poweroff function usable for hardwares which lack ACPI S5. We prefer ACPI poweroff rather than EFI poweroff (like x86), so we only require EFI poweroff if acpi_gbl_reduced_hardware or acpi_no_s5 is true. Cc: stable@vger.kernel.org Acked-by: Ard Biesheuvel Signed-off-by: Miao Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/efi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 000825406c1f..2bf86aeda874 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -66,6 +66,12 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } +bool efi_poweroff_required(void) +{ + return efi_enabled(EFI_RUNTIME_SERVICES) && + (acpi_gbl_reduced_hardware || acpi_no_s5); +} + unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; #if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) From 4574815abf43e2bf05643e1b3f7a2e5d6df894f0 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 7 Aug 2024 17:37:11 +0800 Subject: [PATCH 0562/1523] LoongArch: Use accessors to page table entries instead of direct dereference As very well explained in commit 20a004e7b017cce282 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables"), an architecture whose page table walker can modify the PTE in parallel must use READ_ONCE()/ WRITE_ONCE() macro to avoid any compiler transformation. So apply that to LoongArch which is such an architecture, in order to avoid potential problems. Similar to commit edf955647269422e ("riscv: Use accessors to page table entries instead of direct dereference"). Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hugetlb.h | 4 +-- arch/loongarch/include/asm/kfence.h | 6 ++-- arch/loongarch/include/asm/pgtable.h | 50 +++++++++++++++++----------- arch/loongarch/kvm/mmu.c | 8 ++--- arch/loongarch/mm/hugetlbpage.c | 6 ++-- arch/loongarch/mm/init.c | 10 +++--- arch/loongarch/mm/kasan_init.c | 10 +++--- arch/loongarch/mm/pgtable.c | 2 +- 8 files changed, 53 insertions(+), 43 deletions(-) diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index aa44b3fe43dd..5da32c00d483 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -34,7 +34,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t clear; - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_val(clear) = (unsigned long)invalid_pte_table; set_pte_at(mm, addr, ptep, clear); @@ -65,7 +65,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t pte, int dirty) { - int changed = !pte_same(*ptep, pte); + int changed = !pte_same(ptep_get(ptep), pte); if (changed) { set_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 92636e82957c..da9e93024626 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -53,13 +53,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (WARN_ON(!pte) || pte_none(*pte)) + if (WARN_ON(!pte) || pte_none(ptep_get(pte))) return false; if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~(_PAGE_VALID | _PAGE_PRESENT))); else - set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | (_PAGE_VALID | _PAGE_PRESENT))); preempt_disable(); local_flush_tlb_one(addr); diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 3fbf1f37c58e..85431f20a14d 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define KFENCE_AREA_START (VMEMMAP_END + 1) #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) +#define ptep_get(ptep) READ_ONCE(*(ptep)) +#define pmdp_get(pmdp) READ_ONCE(*(pmdp)) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -147,11 +150,6 @@ static inline int p4d_present(p4d_t p4d) return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void p4d_clear(p4d_t *p4dp) -{ - p4d_val(*p4dp) = (unsigned long)invalid_pud_table; -} - static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)p4d_val(p4d); @@ -159,7 +157,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *p4d = p4dval; + WRITE_ONCE(*p4d, p4dval); +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, __p4d((unsigned long)invalid_pud_table)); } #define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) @@ -193,17 +196,20 @@ static inline int pud_present(pud_t pud) return pud_val(pud) != (unsigned long)invalid_pmd_table; } -static inline void pud_clear(pud_t *pudp) -{ - pud_val(*pudp) = ((unsigned long)invalid_pmd_table); -} - static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)pud_val(pud); } -#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +static inline void set_pud(pud_t *pud, pud_t pudval) +{ + WRITE_ONCE(*pud, pudval); +} + +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud((unsigned long)invalid_pmd_table)); +} #define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) @@ -231,12 +237,15 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) != (unsigned long)invalid_pte_table; } -static inline void pmd_clear(pmd_t *pmdp) +static inline void set_pmd(pmd_t *pmd, pmd_t pmdval) { - pmd_val(*pmdp) = ((unsigned long)invalid_pte_table); + WRITE_ONCE(*pmd, pmdval); } -#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd((unsigned long)invalid_pte_table)); +} #define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) @@ -314,7 +323,8 @@ extern void paging_init(void); static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); + if (pte_val(pteval) & _PAGE_GLOBAL) { pte_t *buddy = ptep_buddy(ptep); /* @@ -341,8 +351,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ - if (pte_none(*buddy)) - pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + if (pte_none(ptep_get(buddy))) + WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ } } @@ -350,7 +360,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* Preserve global status for the pair */ - if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) set_pte(ptep, __pte(_PAGE_GLOBAL)); else set_pte(ptep, __pte(0)); @@ -603,7 +613,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old = *pmdp; + pmd_t old = pmdp_get(pmdp); pmd_clear(pmdp); diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 2634a9e8d82c..28681dfb4b85 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -714,19 +714,19 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 12222c56cb59..e4068906143b 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -39,11 +39,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { + if (pgd_present(pgdp_get(pgd))) { p4d = p4d_offset(pgd, addr); - if (p4d_present(*p4d)) { + if (p4d_present(p4dp_get(p4d))) { pud = pud_offset(p4d, addr); - if (pud_present(*pud)) + if (pud_present(pudp_get(pud))) pmd = pmd_offset(pud, addr); } } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index bf789d114c2d..8a87a482c8f4 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -141,7 +141,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int huge = pmd_val(*pmd) & _PAGE_HUGE; + int huge = pmd_val(pmdp_get(pmd)) & _PAGE_HUGE; if (huge) vmemmap_verify((pte_t *)pmd, node, addr, next); @@ -173,7 +173,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pud) panic("%s: Failed to allocate memory\n", __func__); @@ -184,7 +184,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pmd) panic("%s: Failed to allocate memory\n", __func__); @@ -195,7 +195,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *pte; pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -216,7 +216,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ptep = populate_kernel_pte(addr); - if (!pte_none(*ptep)) { + if (!pte_none(ptep_get(ptep))) { pte_ERROR(*ptep); return; } diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index c608adc99845..427d6b1aec09 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -105,7 +105,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (__pmd_none(early, READ_ONCE(*pmdp))) { + if (__pmd_none(early, pmdp_get(pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); if (!early) @@ -118,7 +118,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (__pud_none(early, READ_ONCE(*pudp))) { + if (__pud_none(early, pudp_get(pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); if (!early) @@ -131,7 +131,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) { - if (__p4d_none(early, READ_ONCE(*p4dp))) { + if (__p4d_none(early, p4dp_get(p4dp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); if (!early) @@ -154,7 +154,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && __pte_none(early, ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, @@ -166,7 +166,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, do { next = pmd_addr_end(addr, end); kasan_pte_populate(pmdp, addr, next, node, early); - } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, pmdp_get(pmdp))); } static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index bda018150000..eb6a29b491a7 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -128,7 +128,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot) void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); flush_tlb_all(); } From 296b03ce389b4f7b3d7ea5664e53d432fb17e745 Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Wed, 7 Aug 2024 17:37:14 +0800 Subject: [PATCH 0563/1523] LoongArch: KVM: Remove unnecessary definition of KVM_PRIVATE_MEM_SLOTS 1. "KVM_PRIVATE_MEM_SLOTS" is renamed as "KVM_INTERNAL_MEM_SLOTS". 2. "KVM_INTERNAL_MEM_SLOTS" defaults to zero, so it is not necessary to define it in LoongArch's asm/kvm_host.h. Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=bdd1c37a315bc50ab14066c4852bc8dcf070451e Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=b075450868dbc0950f0942617f222eeb989cad10 Reviewed-by: Bibo Mao Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 44b54965f5b4..5f0677e03817 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -26,8 +26,6 @@ #define KVM_MAX_VCPUS 256 #define KVM_MAX_CPUCFG_REGS 21 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) From 494b0792d962e8efac72b3a5b6d9bcd4e6fa8cf0 Mon Sep 17 00:00:00 2001 From: Dandan Zhang Date: Wed, 7 Aug 2024 17:37:14 +0800 Subject: [PATCH 0564/1523] LoongArch: KVM: Remove undefined a6 argument comment for kvm_hypercall() The kvm_hypercall() set for LoongArch is limited to a1-a5. So the mention of a6 in the comment is undefined that needs to be rectified. Reviewed-by: Bibo Mao Signed-off-by: Wentao Guan Signed-off-by: Dandan Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_para.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 335fb86778e2..43ec61589e6c 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -39,9 +39,9 @@ struct kvm_steal_time { * Hypercall interface for KVM hypervisor * * a0: function identifier - * a1-a6: args + * a1-a5: args * Return value will be placed in a0. - * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + * Up to 5 arguments are passed in a1, a2, a3, a4, a5. */ static __always_inline long kvm_hypercall0(u64 fid) { From 382b6eabb0316b7334d97afbdcf33a4e20b0ecd8 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 2 Aug 2024 14:04:27 +0000 Subject: [PATCH 0565/1523] usb: gadget: f_fs: restore ffs_func_disable() functionality The blamed commit made ffs_func_disable() always return -EINVAL as the method calls ffs_func_set_alt() with the ``alt`` argument being ``(unsigned)-1``, which is always greater than MAX_ALT_SETTINGS. Use the MAX_ALT_SETTINGS check just in the f->set_alt() code path, f->disable() doesn't care about the ``alt`` parameter. Make a surgical fix, but really the f->disable() code shall be pulled out from ffs_func_set_alt(), the code will become clearer. A patch will follow. Note that ffs_func_disable() always returning -EINVAL made pixel6 crash on USB disconnect. Fixes: 2f550553e23c ("usb: gadget: f_fs: Add the missing get_alt callback") Cc: stable Reported-by: William McVicker Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240802140428.2000312-2-tudor.ambarus@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d8b096859337..0bfed1741b3e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3731,10 +3731,10 @@ static int ffs_func_set_alt(struct usb_function *f, struct ffs_data *ffs = func->ffs; int ret = 0, intf; - if (alt > MAX_ALT_SETTINGS) - return -EINVAL; - if (alt != (unsigned)-1) { + if (alt > MAX_ALT_SETTINGS) + return -EINVAL; + intf = ffs_func_revmap_intf(func, interface); if (intf < 0) return intf; From a59d8cc9292c58bccec7d8fa27eb59d0a3a6aa0d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 2 Aug 2024 14:04:28 +0000 Subject: [PATCH 0566/1523] usb: gadget: f_fs: pull out f->disable() from ffs_func_set_alt() The ``alt`` parameter was used as a way to differentiate between f->disable() and f->set_alt(). As the code paths diverge quite a bit, pull out the f->disable() code from ffs_func_set_alt(), everything will become clearer and less error prone. No change in functionality intended. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240802140428.2000312-3-tudor.ambarus@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 36 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 0bfed1741b3e..e0ceaa721949 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3731,14 +3731,12 @@ static int ffs_func_set_alt(struct usb_function *f, struct ffs_data *ffs = func->ffs; int ret = 0, intf; - if (alt != (unsigned)-1) { - if (alt > MAX_ALT_SETTINGS) - return -EINVAL; + if (alt > MAX_ALT_SETTINGS) + return -EINVAL; - intf = ffs_func_revmap_intf(func, interface); - if (intf < 0) - return intf; - } + intf = ffs_func_revmap_intf(func, interface); + if (intf < 0) + return intf; if (ffs->func) ffs_func_eps_disable(ffs->func); @@ -3753,12 +3751,6 @@ static int ffs_func_set_alt(struct usb_function *f, if (ffs->state != FFS_ACTIVE) return -ENODEV; - if (alt == (unsigned)-1) { - ffs->func = NULL; - ffs_event_add(ffs, FUNCTIONFS_DISABLE); - return 0; - } - ffs->func = func; ret = ffs_func_eps_enable(func); if (ret >= 0) { @@ -3770,7 +3762,23 @@ static int ffs_func_set_alt(struct usb_function *f, static void ffs_func_disable(struct usb_function *f) { - ffs_func_set_alt(f, 0, (unsigned)-1); + struct ffs_function *func = ffs_func_from_usb(f); + struct ffs_data *ffs = func->ffs; + + if (ffs->func) + ffs_func_eps_disable(ffs->func); + + if (ffs->state == FFS_DEACTIVATED) { + ffs->state = FFS_CLOSING; + INIT_WORK(&ffs->reset_work, ffs_reset_work); + schedule_work(&ffs->reset_work); + return; + } + + if (ffs->state == FFS_ACTIVE) { + ffs->func = NULL; + ffs_event_add(ffs, FUNCTIONFS_DISABLE); + } } static int ffs_func_setup(struct usb_function *f, From becac61a771a4a127e0c38c28110a55cb84d9f41 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 2 Aug 2024 14:41:56 +0800 Subject: [PATCH 0567/1523] usb: typec: tcpm: avoid sink goto SNK_UNATTACHED state if not received source capability message Since commit (122968f8dda8 usb: typec: tcpm: avoid resets for missing source capability messages), state will change from SNK_WAIT_CAPABILITIES to SNK_WAIT_CAPABILITIES_TIMEOUT. We need to change SNK_WAIT_CAPABILITIES -> SNK_READY path to SNK_WAIT_CAPABILITIES_TIMEOUT -> SNK_READY accordingly. Otherwise, the sink port will never change to SNK_READY state if the source does't have PD capability. [ 503.547183] pending state change SNK_WAIT_CAPABILITIES -> SNK_WAIT_CAPABILITIES_TIMEOUT @ 310 ms [rev3 NONE_AMS] [ 503.857239] state change SNK_WAIT_CAPABILITIES -> SNK_WAIT_CAPABILITIES_TIMEOUT [delayed 310 ms] [ 503.857254] PD TX, header: 0x87 [ 503.862440] PD TX complete, status: 2 [ 503.862484] state change SNK_WAIT_CAPABILITIES_TIMEOUT -> SNK_UNATTACHED [rev3 NONE_AMS] Fixes: 122968f8dda8 ("usb: typec: tcpm: avoid resets for missing source capability messages") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Reviewed-by: Sebastian Reichel Reviewed-by: Heikki Krogerus Reviewed-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20240802064156.1846768-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 26f9006e95e1..cce39818e99a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4515,7 +4515,7 @@ static inline enum tcpm_state hard_reset_state(struct tcpm_port *port) return ERROR_RECOVERY; if (port->pwr_role == TYPEC_SOURCE) return SRC_UNATTACHED; - if (port->state == SNK_WAIT_CAPABILITIES) + if (port->state == SNK_WAIT_CAPABILITIES_TIMEOUT) return SNK_READY; return SNK_UNATTACHED; } From 65ba8cef0416816b912c04850fc2468329994353 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 6 Aug 2024 14:20:29 +0300 Subject: [PATCH 0568/1523] usb: typec: ucsi: Fix a deadlock in ucsi_send_command_common() The function returns with the ppm_lock held if the PPM is busy or there's an error. Reported-and-tested-by: Luciano Coelho Fixes: 5e9c1662a89b ("usb: typec: ucsi: rework command execution functions") Signed-off-by: Heikki Krogerus Reported-by: Luciano Coelho Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20240806112029.2984319-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dcd3765cc1f5..432a2d6266d7 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -238,13 +238,10 @@ static int ucsi_send_command_common(struct ucsi *ucsi, u64 cmd, mutex_lock(&ucsi->ppm_lock); ret = ucsi_run_command(ucsi, cmd, &cci, data, size, conn_ack); - if (cci & UCSI_CCI_BUSY) { - ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false); - return ret ? ret : -EBUSY; - } - - if (cci & UCSI_CCI_ERROR) - return ucsi_read_error(ucsi, connector_num); + if (cci & UCSI_CCI_BUSY) + ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false) ?: -EBUSY; + else if (cci & UCSI_CCI_ERROR) + ret = ucsi_read_error(ucsi, connector_num); mutex_unlock(&ucsi->ppm_lock); return ret; From 0829b5bcdd3ba077c408e71e22ac2bfa85bf2c95 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 May 2024 11:12:01 +0100 Subject: [PATCH 0569/1523] drm/i915: 2 GiB of relocations ought to be enough for anybody* Kernel test robot reports i915 can hit a warn in kvmalloc_node which has a purpose of dissalowing crazy size kernel allocations. This was added in 7661809d493b ("mm: don't allow oversized kvmalloc() calls"): /* Don't even allow crazy sizes */ if (WARN_ON_ONCE(size > INT_MAX)) return NULL; This would be kind of okay since i915 at one point dropped the need for making a shadow copy of the relocation list, but then it got re-added in fd1500fcd442 ("Revert "drm/i915/gem: Drop relocation slowpath".") a year after Linus added the above warning. It is plausible that the issue was not seen until now because to trigger gem_exec_reloc test requires a combination of an relatively older generation hardware but with at least 8GiB of RAM installed. Probably even more depending on runtime checks. Lets cap what we allow userspace to pass in using the matching limit. There should be no issue for real userspace since we are talking about "crazy" number of relocations which have no practical purpose. *) Well IGT tests might get upset but they can be easily adjusted. Signed-off-by: Tvrtko Ursulin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202405151008.6ddd1aaf-oliver.sang@intel.com Cc: Kees Cook Cc: Kent Overstreet Cc: Joonas Lahtinen Cc: Rodrigo Vivi Reviewed-by: Kees Cook Reviewed-by: Joonas Lahtinen Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240521101201.18978-1-tursulin@igalia.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 090724fa766c..8aff06933f54 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1533,7 +1533,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) u64_to_user_ptr(entry->relocs_ptr); unsigned long remain = entry->relocation_count; - if (unlikely(remain > N_RELOC(ULONG_MAX))) + if (unlikely(remain > N_RELOC(INT_MAX))) return -EINVAL; /* @@ -1641,7 +1641,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) if (size == 0) return 0; - if (size > N_RELOC(ULONG_MAX)) + if (size > N_RELOC(INT_MAX)) return -EINVAL; addr = u64_to_user_ptr(entry->relocs_ptr); From cff59d8631e1409ffdd22d9d717e15810181b32c Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 1 Aug 2024 13:25:48 +0200 Subject: [PATCH 0570/1523] s390/uv: Panic for set and remove shared access UVC errors The return value uv_set_shared() and uv_remove_shared() (which are wrappers around the share() function) is not always checked. The system integrity of a protected guest depends on the Share and Unshare UVCs being successful. This means that any caller that fails to check the return value will compromise the security of the protected guest. No code path that would lead to such violation of the security guarantees is currently exercised, since all the areas that are shared never get unshared during the lifetime of the system. This might change and become an issue in the future. The Share and Unshare UVCs can only fail in case of hypervisor misbehaviour (either a bug or malicious behaviour). In such cases there is no reasonable way forward, and the system needs to panic. This patch replaces the return at the end of the share() function with a panic, to guarantee system integrity. Fixes: 5abb9351dfd9 ("s390/uv: introduce guest side ultravisor code") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20240801112548.85303-1-imbrenda@linux.ibm.com Message-ID: <20240801112548.85303-1-imbrenda@linux.ibm.com> [frankja@linux.ibm.com: Fixed up patch subject] Signed-off-by: Janosch Frank --- arch/s390/include/asm/uv.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0b5f8f3e84f1..153d93468b77 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -441,7 +441,10 @@ static inline int share(unsigned long addr, u16 cmd) if (!uv_call(0, (u64)&uvcb)) return 0; - return -EINVAL; + pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n", + uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare", + uvcb.header.rc, uvcb.header.rrc); + panic("System security cannot be guaranteed unless the system panics now.\n"); } /* From ff9bf4b34104955017822e9bc42aeeb526ee2a80 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 21:14:08 -0400 Subject: [PATCH 0571/1523] lockdep: Fix lockdep_set_notrack_class() for CONFIG_LOCK_STAT We won't find a contended lock if it's not being tracked. Signed-off-by: Kent Overstreet --- kernel/locking/lockdep.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 58c88220a478..0349f957e672 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, ip); @@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, _RET_IP_); From 7442b5cdf259e2fb112560904b7002ce48d15578 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 20:33:25 -0400 Subject: [PATCH 0572/1523] bcachefs: Don't rely on implicit unsigned -> signed integer conversion implicit integer conversion is a fertile source of bugs, and we really would rather not have the min()/max() macros doing it implicitly. bcachefs appears to be the only place in the kernel where this happens, so let's fix it. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e..f13e619b4b21 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -166,8 +166,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } From 90b211fa2da3f36939e84b6426988832a62caf4b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 30 Jul 2024 20:35:59 -0400 Subject: [PATCH 0573/1523] bcachefs: Add a comment for bucket helper types We've had bugs in the past with incorrect integer conversions in disk accounting code, which is why bucket helpers now always return s64s; add a comment explaining this. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index f13e619b4b21..96a0444ea78f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; From 02026e8931366158d7395f87afeb0b535210dbee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 22:49:31 -0400 Subject: [PATCH 0574/1523] bcachefs: Add missing bch2_trans_begin() call Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12..7ee3b75480df 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); From c1e4446247b2a8919649fb9aae2d86f53bf3d1e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 21:02:34 -0400 Subject: [PATCH 0575/1523] bcachefs: Improved allocator debugging for ec chasing down a device removal deadlock with erasure coding Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 6 ++++-- fs/bcachefs/alloc_foreground.h | 2 +- fs/bcachefs/ec.c | 31 ++++++++++++++++++++----------- fs/bcachefs/sysfs.c | 6 +++++- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292e..8683fe4fae5b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026..c78a64ec0553 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c63..8d0cca2f14ed 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2235,6 +2235,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2264,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276..f393023a3ae2 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; From 2caca9fb166f82937110368768002511628e6e1f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Aug 2024 23:30:48 -0400 Subject: [PATCH 0576/1523] bcachefs: ec should not allocate from ro devs This fixes a device removal deadlock when using erasure coding. Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8d0cca2f14ed..84f1cbf6497f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) From 019f87f1ef967c5a5b263f21ad100f46c874505a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 19 Jul 2024 10:57:53 +0200 Subject: [PATCH 0577/1523] platform: cznic: turris-omnia-mcu: Make watchdog code optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the watchdog part of the driver optional, under a boolean config option. Move the dependency to WATCHDOG to this new option, and change the WATCHDOG_CORE dependency to selection, as is done in most watchdog drivers. This makes the turris-omnia-mcu driver available for compilation even if WATCHDOG is disabled. Fixes: ed46f1f7731d ("platform: cznic: turris-omnia-mcu: fix Kconfig dependencies") Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240719085756.30598-2-kabel@kernel.org Signed-off-by: Arnd Bergmann --- drivers/platform/cznic/Kconfig | 17 ++++++++++++++--- drivers/platform/cznic/Makefile | 2 +- drivers/platform/cznic/turris-omnia-mcu.h | 10 ++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index cb0d4d686d8a..b56c343e21d6 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -17,11 +17,9 @@ config TURRIS_OMNIA_MCU depends on MACH_ARMADA_38X || COMPILE_TEST depends on I2C depends on OF - depends on WATCHDOG depends on GPIOLIB depends on HW_RANDOM depends on RTC_CLASS - depends on WATCHDOG_CORE select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the @@ -31,7 +29,6 @@ config TURRIS_OMNIA_MCU disabled) and the ability to configure wake up from this mode (via rtcwake) - true random number generator (if available on the MCU) - - MCU watchdog - GPIO pins - to get front button press events (the front button can be configured either to generate press events to the CPU or to change @@ -44,7 +41,21 @@ config TURRIS_OMNIA_MCU to be able to program SOC's OTP on board revisions 32+ - to get input from the LED output pins of the WAN ethernet PHY, LAN switch and MiniPCIe ports + Other features can be enabled by subsequent config options. To compile this driver as a module, choose M here; the module will be called turris-omnia-mcu. +if TURRIS_OMNIA_MCU + +config TURRIS_OMNIA_MCU_WATCHDOG + bool "Turris Omnia MCU watchdog" + default y + depends on WATCHDOG + select WATCHDOG_CORE + help + Say Y here to add support for watchdog provided by CZ.NIC's Turris + Omnia MCU. + +endif # TURRIS_OMNIA_MCU + endif # CZNIC_PLATFORMS diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index eae4c6b341ff..7599b4835056 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -5,4 +5,4 @@ turris-omnia-mcu-y := turris-omnia-mcu-base.o turris-omnia-mcu-y += turris-omnia-mcu-gpio.o turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o turris-omnia-mcu-y += turris-omnia-mcu-trng.o -turris-omnia-mcu-y += turris-omnia-mcu-watchdog.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 2ca56ae13aa9..85bf9ab39356 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -47,8 +47,10 @@ struct omnia_mcu { u32 rtc_alarm; bool front_button_poweron; +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG /* MCU watchdog */ struct watchdog_device wdt; +#endif /* true random number generator */ struct hwrng trng; @@ -189,6 +191,14 @@ extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); int omnia_mcu_register_trng(struct omnia_mcu *mcu); + +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG int omnia_mcu_register_watchdog(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_watchdog(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #endif /* __TURRIS_OMNIA_MCU_H */ From c7da0d4e33ce262dbed7b9ae4cf013aad0f541f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 19 Jul 2024 10:57:54 +0200 Subject: [PATCH 0578/1523] platform: cznic: turris-omnia-mcu: Make TRNG code optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the TRNG part of the driver optional, under a boolean config option. This makes the driver turris-omnia-mcu available for compilation even if HW_RANDOM is disabled. Fixes: ed46f1f7731d ("platform: cznic: turris-omnia-mcu: fix Kconfig dependencies") Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240719085756.30598-3-kabel@kernel.org Signed-off-by: Arnd Bergmann --- drivers/platform/cznic/Kconfig | 10 ++++++++-- drivers/platform/cznic/Makefile | 2 +- drivers/platform/cznic/turris-omnia-mcu.h | 10 ++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index b56c343e21d6..98f17562646e 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -18,7 +18,6 @@ config TURRIS_OMNIA_MCU depends on I2C depends on OF depends on GPIOLIB - depends on HW_RANDOM depends on RTC_CLASS select GPIOLIB_IRQCHIP help @@ -28,7 +27,6 @@ config TURRIS_OMNIA_MCU - board poweroff into true low power mode (with voltage regulators disabled) and the ability to configure wake up from this mode (via rtcwake) - - true random number generator (if available on the MCU) - GPIO pins - to get front button press events (the front button can be configured either to generate press events to the CPU or to change @@ -56,6 +54,14 @@ config TURRIS_OMNIA_MCU_WATCHDOG Say Y here to add support for watchdog provided by CZ.NIC's Turris Omnia MCU. +config TURRIS_OMNIA_MCU_TRNG + bool "Turris Omnia MCU true random number generator" + default y + depends on HW_RANDOM + help + Say Y here to add support for the true random number generator + provided by CZ.NIC's Turris Omnia MCU. + endif # TURRIS_OMNIA_MCU endif # CZNIC_PLATFORMS diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index 7599b4835056..0c28fa859391 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o turris-omnia-mcu-y += turris-omnia-mcu-gpio.o turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o -turris-omnia-mcu-y += turris-omnia-mcu-trng.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 85bf9ab39356..d07a32cfe238 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -52,9 +52,11 @@ struct omnia_mcu { struct watchdog_device wdt; #endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG /* true random number generator */ struct hwrng trng; struct completion trng_entropy_ready; +#endif }; int omnia_cmd_write_read(const struct i2c_client *client, @@ -190,7 +192,15 @@ extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); + +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG int omnia_mcu_register_trng(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_trng(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG int omnia_mcu_register_watchdog(struct omnia_mcu *mcu); From 74a22fced5a012c57f56d1cf7ea926cc366a2a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 19 Jul 2024 10:57:55 +0200 Subject: [PATCH 0579/1523] platform: cznic: turris-omnia-mcu: Make poweroff and wakeup code optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the system poweroff and RTC wakeup part of the driver optional, under a boolean config option. Move the dependency to RTC_CLASS to this new option. This makes the turris-omnia-mcu driver available for compilation even if RTC_CLASS is disabled. Fixes: ed46f1f7731d ("platform: cznic: turris-omnia-mcu: fix Kconfig dependencies") Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240719085756.30598-4-kabel@kernel.org Signed-off-by: Arnd Bergmann --- drivers/platform/cznic/Kconfig | 13 +++++++++---- drivers/platform/cznic/Makefile | 2 +- drivers/platform/cznic/turris-omnia-mcu-base.c | 2 ++ drivers/platform/cznic/turris-omnia-mcu.h | 13 +++++++++++-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index 98f17562646e..f02856226dd7 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -18,15 +18,11 @@ config TURRIS_OMNIA_MCU depends on I2C depends on OF depends on GPIOLIB - depends on RTC_CLASS select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the microcontroller on the CZ.NIC's Turris Omnia SOHO router. The features include: - - board poweroff into true low power mode (with voltage regulators - disabled) and the ability to configure wake up from this mode (via - rtcwake) - GPIO pins - to get front button press events (the front button can be configured either to generate press events to the CPU or to change @@ -45,6 +41,15 @@ config TURRIS_OMNIA_MCU if TURRIS_OMNIA_MCU +config TURRIS_OMNIA_MCU_SYSOFF_WAKEUP + bool "Turris Omnia MCU system off and RTC wakeup" + default y + depends on RTC_CLASS + help + Say Y here to add support for CZ.NIC's Turris Omnia board poweroff + into true low power mode (with voltage regulators disabled) and the + ability to configure wake up from this mode (via rtcwake). + config TURRIS_OMNIA_MCU_WATCHDOG bool "Turris Omnia MCU watchdog" default y diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index 0c28fa859391..380530ba74f7 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o turris-omnia-mcu-y += turris-omnia-mcu-gpio.o -turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP) += turris-omnia-mcu-sys-off-wakeup.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index c68a7a84a951..7b514e60273d 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -198,7 +198,9 @@ static const struct attribute_group omnia_mcu_base_group = { static const struct attribute_group *omnia_mcu_groups[] = { &omnia_mcu_base_group, &omnia_mcu_gpio_group, +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP &omnia_mcu_poweroff_group, +#endif NULL }; diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index d07a32cfe238..75fa2111546f 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -42,10 +42,12 @@ struct omnia_mcu { unsigned long last_status; bool button_pressed_emul; +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP /* RTC device for configuring wake-up */ struct rtc_device *rtcdev; u32 rtc_alarm; bool front_button_poweron; +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG /* MCU watchdog */ @@ -188,10 +190,17 @@ static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; -extern const struct attribute_group omnia_mcu_poweroff_group; - int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); + +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP +extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG int omnia_mcu_register_trng(struct omnia_mcu *mcu); From af340b7aa21c351ba08950f664af601888633614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 19 Jul 2024 10:57:56 +0200 Subject: [PATCH 0580/1523] platform: cznic: turris-omnia-mcu: Make GPIO code optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the GPIO part of the driver optional, under a boolean config option. Move the dependency to GPIOLIB and OF and the selection of GPIOLIB_IRQCHIP to this new option. This makes the turris-omnia-mcu driver available for compilation even if GPIOLIB or OF are disabled. Fixes: ed46f1f7731d ("platform: cznic: turris-omnia-mcu: fix Kconfig dependencies") Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240719085756.30598-5-kabel@kernel.org Signed-off-by: Arnd Bergmann --- drivers/platform/cznic/Kconfig | 42 +++++++++++-------- drivers/platform/cznic/Makefile | 2 +- .../platform/cznic/turris-omnia-mcu-base.c | 2 + drivers/platform/cznic/turris-omnia-mcu.h | 9 ++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index f02856226dd7..a111eca8ff57 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -16,31 +16,38 @@ config TURRIS_OMNIA_MCU tristate "Turris Omnia MCU driver" depends on MACH_ARMADA_38X || COMPILE_TEST depends on I2C - depends on OF - depends on GPIOLIB - select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the microcontroller on the CZ.NIC's Turris Omnia SOHO router. - The features include: - - GPIO pins - - to get front button press events (the front button can be - configured either to generate press events to the CPU or to change - front LEDs panel brightness) - - to enable / disable USB port voltage regulators and to detect - USB overcurrent - - to detect MiniPCIe / mSATA card presence in MiniPCIe port 0 - - to configure resets of various peripherals on board revisions 32+ - - to enable / disable the VHV voltage regulator to the SOC in order - to be able to program SOC's OTP on board revisions 32+ - - to get input from the LED output pins of the WAN ethernet PHY, LAN - switch and MiniPCIe ports - Other features can be enabled by subsequent config options. + This option only enables the core part of the driver. Specific + features can be enabled by subsequent config options. To compile this driver as a module, choose M here; the module will be called turris-omnia-mcu. if TURRIS_OMNIA_MCU +config TURRIS_OMNIA_MCU_GPIO + bool "Turris Omnia MCU GPIOs" + default y + depends on GPIOLIB + depends on OF + select GPIOLIB_IRQCHIP + help + Say Y here to add support for controlling MCU GPIO pins and receiving + MCU interrupts on CZ.NIC's Turris Omnia. + This enables you to + - get front button press events (the front button can be configured + either to generate press events to the CPU or to change front LEDs + panel brightness), + - enable / disable USB port voltage regulators and to detect USB + overcurrent, + - detect MiniPCIe / mSATA card presence in MiniPCIe port 0, + - configure resets of various peripherals on board revisions 32+, + - enable / disable the VHV voltage regulator to the SOC in order to be + able to program SOC's OTP on board revisions 32+, + - get input from the LED output pins of the WAN ethernet PHY, LAN + switch and MiniPCIe ports. + config TURRIS_OMNIA_MCU_SYSOFF_WAKEUP bool "Turris Omnia MCU system off and RTC wakeup" default y @@ -62,6 +69,7 @@ config TURRIS_OMNIA_MCU_WATCHDOG config TURRIS_OMNIA_MCU_TRNG bool "Turris Omnia MCU true random number generator" default y + depends on TURRIS_OMNIA_MCU_GPIO depends on HW_RANDOM help Say Y here to add support for the true random number generator diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index 380530ba74f7..ce6d997f34d6 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o -turris-omnia-mcu-y += turris-omnia-mcu-gpio.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_GPIO) += turris-omnia-mcu-gpio.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP) += turris-omnia-mcu-sys-off-wakeup.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index 7b514e60273d..58f9afae2867 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -197,7 +197,9 @@ static const struct attribute_group omnia_mcu_base_group = { static const struct attribute_group *omnia_mcu_groups[] = { &omnia_mcu_base_group, +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO &omnia_mcu_gpio_group, +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP &omnia_mcu_poweroff_group, #endif diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 75fa2111546f..fed0d357fea3 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -33,6 +33,7 @@ struct omnia_mcu { u8 board_first_mac[ETH_ALEN]; u8 board_revision; +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO /* GPIO chip */ struct gpio_chip gc; struct mutex lock; @@ -41,6 +42,7 @@ struct omnia_mcu { struct delayed_work button_release_emul_work; unsigned long last_status; bool button_pressed_emul; +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP /* RTC device for configuring wake-up */ @@ -188,9 +190,16 @@ static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); } +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP extern const struct attribute_group omnia_mcu_poweroff_group; From a626ada4184b1888c1c5a4566071643f6e8081a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Jul 2024 16:49:23 +0200 Subject: [PATCH 0581/1523] doc: platform: cznic: turris-omnia-mcu: Fix sphinx-build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix sphinx-build warnings ERROR: Unexpected indentation. WARNING: Block quote ends without a blank line; unexpected unindent in Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/all/20240702174938.04c12aab@canb.auug.org.au/ Fixes: dfa556e45ae9 ("platform: cznic: turris-omnia-mcu: Add support for MCU connected GPIOs") Tested-by: Stephen Rothwell Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240730144924.25552-2-kabel@kernel.org Signed-off-by: Arnd Bergmann --- .../ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu index 307a55f599cb..210a39043dc1 100644 --- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu @@ -32,9 +32,9 @@ Description: (RW) The front button on the Turris Omnia router can be interrupt. This file switches between these two modes: - - "mcu" makes the button press event be handled by the MCU to - change the LEDs panel intensity. - - "cpu" makes the button press event be handled by the CPU. + - "mcu" makes the button press event be handled by the MCU to + change the LEDs panel intensity. + - "cpu" makes the button press event be handled by the CPU. Format: %s. From e1793fea0350330a6a50721ecb2ad66846e0c51e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Jul 2024 16:49:24 +0200 Subject: [PATCH 0582/1523] doc: platform: cznic: turris-omnia-mcu: Use double backticks for attribute value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use double backticks instead of quotes for sysfs attribute value. This makes sphinx generate the "mcu" and "cpu" values in monospace when rendering to HTML. Fixes: dfa556e45ae9 ("platform: cznic: turris-omnia-mcu: Add support for MCU connected GPIOs") Tested-by: Stephen Rothwell Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20240730144924.25552-3-kabel@kernel.org Signed-off-by: Arnd Bergmann --- .../ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu index 210a39043dc1..35a8f6dae5bf 100644 --- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu @@ -32,9 +32,9 @@ Description: (RW) The front button on the Turris Omnia router can be interrupt. This file switches between these two modes: - - "mcu" makes the button press event be handled by the MCU to + - ``mcu`` makes the button press event be handled by the MCU to change the LEDs panel intensity. - - "cpu" makes the button press event be handled by the CPU. + - ``cpu`` makes the button press event be handled by the CPU. Format: %s. From cddaac0459c004c439510bd109929466b0d5908e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Aug 2024 09:12:58 -0700 Subject: [PATCH 0583/1523] ARM: pxa/gumstix: fix attaching properties to vbus gpio device Commit f1d6588af93b tried to convert GPIO lookup tables to software properties for the vbus gpio device, bit forgot the most important step: actually attaching the new properties to the device. Also fix up the name of the property array to reflect the board name, and add missing gpio/property.h and devices.h includes absence of which causes compile failures on some configurations. Switch "#ifdef CONFIG_USB_PXA25X" to "#if IS_ENABLED(CONFIG_USB_PXA25X)" because it should not matter if the driver is buolt in or a module, it still need vbus controls. Reported-by: Arnd Bergmann Fixes: f1d6588af93b ("ARM: pxa/gumstix: convert vbus gpio to use software nodes") Signed-off-by: Dmitry Torokhov Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/gumstix.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index efa6faa62a2c..1713bdf3b71e 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include "udc.h" #include "gumstix.h" +#include "devices.h" #include "generic.h" @@ -99,8 +101,8 @@ static void __init gumstix_mmc_init(void) } #endif -#ifdef CONFIG_USB_PXA25X -static const struct property_entry spitz_mci_props[] __initconst = { +#if IS_ENABLED(CONFIG_USB_PXA25X) +static const struct property_entry gumstix_vbus_props[] __initconst = { PROPERTY_ENTRY_GPIO("vbus-gpios", &pxa2xx_gpiochip_node, GPIO_GUMSTIX_USB_GPIOn, GPIO_ACTIVE_HIGH), PROPERTY_ENTRY_GPIO("pullup-gpios", &pxa2xx_gpiochip_node, @@ -109,8 +111,9 @@ static const struct property_entry spitz_mci_props[] __initconst = { }; static const struct platform_device_info gumstix_gpio_vbus_info __initconst = { - .name = "gpio-vbus", - .id = PLATFORM_DEVID_NONE, + .name = "gpio-vbus", + .id = PLATFORM_DEVID_NONE, + .properties = gumstix_vbus_props, }; static void __init gumstix_udc_init(void) From c48b5a4cf3125adb679e28ef093f66ff81368d05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 6 Aug 2024 20:48:43 +0200 Subject: [PATCH 0584/1523] x86/mm: Fix PTI for i386 some more So it turns out that we have to do two passes of pti_clone_entry_text(), once before initcalls, such that device and late initcalls can use user-mode-helper / modprobe and once after free_initmem() / mark_readonly(). Now obviously mark_readonly() can cause PMD splits, and pti_clone_pgtable() doesn't like that much. Allow the late clone to split PMDs so that pagetables stay in sync. [peterz: Changelog and comments] Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Guenter Roeck Link: https://lkml.kernel.org/r/20240806184843.GX37996@noisy.programming.kicks-ass.net --- arch/x86/mm/pti.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index bfdf5f45b137..851ec8f1363a 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!pmd) return NULL; - /* We can't do anything sensible if we hit a large mapping. */ + /* Large PMD mapping found */ if (pmd_leaf(*pmd)) { - WARN_ON(1); - return NULL; + /* Clear the PMD if we hit a large mapping from the first round */ + if (late_text) { + set_pmd(pmd, __pmd(0)); + } else { + WARN_ON_ONCE(1); + return NULL; + } } if (pmd_none(*pmd)) { @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void) if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) return; - target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false); if (WARN_ON(!target_pte)) return; @@ -301,7 +306,7 @@ enum pti_clone_level { static void pti_clone_pgtable(unsigned long start, unsigned long end, - enum pti_clone_level level) + enum pti_clone_level level, bool late_text) { unsigned long addr; @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, return; /* Allocate PTE in the user page-table */ - target_pte = pti_user_pagetable_walk_pte(addr); + target_pte = pti_user_pagetable_walk_pte(addr, late_text); if (WARN_ON(!target_pte)) return; @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void) phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); pte_t *target_pte; - target_pte = pti_user_pagetable_walk_pte(va); + target_pte = pti_user_pagetable_walk_pte(va, false); if (WARN_ON(!target_pte)) return; @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void) start = CPU_ENTRY_AREA_BASE; end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); - pti_clone_pgtable(start, end, PTI_CLONE_PMD); + pti_clone_pgtable(start, end, PTI_CLONE_PMD, false); } #endif /* CONFIG_X86_64 */ @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry text and force it RO. */ -static void pti_clone_entry_text(void) +static void pti_clone_entry_text(bool late) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_LEVEL_KERNEL_IMAGE); + PTI_LEVEL_KERNEL_IMAGE, late); } /* @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void) * pti_set_kernel_image_nonglobal() did to clear the * global bit. */ - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); + pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false); /* * pti_clone_pgtable() will set the global bit in any PMDs @@ -638,8 +643,15 @@ void __init pti_init(void) /* Undo all global bits from the init pagetables in head_64.S: */ pti_set_kernel_image_nonglobal(); + /* Replace some of the global bits just for shared entry text: */ - pti_clone_entry_text(); + /* + * This is very early in boot. Device and Late initcalls can do + * modprobe before free_initmem() and mark_readonly(). This + * pti_clone_entry_text() allows those user-mode-helpers to function, + * but notably the text is still RW. + */ + pti_clone_entry_text(false); pti_setup_espfix64(); pti_setup_vsyscall(); } @@ -656,10 +668,11 @@ void pti_finalize(void) if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* - * We need to clone everything (again) that maps parts of the - * kernel image. + * This is after free_initmem() (all initcalls are done) and we've done + * mark_readonly(). Text is now NX which might've split some PMDs + * relative to the early clone. */ - pti_clone_entry_text(); + pti_clone_entry_text(true); pti_clone_kernel_text(); debug_checkwx_user(); From edbbaae42a56f9a2b39c52ef2504dfb3fb0a7858 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Aug 2024 10:20:44 +0300 Subject: [PATCH 0585/1523] genirq/irqdesc: Honor caller provided affinity in alloc_desc() Currently, whenever a caller is providing an affinity hint for an interrupt, the allocation code uses it to calculate the node and copies the cpumask into irq_desc::affinity. If the affinity for the interrupt is not marked 'managed' then the startup of the interrupt ignores irq_desc::affinity and uses the system default affinity mask. Prevent this by setting the IRQD_AFFINITY_SET flag for the interrupt in the allocator, which causes irq_setup_affinity() to use irq_desc::affinity on interrupt startup if the mask contains an online CPU. [ tglx: Massaged changelog ] Fixes: 45ddcecbfa94 ("genirq: Use affinity hint in irqdesc allocation") Signed-off-by: Shay Drory Signed-off-by: Thomas Gleixner Cc: Link: https://lore.kernel.org/all/20240806072044.837827-1-shayd@nvidia.com --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 07e99c936ba5..1dee88ba0ae4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -530,6 +530,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; From f1cb9d5aefba07fc52b06b7bd5fdcd9ef91157b4 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Tue, 6 Aug 2024 19:20:11 +0300 Subject: [PATCH 0586/1523] wifi: rtlwifi: rtl8192du: Initialise value32 in _rtl92du_init_queue_reserved_page GCC complains: In file included from include/linux/ieee80211.h:21, from include/net/mac80211.h:20, from drivers/net/wireless/realtek/rtlwifi/rtl8192du/../wifi.h:14, from drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c:4: In function 'u32p_replace_bits', inlined from '_rtl92du_init_queue_reserved_page.isra' at drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c:225:2: >> include/linux/bitfield.h:189:18: warning: 'value32' is used uninitialized [-Wuninitialized] Part of the variable is indeed left uninitialised. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408062100.DWhN0CYH-lkp@intel.com/ Fixes: e769c67105d3 ("wifi: rtlwifi: Add rtl8192du/hw.{c,h}") Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://patch.msgid.link/2a808244-93d0-492c-b304-ae1974df5df9@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c index 700c6e2bcad1..ff458fb8514d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c @@ -181,11 +181,11 @@ static void _rtl92du_init_queue_reserved_page(struct ieee80211_hw *hw, struct rtl_hal *rtlhal = rtl_hal(rtlpriv); u32 txqpagenum, txqpageunit; u32 txqremainingpage; + u32 value32 = 0; u32 numhq = 0; u32 numlq = 0; u32 numnq = 0; u32 numpubq; - u32 value32; if (rtlhal->macphymode != SINGLEMAC_SINGLEPHY) { numpubq = NORMAL_PAGE_NUM_PUBQ_92D_DUAL_MAC; From 25a7123579ecac9a89a7e5b8d8a580bee4b68acd Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 15 Jul 2024 17:39:10 +0200 Subject: [PATCH 0587/1523] ice: Fix reset handler Synchronize OICR IRQ when preparing for reset to avoid potential race conditions between the reset procedure and OICR Fixes: 4aad5335969f ("ice: add individual interrupt allocation") Signed-off-by: Grzegorz Nitka Signed-off-by: Sergey Temerkhanov Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3de020020bc4..6f97ed471fe9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -559,6 +559,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ From bca515d58367494d8699ab53c645b57b71fb4785 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 15 Jul 2024 17:39:11 +0200 Subject: [PATCH 0588/1523] ice: Skip PTP HW writes during PTP reset procedure Block HW write access for the driver while the device is in reset to avoid potential race condition and access to the PTP HW in non-nominal state which could lead to undesired effects Fixes: 4aad5335969f ("ice: add individual interrupt allocation") Signed-off-by: Grzegorz Nitka Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Sergey Temerkhanov Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc13286..ef2e858f49bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ From c181da18a7302c5de510fe975a3a333299c6e4b7 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Fri, 26 Jul 2024 06:19:28 -0400 Subject: [PATCH 0589/1523] ice: Fix incorrect assigns of FEC counts Commit ac21add2540e ("ice: Implement driver functionality to dump fec statistics") introduces obtaining FEC correctable and uncorrectable stats per netdev in ICE driver. Unfortunately the assignment of values to fec_stats structure has been done incorrectly. This commit fixes the assignments. Fixes: ac21add2540e ("ice: Implement driver functionality to dump fec statistics") Reviewed-by: Wojciech Drewek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c976132..bc79ba974e49 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4673,10 +4673,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } From 568901e709d7fa564dfdc75816ea59fec65d20a0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0590/1523] tools/include: Sync uapi/asm-generic/unistd.h with the kernel sources And arch syscall tables to pick up changes from: b1e31c134a8a powerpc: restore some missing spu syscalls d3882564a77c syscalls: fix compat_sys_io_pgetevents_time64 usage 54233a425403 uretprobe: change syscall number, again 63ded110979b uprobe: Change uretprobe syscall scope and number 9142be9e6443 x86/syscall: Mark exit[_group] syscall handlers __noreturn 9aae1baa1c5d x86, arm: Add missing license tag to syscall tables files 5c28424e9a34 syscalls: Fix to add sys_uretprobe to syscall.tbl 190fec72df4a uprobe: Wire up uretprobe system call This should be used to beautify syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Arnd Bergmann Cc: linux-arch@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 6 +++++- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 8 +++++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d02723..5bf6148cac2b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..7093ee21c0d1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# +# [ [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal From ed86525f1f4b738bae75c73e89f25430bd0af1b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0591/1523] tools/include: Sync network socket headers with the kernel sources To pick up changes from: d25a92ccae6b net/smc: Introduce IPPROTO_SMC 060f4ba6e403 io_uring/net: move charging socket out of zc io_uring bb6aaf736680 net: Split a __sys_listen helper for io_uring dc2e77979412 net: Split a __sys_bind helper for io_uring This should be used to beautify socket syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/in.h | 2 ++ tools/perf/trace/beauty/include/linux/socket.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370b..df9cdb8bbfb8 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, From 845295f4004c7e1591bab4bad01b51f37d32272f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0592/1523] tools/include: Sync filesystem headers with the kernel sources To pick up changes from: 0f9ca80fa4f9 fs: Add initial atomic write support info to statx f9af549d1fd3 fs: export mount options via statmount() 0a3deb11858a fs: Allow listmount() in foreign mount namespace 09b31295f833 fs: export the mount ns id via statmount d04bccd8c19d listmount: allow listing in reverse order bfc69fd05ef9 fs/procfs: add build ID fetching to PROCMAP_QUERY API ed5d583a88a9 fs/procfs: implement efficient VMA querying API for /proc//maps This should be used to beautify FS syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/stat.h | 12 +- .../perf/trace/beauty/include/uapi/linux/fs.h | 163 +++++++++++++++++- .../trace/beauty/include/uapi/linux/mount.h | 10 +- .../trace/beauty/include/uapi/linux/stat.h | 12 +- 4 files changed, 189 insertions(+), 8 deletions(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd664..753971770733 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ From f6d9883f8e680460be4714d4d35c7acac1dffeaf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0593/1523] tools/include: Sync x86 headers with the kernel sources To pick up changes from: 149fd4712bcd perf/x86/intel: Support Perfmon MSRs aliasing 21b362cc762a x86/resctrl: Enable shared RMID mode on Sub-NUMA Cluster (SNC) systems 4f460bff7b6a cpufreq: acpi: move MSR_K7_HWCR_CPB_DIS_BIT into msr-index.h 7ea81936b853 x86/cpufeatures: Add HWP highest perf change feature flag 78ce84b9e0a5 x86/cpufeatures: Flip the /proc/cpuinfo appearance logic 1beb348d5c7f x86/sev: Provide SVSM discovery support This should be used to beautify x86 syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: x86@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 803 ++++++++++++----------- tools/arch/x86/include/asm/msr-index.h | 11 + 2 files changed, 414 insertions(+), 400 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661..dd4682857c12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c..82c6a4d350e0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 From d5b854893d27b4030943a10cf28a07189aab0c36 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 0594/1523] tools/include: Sync arm64 headers with the kernel sources To pick up changes from: 9ef54a384526 arm64: cputype: Add Cortex-A725 definitions 58d245e03c32 arm64: cputype: Add Cortex-X1C definitions fd2ff5f0b320 arm64: cputype: Add Cortex-X925 definitions add332c40328 arm64: cputype: Add Cortex-A720 definitions be5a6f238700 arm64: cputype: Add Cortex-X3 definitions This should be used to beautify x86 syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/asm/cputype.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a2..5fd7caea4419 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) From ab84ba647f2c94ac4d0c3fc6951c49f08aa1fcf7 Mon Sep 17 00:00:00 2001 From: Zhiquan Li Date: Mon, 5 Aug 2024 18:35:31 +0800 Subject: [PATCH 0595/1523] x86/acpi: Remove __ro_after_init from acpi_mp_wake_mailbox On a platform using the "Multiprocessor Wakeup Structure"[1] to startup secondary CPUs the control processor needs to memremap() the physical address of the MP Wakeup Structure mailbox to the variable acpi_mp_wake_mailbox, which holds the virtual address of mailbox. To wake up the AP the control processor writes the APIC ID of AP, the wakeup vector and the ACPI_MP_WAKE_COMMAND_WAKEUP command into the mailbox. Current implementation doesn't consider the case which restricts boot time CPU bringup to 1 with the kernel parameter "maxcpus=1" and brings other CPUs online later from user space as it sets acpi_mp_wake_mailbox to read-only after init. So when the first AP is tried to brought online after init, the attempt to update the variable results in a kernel panic. The memremap() call that initializes the variable cannot be moved into acpi_parse_mp_wake() because memremap() is not functional at that point in the boot process. Also as the APs might never be brought up, keep the memremap() call in acpi_wakeup_cpu() so that the operation only takes place when needed. Fixes: 24dd05da8c79 ("x86/apic: Mark acpi_mp_wake_* variables as __ro_after_init") Signed-off-by: Zhiquan Li Signed-off-by: Thomas Gleixner Reviewed-by: Kirill A. Shutemov Link: https://lore.kernel.org/all/20240805103531.1230635-1-zhiquan1.li@intel.com --- arch/x86/kernel/acpi/madt_wakeup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 6cfe762be28b..d5ef6215583b 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -19,7 +19,7 @@ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; /* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; From e639222a51196c69c70b49b67098ce2f9919ed08 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 6 Aug 2024 19:22:07 +0800 Subject: [PATCH 0596/1523] x86/paravirt: Fix incorrect virt spinlock setting on bare metal The kernel can change spinlock behavior when running as a guest. But this guest-friendly behavior causes performance problems on bare metal. The kernel uses a static key to switch between the two modes. In theory, the static key is enabled by default (run in guest mode) and should be disabled for bare metal (and in some guests that want native behavior or paravirt spinlock). A performance drop is reported when running encode/decode workload and BenchSEE cache sub-workload. Bisect points to commit ce0a1b608bfc ("x86/paravirt: Silence unused native_pv_lock_init() function warning"). When CONFIG_PARAVIRT_SPINLOCKS is disabled the virt_spin_lock_key is incorrectly set to true on bare metal. The qspinlock degenerates to test-and-set spinlock, which decreases the performance on bare metal. Set the default value of virt_spin_lock_key to false. If booting in a VM, enable this key. Later during the VM initialization, if other high-efficient spinlock is preferred (e.g. paravirt-spinlock), or the user wants the native qspinlock (via nopvspin boot commandline), the virt_spin_lock_key is disabled accordingly. This results in the following decision matrix: X86_FEATURE_HYPERVISOR Y Y Y N CONFIG_PARAVIRT_SPINLOCKS Y Y N Y/N PV spinlock Y N N Y/N virt_spin_lock_key N Y/N Y N Fixes: ce0a1b608bfc ("x86/paravirt: Silence unused native_pv_lock_init() function warning") Reported-by: Prem Nath Dey Reported-by: Xiaoping Zhou Suggested-by: Dave Hansen Suggested-by: Qiuxu Zhuo Suggested-by: Nikolay Borisov Signed-off-by: Chen Yu Signed-off-by: Thomas Gleixner Reviewed-by: Nikolay Borisov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240806112207.29792-1-yu.c.chen@intel.com --- arch/x86/include/asm/qspinlock.h | 12 +++++++----- arch/x86/kernel/paravirt.c | 7 +++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index a053c1293975..68da67df304d 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu) #ifdef CONFIG_PARAVIRT /* - * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. * - * Native (and PV wanting native due to vCPU pinning) should disable this key. - * It is done in this backwards fashion to only have a single direction change, - * which removes ordering between native_pv_spin_init() and HV setup. + * Native (and PV wanting native due to vCPU pinning) should keep this key + * disabled. Native does not touch the key. + * + * When in a guest then native_pv_lock_init() enables the key first and + * KVM/XEN might conditionally disable it later in the boot process again. */ -DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); /* * Shortcut for the queued_spin_lock_slowpath() function that allows diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5358d43886ad..fec381533555 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -51,13 +51,12 @@ DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif -DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && - !boot_cpu_has(X86_FEATURE_HYPERVISOR)) - static_branch_disable(&virt_spin_lock_key); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&virt_spin_lock_key); } static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) From 10f2ad032defe906240d0c3b62dcbceace96b230 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Aug 2024 12:51:44 +0100 Subject: [PATCH 0597/1523] KVM: arm64: Enforce dependency on an ARMv8.4-aware toolchain With the NV support of TLBI-range operations, KVM makes use of instructions that are only supported by binutils versions >= 2.30. This breaks the build for very old toolchains. Make KVM support conditional on having ARMv8.4 support in the assembler, side-stepping the issue. Fixes: 5d476ca57d7d ("KVM: arm64: nv: Add handling of range-based TLBI operations") Reported-by: Viresh Kumar Suggested-by: Arnd Bergmann Signed-off-by: Marc Zyngier Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240807115144.3237260-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 58f09370d17e..8304eb342be9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -19,6 +19,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on AS_HAS_ARMV8_4 select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER From 01ab08cafeced7ae1d6c01a08218742c8182f8da Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 7 Aug 2024 13:20:24 +0800 Subject: [PATCH 0598/1523] KVM: arm64: vgic-debug: Exit the iterator properly w/o LPI In case the guest doesn't have any LPI, we previously relied on the iterator setting 'intid = nr_spis + VGIC_NR_PRIVATE_IRQS' && 'lpi_idx = 1' to exit the iterator. But it was broken with commit 85d3ccc8b75b ("KVM: arm64: vgic-debug: Use an xarray mark for debug iterator") -- the intid remains at 'nr_spis + VGIC_NR_PRIVATE_IRQS - 1', and we end up endlessly printing the last SPI's state. Consider that it's meaningless to search the LPI xarray and populate lpi_idx when there is no LPI, let's just skip the process for that case. The result is that * If there's no LPI, we focus on the intid and exit the iterator when it runs out of the valid SPI range. * Otherwise we keep the current logic and let the xarray drive the iterator. Fixes: 85d3ccc8b75b ("KVM: arm64: vgic-debug: Use an xarray mark for debug iterator") Signed-off-by: Zenghui Yu Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240807052024.2084-1-yuzenghui@huawei.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bcbc8c986b1d..bc74d06398ef 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -45,7 +45,8 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. */ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && + iter->nr_lpis) { if (iter->lpi_idx < iter->nr_lpis) xa_find_after(&dist->lpi_xa, &iter->intid, VGIC_LPI_MAX_INTID, @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - iter->lpi_idx > iter->nr_lpis; + (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) From 7e814a20f6da2bd2044b1a4682dd92a6f0df5a92 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 22 Jul 2024 17:33:11 +0100 Subject: [PATCH 0599/1523] KVM: arm64: Tidying up PAuth code in KVM Tidy up some of the PAuth trapping code to clear up some comments and avoid clang/checkpatch warnings. Also, don't bother setting PAuth HCR_EL2 bits in pKVM, since it's handled by the hypervisor. Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20240722163311.1493879-1-tabba@google.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_ptrauth.h | 2 +- arch/arm64/kvm/arm.c | 14 ++++---------- arch/arm64/kvm/hyp/include/hyp/switch.h | 1 - arch/arm64/kvm/hyp/nvhe/switch.c | 5 ++--- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index d81bac256abc..6199c9f7ec6e 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -104,7 +104,7 @@ alternative_else_nop_endif #define __ptrauth_save_key(ctxt, key) \ do { \ - u64 __val; \ + u64 __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 23e1fa56c02d..9bef7638342e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -522,10 +522,10 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) { - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu) && !is_protected_kvm_enabled()) { /* - * Either we're running running an L2 guest, and the API/APK - * bits come from L1's HCR_EL2, or API/APK are both set. + * Either we're running an L2 guest, and the API/APK bits come + * from L1's HCR_EL2, or API/APK are both set. */ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { u64 val; @@ -542,16 +542,10 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Save the host keys if there is any chance for the guest * to use pauth, as the entry code will reload the guest * keys in that case. - * Protected mode is the exception to that rule, as the - * entry into the EL2 code eagerly switch back and forth - * between host and hyp keys (and kvm_hyp_ctxt is out of - * reach anyway). */ - if (is_protected_kvm_enabled()) - return; - if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); ptrauth_save_keys(ctxt); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f59ccfe11ab9..37ff87d782b6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6af179c6356d..8f5c56d5b1cd 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -173,9 +173,8 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { /* - * Make sure we handle the exit for workarounds and ptrauth - * before the pKVM handling, as the latter could decide to - * UNDEF. + * Make sure we handle the exit for workarounds before the pKVM + * handling, as the latter could decide to UNDEF. */ return (kvm_hyp_handle_sysreg(vcpu, exit_code) || kvm_handle_pvm_sysreg(vcpu, exit_code)); From ad518452fd263766946346324810f14bd8bb8b34 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Jul 2024 17:21:13 +0100 Subject: [PATCH 0600/1523] KVM: selftests: arm64: Correct feature test for S1PIE in get-reg-list The ID register for S1PIE is ID_AA64MMFR3_EL1.S1PIE which is bits 11:8 but get-reg-list uses a shift of 4, checking SCTLRX instead. Use a shift of 8 instead. Fixes: 5f0419a0083b ("KVM: selftests: get-reg-list: add Permission Indirection registers") Signed-off-by: Mark Brown Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20240731-kvm-arm64-fix-s1pie-test-v1-1-a9253f3b7db4@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d721760..4abebde78187 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -32,13 +32,13 @@ static struct feature_id_reg feat_id_regs[] = { { ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 }, { ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 } }; From 6dd1e4c045afa6a4ba5d46f044c83bd357c593c2 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 7 Aug 2024 17:00:56 +0800 Subject: [PATCH 0601/1523] selinux: add the processing of the failure of avc_add_xperms_decision() When avc_add_xperms_decision() fails, the information recorded by the new avc node is incomplete. In this case, the new avc node should be released instead of replacing the old avc node. Cc: stable@vger.kernel.org Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Suggested-by: Stephen Smalley Signed-off-by: Zhen Lei Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/avc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 7087cd2b802d..b49c44869dc4 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, node->ae.avd.auditdeny &= ~perms; break; case AVC_CALLBACK_ADD_XPERMS: - avc_add_xperms_decision(node, xpd); + rc = avc_add_xperms_decision(node, xpd); + if (rc) { + avc_node_kill(node); + goto out_unlock; + } break; } avc_node_replace(node, orig); From e037a26ead187901f83cad9c503ccece5ff6817a Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Sat, 6 Jul 2024 11:38:07 -0400 Subject: [PATCH 0602/1523] igc: Fix packet still tx after gate close by reducing i226 MAC retry buffer Testing uncovered that even when the taprio gate is closed, some packets still transmit. According to i225/6 hardware errata [1], traffic might overflow the planned QBV window. This happens because MAC maintains an internal buffer, primarily for supporting half duplex retries. Therefore, even when the gate closes, residual MAC data in the buffer may still transmit. To mitigate this for i226, reduce the MAC's internal buffer from 192 bytes to the recommended 88 bytes by modifying the RETX_CTL register value. This follows guidelines from: [1] Ethernet Controller I225/I22 Spec Update Rev 2.1 Errata Item 9: TSN: Packet Transmission Might Cross Qbv Window [2] I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control Note that the RETX_CTL register can't be used in TSN mode because half duplex feature cannot coexist with TSN. Test Steps: 1. Send taprio cmd to board A: tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 4 \ map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 \ base-time 0 \ sched-entry S 0x07 500000 \ sched-entry S 0x0f 500000 \ flags 0x2 \ txtime-delay 0 Note that for TC3, gate should open for 500us and close for another 500us. 3. Take tcpdump log on Board B. 4. Send udp packets via UDP tai app from Board A to Board B. 5. Analyze tcpdump log via wireshark log on Board B. Ensure that the total time from the first to the last packet received during one cycle for TC3 does not exceed 500us. Fixes: 43546211738e ("igc: Add new device ID's") Signed-off-by: Faizal Rahim Acked-by: Vinicius Costa Gomes Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 6 ++++ drivers/net/ethernet/intel/igc/igc_tsn.c | 34 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5f92b3c7c3d4..511384f3ec5c 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -404,6 +404,12 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + /* Transmit Scheduling Latency */ /* Latency between transmission scheduling (LaunchTime) and the time * the packet is transmitted to the network in nanosecond. diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 22cefb1eeedf..46d4c3275bbb 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -78,6 +78,15 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) wr32(IGC_GTXOFFSET, txoffset); } +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -91,6 +100,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); + tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); @@ -111,6 +123,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -123,6 +154,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; From f8d6acaee9d35cbff3c3cfad94641666c596f8da Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Sun, 7 Jul 2024 08:53:16 -0400 Subject: [PATCH 0603/1523] igc: Fix qbv_config_change_errors logics When user issues these cmds: 1. Either a) or b) a) mqprio with hardware offload disabled b) taprio with txtime-assist feature enabled 2. etf 3. tc qdisc delete 4. taprio with base time in the past At step 4, qbv_config_change_errors wrongly increased by 1. Excerpt from IEEE 802.1Q-2018 8.6.9.3.1: "If AdminBaseTime specifies a time in the past, and the current schedule is running, then: Increment ConfigChangeError counter" qbv_config_change_errors should only increase if base time is in the past and no taprio is active. In user perspective, taprio was not active when first triggered at step 4. However, i225/6 reuses qbv for etf, so qbv is enabled with a dummy schedule at step 2 where it enters igc_tsn_enable_offload() and qbv_count got incremented to 1. At step 4, it enters igc_tsn_enable_offload() again, qbv_count is incremented to 2. Because taprio is running, tc_setup_type is TC_SETUP_QDISC_ETF and qbv_count > 1, qbv_config_change_errors value got incremented. This issue happens due to reliance on qbv_count field where a non-zero value indicates that taprio is running. But qbv_count increases regardless if taprio is triggered by user or by other tsn feature. It does not align with qbv_config_change_errors expectation where it is only concerned with taprio triggered by user. Fixing this by relocating the qbv_config_change_errors logic to igc_save_qbv_schedule(), eliminating reliance on qbv_count and its inaccuracies from i225/6's multiple uses of qbv feature for other TSN features. The new function created: igc_tsn_is_taprio_activated_by_user() uses taprio_offload_enable field to indicate that the current running taprio was triggered by user, instead of triggered by non-qbv feature like etf. Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter") Signed-off-by: Faizal Rahim Reviewed-by: Simon Horman Acked-by: Vinicius Costa Gomes Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++-- drivers/net/ethernet/intel/igc/igc_tsn.c | 16 ++++++++-------- drivers/net/ethernet/intel/igc/igc_tsn.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8daf938afc36..dfd6c00b4205 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6315,12 +6315,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; adapter->taprio_offload_enable = true; - igc_ptp_read(adapter, &now); - for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 46d4c3275bbb..8ed7b965484d 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -87,6 +87,14 @@ static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) wr32(IGC_RETX_CTL, retxctl); } +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -296,14 +304,6 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); - - /* Increase the counter if scheduling into the past while - * Gate Control List (GCL) is running. - */ - if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - (adapter->qbv_count > 1)) - adapter->qbv_config_change_errors++; } else { if (igc_is_device_id_i226(hw)) { ktime_t adjust_time, expires_time; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index b53e6af560b7..98ec845a86bf 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -7,5 +7,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ From 0afeaeb5dae86aceded0d5f0c3a54d27858c0c6f Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Sun, 7 Jul 2024 08:53:17 -0400 Subject: [PATCH 0604/1523] igc: Fix reset adapter logics when tx mode change Following the "igc: Fix TX Hang issue when QBV Gate is close" changes, remaining issues with the reset adapter logic in igc_tsn_offload_apply() have been observed: 1. The reset adapter logics for i225 and i226 differ, although they should be the same according to the guidelines in I225/6 HW Design Section 7.5.2.1 on software initialization during tx mode changes. 2. The i225 resets adapter every time, even though tx mode doesn't change. This occurs solely based on the condition igc_is_device_id_i225() when calling schedule_work(). 3. i226 doesn't reset adapter for tsn->legacy tx mode changes. It only resets adapter for legacy->tsn tx mode transitions. 4. qbv_count introduced in the patch is actually not needed; in this context, a non-zero value of qbv_count is used to indicate if tx mode was unconditionally set to tsn in igc_tsn_enable_offload(). This could be replaced by checking the existing register IGC_TQAVCTRL_TRANSMIT_MODE_TSN bit. This patch resolves all issues and enters schedule_work() to reset the adapter only when changing tx mode. It also removes reliance on qbv_count. qbv_count field will be removed in a future patch. Test ran: 1. Verify reset adapter behaviour in i225/6: a) Enrol a new GCL Reset adapter observed (tx mode change legacy->tsn) b) Enrol a new GCL without deleting qdisc No reset adapter observed (tx mode remain tsn->tsn) c) Delete qdisc Reset adapter observed (tx mode change tsn->legacy) 2. Tested scenario from "igc: Fix TX Hang issue when QBV Gate is closed" to confirm it remains resolved. Fixes: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed") Signed-off-by: Faizal Rahim Reviewed-by: Simon Horman Acked-by: Vinicius Costa Gomes Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_tsn.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 8ed7b965484d..ada751430517 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -49,6 +49,13 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) return new_flags; } +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -365,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) return err; } +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) +{ + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); + + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} + int igc_tsn_offload_apply(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; - - /* Per I225/6 HW Design Section 7.5.2.1, transmit mode - * cannot be changed dynamically. Require reset the adapter. + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. */ if (netif_running(adapter->netdev) && - (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + igc_tsn_will_tx_mode_change(adapter)) { schedule_work(&adapter->reset_task); return 0; } From 6c3fc0b1c3d073bd6fc3bf43dbd0e64240537464 Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Sun, 7 Jul 2024 08:53:18 -0400 Subject: [PATCH 0605/1523] igc: Fix qbv tx latency by setting gtxoffset A large tx latency issue was discovered during testing when only QBV was enabled. The issue occurs because gtxoffset was not set when QBV is active, it was only set when launch time is active. The patch "igc: Correct the launchtime offset" only sets gtxoffset when the launchtime_enable field is set by the user. Enabling launchtime_enable ultimately sets the register IGC_TXQCTL_QUEUE_MODE_LAUNCHT (referred to as LaunchT in the SW user manual). Section 7.5.2.6 of the IGC i225/6 SW User Manual Rev 1.2.4 states: "The latency between transmission scheduling (launch time) and the time the packet is transmitted to the network is listed in Table 7-61." However, the patch misinterprets the phrase "launch time" in that section by assuming it specifically refers to the LaunchT register, whereas it actually denotes the generic term for when a packet is released from the internal buffer to the MAC transmit logic. This launch time, as per that section, also implicitly refers to the QBV gate open time, where a packet waits in the buffer for the QBV gate to open. Therefore, latency applies whenever QBV is in use. TSN features such as QBU and QAV reuse QBV, making the latency universal to TSN features. Discussed with i226 HW owner (Shalev, Avi) and we were in agreement that the term "launch time" used in Section 7.5.2.6 is not clear and can be easily misinterpreted. Avi will update this section to: "When TQAVCTRL.TRANSMIT_MODE = TSN, the latency between transmission scheduling and the time the packet is transmitted to the network is listed in Table 7-61." Fix this issue by using igc_tsn_is_tx_mode_in_tsn() as a condition to write to gtxoffset, aligning with the newly updated SW User Manual. Tested: 1. Enrol taprio on talker board base-time 0 cycle-time 1000000 flags 0x2 index 0 cmd S gatemask 0x1 interval1 index 0 cmd S gatemask 0x1 interval2 Note: interval1 = interval for a 64 bytes packet to go through interval2 = cycle-time - interval1 2. Take tcpdump on listener board 3. Use udp tai app on talker to send packets to listener 4. Check the timestamp on listener via wireshark Test Result: 100 Mbps: 113 ~193 ns 1000 Mbps: 52 ~ 84 ns 2500 Mbps: 95 ~ 223 ns Note that the test result is similar to the patch "igc: Correct the launchtime offset". Fixes: 790835fcc0cb ("igc: Correct the launchtime offset") Signed-off-by: Faizal Rahim Reviewed-by: Simon Horman Acked-by: Vinicius Costa Gomes Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index ada751430517..d68fa7f3d5f0 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -61,7 +61,7 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u16 txoffset; - if (!is_any_launchtime(adapter)) + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) return; switch (adapter->link_speed) { From 541b80216cd1d511841c3c8d9559a7b13f4f79f2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:48 +0200 Subject: [PATCH 0606/1523] Bluetooth: hci_qca: don't call pwrseq_power_off() twice for QCA6390 Now that we call pwrseq_power_off() for all models that hold a valid power sequencing handle, we can remove the switch case for QCA_6390. The switch will now use the default label for this model but that's fine: if it has the BT-enable GPIO than we should use it. Fixes: eba1718717b0 ("Bluetooth: hci_qca: make pwrseq calls the default if available") Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca6466676902..a20dd5015346 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2187,10 +2187,6 @@ static void qca_power_shutdown(struct hci_uart *hu) } break; - case QCA_QCA6390: - pwrseq_power_off(qcadev->bt_power->pwrseq); - break; - default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } From f3660957303b822e5dd6e10fe9e1e19afc6d33de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:49 +0200 Subject: [PATCH 0607/1523] Bluetooth: hci_qca: fix QCA6390 support on non-DT platforms QCA6390 can albo be used on non-DT systems so we must not make the power sequencing the only option. Check if the serdev device consumes an OF node. If so: honor the new contract as per the DT bindings. If not: fall back to the previous behavior by falling through to the existing default label. Fixes: 9a15ce685706 ("Bluetooth: qca: use the power sequencer for QCA6390") Reported-by: Wren Turkal Closes: https://lore.kernel.org/linux-bluetooth/27e6a6c5-fb63-4219-be0b-eefa2c116e06@penguintechs.org/ Signed-off-by: Bartosz Golaszewski Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index a20dd5015346..2baed7d0f479 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2412,11 +2412,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) break; case QCA_QCA6390: - qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, - "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + if (dev_of_node(&serdev->dev)) { + qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, + "bluetooth"); + if (IS_ERR(qcadev->bt_power->pwrseq)) + return PTR_ERR(qcadev->bt_power->pwrseq); + break; + } + fallthrough; default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", From e1d28be268cfe978e19a18a3a9b37a4a7d37745e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:50 +0200 Subject: [PATCH 0608/1523] Bluetooth: hci_qca: fix a NULL-pointer derefence at shutdown Unlike qca_regulator_init(), qca_power_shutdown() may be called for QCA_ROME which does not have qcadev->bt_power assigned. Add a NULL-pointer check before dereferencing the struct qca_power pointer. Fixes: eba1718717b0 ("Bluetooth: hci_qca: make pwrseq calls the default if available") Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/linux-bluetooth/su3wp6s44hrxf4ijvsdfzbvv4unu4ycb7kkvwbx6ltdafkldir@4g7ydqm2ap5j/ Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 2baed7d0f479..45adc1560d94 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2160,7 +2160,7 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; - if (power->pwrseq) { + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); return; From c531e63871c0b50c8c4e62c048535a08886fba3e Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 31 Jul 2024 12:19:36 +0300 Subject: [PATCH 0609/1523] Bluetooth: l2cap: always unlock channel in l2cap_conless_channel() Add missing call to 'l2cap_chan_unlock()' on receive error handling path in 'l2cap_conless_channel()'. Fixes: a24cce144b98 ("Bluetooth: Fix reference counting of global L2CAP channels") Reported-by: syzbot+45ac74737e866894acb0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=45ac74737e866894acb0 Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c3c26bbb5dda..9988ba382b68 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6774,6 +6774,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { + l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } From b5431dc2803ac159d6d4645ae237d15c3cb252db Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 29 Jul 2024 21:58:10 +0200 Subject: [PATCH 0610/1523] Bluetooth: hci_sync: avoid dup filtering when passive scanning with adv monitor This restores behaviour (including the comment) from now-removed hci_request.c, and also matches existing code for active scanning. Without this, the duplicates filter is always active when passive scanning, which makes it impossible to work with devices that send nontrivial dynamic data in their advertisement reports. Fixes: abfeea476c68 ("Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY") Signed-off-by: Anton Khirnov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a31d39a821f4..e79cd40bd079 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3019,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; + + /* Disable duplicates filter when scanning for advertisement + * monitor for the following reasons. + * + * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm + * controllers ignore RSSI_Sampling_Period when the duplicates + * filter is enabled. + * + * For SW pattern filtering, when we're not doing interleaved + * scanning, it is necessary to disable duplicates filter, + * otherwise hosts can only receive one advertisement and it's + * impossible to know if a peer is still in range. + */ + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; From 11893e144ed75be55d99349760513ca104781fc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2024 15:06:45 -0600 Subject: [PATCH 0611/1523] io_uring/net: ensure expanded bundle recv gets marked for cleanup If the iovec inside the kmsg isn't already allocated AND one gets expanded beyond the fixed size, then the request may not already have been marked for cleanup. Ensure that it is. Cc: stable@vger.kernel.org Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv") Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index 594490a1389b..97a48408cec3 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1094,6 +1094,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } else { void __user *buf; From 70ed519ed59da3a92c3acedeb84a30e5a66051ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2024 15:08:17 -0600 Subject: [PATCH 0612/1523] io_uring/net: ensure expanded bundle send gets marked for cleanup If the iovec inside the kmsg isn't already allocated AND one gets expanded beyond the fixed size, then the request may not already have been marked for cleanup. Ensure that it is. Cc: stable@vger.kernel.org Fixes: a05d1f625c7a ("io_uring/net: support bundles for send") Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index 97a48408cec3..050bea5e7256 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -623,6 +623,7 @@ retry_bundle: if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } From 8fe8ac24adcd76b12edbfdefa078567bfff117d4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2024 15:09:33 -0600 Subject: [PATCH 0613/1523] io_uring/net: don't pick multiple buffers for non-bundle send If a send is issued marked with IOSQE_BUFFER_SELECT for selecting a buffer, unless it's a bundle, it should not select multiple buffers. Cc: stable@vger.kernel.org Fixes: a05d1f625c7a ("io_uring/net: support bundles for send") Signed-off-by: Jens Axboe --- io_uring/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 050bea5e7256..d08abcca89cc 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -601,17 +601,18 @@ retry_bundle: .iovs = &kmsg->fast_iov, .max_len = INT_MAX, .nr_iovs = 1, - .mode = KBUF_MODE_EXPAND, }; if (kmsg->free_iov) { arg.nr_iovs = kmsg->free_iov_nr; arg.iovs = kmsg->free_iov; - arg.mode |= KBUF_MODE_FREE; + arg.mode = KBUF_MODE_FREE; } if (!(sr->flags & IORING_RECVSEND_BUNDLE)) arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; ret = io_buffers_select(req, &arg, issue_flags); if (unlikely(ret < 0)) From b1560408692cd0ab0370cfbe9deb03ce97ab3f6d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 30 Jul 2024 11:06:57 -0400 Subject: [PATCH 0614/1523] tracing: Have format file honor EVENT_FILE_FL_FREED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When eventfs was introduced, special care had to be done to coordinate the freeing of the file meta data with the files that are exposed to user space. The file meta data would have a ref count that is set when the file is created and would be decremented and freed after the last user that opened the file closed it. When the file meta data was to be freed, it would set a flag (EVENT_FILE_FL_FREED) to denote that the file is freed, and any new references made (like new opens or reads) would fail as it is marked freed. This allowed other meta data to be freed after this flag was set (under the event_mutex). All the files that were dynamically created in the events directory had a pointer to the file meta data and would call event_release() when the last reference to the user space file was closed. This would be the time that it is safe to free the file meta data. A shortcut was made for the "format" file. It's i_private would point to the "call" entry directly and not point to the file's meta data. This is because all format files are the same for the same "call", so it was thought there was no reason to differentiate them. The other files maintain state (like the "enable", "trigger", etc). But this meant if the file were to disappear, the "format" file would be unaware of it. This caused a race that could be trigger via the user_events test (that would create dynamic events and free them), and running a loop that would read the user_events format files: In one console run: # cd tools/testing/selftests/user_events # while true; do ./ftrace_test; done And in another console run: # cd /sys/kernel/tracing/ # while true; do cat events/user_events/__test_event/format; done 2>/dev/null With KASAN memory checking, it would trigger a use-after-free bug report (which was a real bug). This was because the format file was not checking the file's meta data flag "EVENT_FILE_FL_FREED", so it would access the event that the file meta data pointed to after the event was freed. After inspection, there are other locations that were found to not check the EVENT_FILE_FL_FREED flag when accessing the trace_event_file. Add a new helper function: event_file_file() that will make sure that the event_mutex is held, and will return NULL if the trace_event_file has the EVENT_FILE_FL_FREED flag set. Have the first reference of the struct file pointer use event_file_file() and check for NULL. Later uses can still use the event_file_data() helper function if the event_mutex is still held and was not released since the event_file_file() call. Link: https://lore.kernel.org/all/20240719204701.1605950-1-minipli@grsecurity.net/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Ajay Kaher Cc: Ilkka Naulapää Cc: Linus Torvalds Cc: Al Viro Cc: Dan Carpenter Cc: Beau Belgrave Cc: Florian Fainelli Cc: Alexey Makhalov Cc: Vasavi Sirnapalli Link: https://lore.kernel.org/20240730110657.3b69d3c1@gandalf.local.home Fixes: b63db58e2fa5d ("eventfs/tracing: Add callback for release of an eventfs_inode") Reported-by: Mathias Krause Tested-by: Mathias Krause Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 23 ++++++++++++++++++++ kernel/trace/trace_events.c | 33 +++++++++++++++++------------ kernel/trace/trace_events_hist.c | 4 ++-- kernel/trace/trace_events_inject.c | 2 +- kernel/trace/trace_events_trigger.c | 6 +++--- 5 files changed, 49 insertions(+), 19 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8783bebd0562..bd3e3069300e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1634,6 +1634,29 @@ static inline void *event_file_data(struct file *filp) extern struct mutex event_mutex; extern struct list_head ftrace_events; +/* + * When the trace_event_file is the filp->i_private pointer, + * it must be taken under the event_mutex lock, and then checked + * if the EVENT_FILE_FL_FREED flag is set. If it is, then the + * data pointed to by the trace_event_file can not be trusted. + * + * Use the event_file_file() to access the trace_event_file from + * the filp the first time under the event_mutex and check for + * NULL. If it is needed to be retrieved again and the event_mutex + * is still held, then the event_file_data() can be used and it + * is guaranteed to be valid. + */ +static inline struct trace_event_file *event_file_file(struct file *filp) +{ + struct trace_event_file *file; + + lockdep_assert_held(&event_mutex); + file = READ_ONCE(file_inode(filp)->i_private); + if (!file || file->flags & EVENT_FILE_FL_FREED) + return NULL; + return file; +} + extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6ef29eba90ce..f08fbaf8cad6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1386,12 +1386,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, char buf[4] = "0"; mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); - if (!file || flags & EVENT_FILE_FL_FREED) + if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && @@ -1424,8 +1424,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 1: ret = -ENODEV; mutex_lock(&event_mutex); - file = event_file_data(filp); - if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) { + file = event_file_file(filp); + if (likely(file)) { ret = tracing_update_buffers(file->tr); if (ret < 0) { mutex_unlock(&event_mutex); @@ -1540,7 +1540,8 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -1572,7 +1573,8 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; @@ -1627,12 +1629,14 @@ static int f_show(struct seq_file *m, void *v) static void *f_start(struct seq_file *m, loff_t *pos) { + struct trace_event_file *file; void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - if (!event_file_data(m->private)) + file = event_file_file(m->private); + if (!file) return ERR_PTR(-ENODEV); while (l < *pos && p) @@ -1706,8 +1710,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file && !(file->flags & EVENT_FILE_FL_FREED)) + file = event_file_file(filp); + if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); @@ -1736,9 +1740,13 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return PTR_ERR(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file) - err = apply_event_filter(file, buf); + file = event_file_file(filp); + if (file) { + if (file->flags & EVENT_FILE_FL_FREED) + err = -ENODEV; + else + err = apply_event_filter(file, buf); + } mutex_unlock(&event_mutex); kfree(buf); @@ -2485,7 +2493,6 @@ static int event_callback(const char *name, umode_t *mode, void **data, if (strcmp(name, "format") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_event_format_fops; - *data = call; return 1; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6ece1308d36a..5f9119eb7c67 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5601,7 +5601,7 @@ static int hist_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; @@ -5880,7 +5880,7 @@ static int hist_debug_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 8650562bdaa9..a8f076809db4 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -299,7 +299,7 @@ event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, strim(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (file) { call = file->event_call; size = parse_entry(buf, call, &entry); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4bec043c8690..a5e3d6acf1e1 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -159,7 +159,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos) /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) return ERR_PTR(-ENODEV); @@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) mutex_lock(&event_mutex); - if (unlikely(!event_file_data(file))) { + if (unlikely(!event_file_file(file))) { mutex_unlock(&event_mutex); return -ENODEV; } @@ -293,7 +293,7 @@ static ssize_t event_trigger_regex_write(struct file *file, strim(buf); mutex_lock(&event_mutex); - event_file = event_file_data(file); + event_file = event_file_file(file); if (unlikely(!event_file)) { mutex_unlock(&event_mutex); kfree(buf); From 6e2fdceffdc6bd7b8ba314a1d1b976721533c8f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Jul 2024 14:42:08 -0400 Subject: [PATCH 0615/1523] tracing: Use refcount for trace_event_file reference counter Instead of using an atomic counter for the trace_event_file reference counter, use the refcount interface. It has various checks to make sure the reference counting is correct, and will warn if it detects an error (like refcount_inc() on '0'). Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240726144208.687cce24@rorschach.local.home Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 2 +- kernel/trace/trace_events.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..fed58e54f15e 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f08fbaf8cad6..7266ec2a4eea 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,18 +992,18 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) void event_file_get(struct trace_event_file *file) { - atomic_inc(&file->ref); + refcount_inc(&file->ref); } void event_file_put(struct trace_event_file *file) { - if (WARN_ON_ONCE(!atomic_read(&file->ref))) { + if (WARN_ON_ONCE(!refcount_read(&file->ref))) { if (file->flags & EVENT_FILE_FL_FREED) kmem_cache_free(file_cachep, file); return; } - if (atomic_dec_and_test(&file->ref)) { + if (refcount_dec_and_test(&file->ref)) { /* Count should only go to zero when it is freed */ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) return; @@ -3003,7 +3003,7 @@ trace_create_new_event(struct trace_event_call *call, atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); - event_file_get(file); + refcount_set(&file->ref, 1); return file; } From f2aaed194a54d78c307c44d1829c7e1ba67e9ba5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 4 Dec 2023 16:35:04 -0500 Subject: [PATCH 0616/1523] drm/amd/display: Replace dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd In the commit c2cec7a872b6 ("drm/amd/display: Wake DMCUB before sending a command for replay feature"), replaced dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd in multiple areas, but due to merge issues the replacement of this function in the dmub_replay_copy_settings was missed. This commit replaces the old dm_execute_dmub_cmd with dc_wake_and_execute_dmub_cmd. Fixes: 3601a35a2e9d ("drm/amd/display: Wake DMCUB before sending a command for replay feature") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit 6cc213b9aa34bc3213e20f9256345c5cc1495b0b) --- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..4d960dc5ce89 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } From 4df19b14f6311c860223f349356da2c08ae92101 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 16:53:42 -0600 Subject: [PATCH 0617/1523] drm/amd/display: Add missing DET segments programming The commit 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") introduced a code refactor for DCHUB, but during the merge process into amd-staging-drm-next, the program det segments were removed. This commit adds the DET segment programming for DCN35. Fixes: 5034b935f62a ("drm/amd/display: Modify DHCUB waterwark structures and functions") Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit 675d9ac9d0de765531e94f9fdc536989a997a324) --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..f115c7a285e7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From 437cf8bb0e1a56fa0491610706ddafd04b3b1a9b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:40:22 -0600 Subject: [PATCH 0618/1523] drm/amd/display: Add dcc propagation value Initialize the field dcc_meta_propagation_delay_us with 10 ms. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit 74bad61c5d83f5af8a855c8b7dc8e20377c74d46) --- drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..34b02147881d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, From eb880ffddd5da8a014669deaf7bb3e7e9ecd06f4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 22 Jul 2024 20:33:40 -0600 Subject: [PATCH 0619/1523] drm/amd/display: Add missing mcache registers Add missing register programming for mcache in DCN401. Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit a00a177055cced5cd2bb057a1ace9a95a286bc49) --- .../gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..bb46f30d11d0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ From 5f142b3826a0d223e947501fa9fe4ca912d9db26 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Mon, 29 Jul 2024 09:24:20 +0800 Subject: [PATCH 0620/1523] drm/amd/pm: update powerplay structure on smu v14.0.2/3 update powerplay structure on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit f905d0c328b440fabaaf265350bf4187ccd5f59b) --- .../amd/pm/swsmu/inc/smu_v14_0_2_pptable.h | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif From aa5c9701ebd654284c55eba30d0a38eec49f2946 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 31 Jul 2024 11:58:46 +0800 Subject: [PATCH 0621/1523] drm/amdgpu: force to use legacy inv in mmhub MMHUB v4.1.0 only support fixed cache mode, so only use legacy invalidation accordingly. Signed-off-by: Likun Gao Reviewed-by: Frank Min Signed-off-by: Alex Deucher (cherry picked from commit 9192c7613ca53572908ba23a4c3f39c7f8ba8021) --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); From 07cd40a0c9843653451f9355170770f6e42489c8 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 24 Jul 2024 09:29:13 -0600 Subject: [PATCH 0622/1523] drm/amd/display: Add missing DCN314 to the DML Makefile Include display_mode_vba_314 and display_rq_dlg_calc_314 to the dml Makefile. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 089525632d40bbfa507f224c20563529b3f8a4b3) --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) From e8097cf1ce9e7ad8516ee95f06f7baaa31506035 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 25 Jul 2024 16:41:38 -0600 Subject: [PATCH 0623/1523] drm/amd/display: Add missing program DET segment call to pipe init Add a callback that program the DET segment when initializing pipes. Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit e1dbe625d6ac2821eb29e087db46cb539d8079f0) --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..ff03b1d98aa7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } From d507ae0dc83b7f43cdf6760b8f1a30aac4fc405a Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: [PATCH 0624/1523] drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit db65eb46de135338d6177f8853e0fd208f19d63e) --- drivers/gpu/drm/drm_buddy.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- include/drm/drm_buddy.h | 2 ++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); From 8ff3bb44cc94b74ebd57fe3be9dedb98dbf92771 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 1 Aug 2024 10:47:16 +0800 Subject: [PATCH 0625/1523] drm/amdgpu: add golden setting for gc v12 Adding Manual GDB golden setting for gc v12 revision 0 ASIC. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit c9875d0a789060facc274dee0d4eb6500d471772) --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..506fa8003388 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) From 829798c789f567ef6ba4b084c15b7b5f3bd98d51 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 7 Mar 2024 19:04:31 +0000 Subject: [PATCH 0626/1523] drm/amdgpu: Forward soft recovery errors to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we discussed before[1], soft recovery should be forwarded to userspace, or we can get into a really bad state where apps will keep submitting hanging command buffers cascading us to a hard reset. 1: https://lore.kernel.org/all/bf23d5ed-9a6b-43e7-84ee-8cbfd0d60f18@froggi.es/ Signed-off-by: Joshua Ashton Reviewed-by: Marek Olšák Signed-off-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 434967aadbbbe3ad9103cc29e9a327de20fdba01) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..908e13455152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) From 5d687a67fda6389b9214815aa0d0adcc44302dc5 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Fri, 2 Aug 2024 11:15:11 +0800 Subject: [PATCH 0627/1523] drm/amdgpu: change non-dcc buffer copy configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without setting cpv bit and 7th ib dw, non-dcc buffer copy will have random corruption So set the cpv bit and clear the 7th ib dw for copy non-dcc buffers Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 5aacf8917fde5bc2a640f3cd49130c0e2e85e726) --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..7e4282609f51 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** From 50e376f1fe3bf571d0645ddf48ad37eb58323919 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 12 Jul 2024 16:30:03 -0400 Subject: [PATCH 0628/1523] drm/amd/display: Skip Recompute DSC Params if no Stream on Link [why] Encounter NULL pointer dereference uner mst + dsc setup. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 [how] dsc recompute should be skipped if no mode change detected on the new request. If detected, keep checking whether the stream is already on current state or not. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 8151a6c13111b465dbabe07c19f572f7cbd16fef) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..915eb2c08ece 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1270,6 +1270,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ From 4a5ad08f537703c35cf7cc29845381805c891d9b Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Sat, 3 Aug 2024 21:30:18 +0530 Subject: [PATCH 0629/1523] drm/amdgpu: Add address alignment support to DCC buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add address alignment support to the DCC VRAM buffers. v2: - adjust size based on the max_texture_channel_caches values only for GFX12 DCC buffers. - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only for DCC buffers. - roundup non power of two DCC buffer adjusted size to nearest power of two number as the buddy allocator does not support non power of two alignments. This applies only to the contiguous DCC buffers. v3:(Alex) - rewrite the max texture channel caches comparison code in an algorithmic way to determine the alignment size. v4:(Alex) - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c and add a new gmc func callback for dcc alignment. If the callback is non-NULL, call it to get the alignment, otherwise, use the default. v5:(Alex) - Set the Alignment to a default value if the callback doesn't exist. - Add the callback to amdgpu_gmc_funcs. v6: - Fix checkpatch warning reported by Intel CI. v7:(Christian) - remove the AMDGPU_GEM_CREATE_GFX12_DCC flag and keep a flag that checks the BO pinning and for a specific hw generation. v8:(Christian) - move this check into gmc_v12_0_get_dcc_alignment. v9: - Fix 32bit build errors Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Reviewed-by: Frank Min Signed-off-by: Alex Deucher (cherry picked from commit aa94b623cb9233b91ed342dd87ecd62e56ff4938) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 35 ++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 18 ++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..b2c94f12da9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,18 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +533,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +568,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..26efce9aa410 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) From 7fc5f252c0d21b7b89720386344b614733edab32 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Thu, 1 Aug 2024 12:20:18 +0800 Subject: [PATCH 0630/1523] drm/amdgpu: correct sdma7 max dw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit correct sdma7 max dw into 8 Signed-off-by: Frank Min Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 86598c3819fdc70e59d28221bfa7bc36e9f5777e) --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 7e4282609f51..ecee9e7d7e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1617,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, From 6ad9dafba19f15a64f71c2e1a9e3b6932f96628e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 5 Aug 2024 19:17:04 +0530 Subject: [PATCH 0631/1523] drm/amdgpu: Add DCC GFX12 flag to enable address alignment We require this flag AMDGPU_GEM_CREATE_GFX12_DCC or any other kernel level GFX12 DCC flag to differentiate the DCC buffers and other pinned display buffers(which has TTM_PL_FLAG_CONTIGUOUS enabled). If we use the TTM_PL_FLAG_CONTIGUOUS flag for DCC buffers, we may over allocate for all the pinned display buffers unnecessarily that leads to memory allocation failure. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 46142cc1b9272d664e0258e105b537735bfeeccc) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index b2c94f12da9e..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -512,7 +512,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - if (adev->gmc.gmc_funcs->get_dcc_alignment) + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); remaining_size = (u64)vres->base.size; From 730bbfaf7d4890bd99e637db7767dc68cfeb24e7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 4 Aug 2024 13:36:11 +0200 Subject: [PATCH 0632/1523] spi: spi-fsl-lpspi: Fix scldiv calculation The effective SPI clock frequency should never exceed speed_hz otherwise this might result in undefined behavior of the SPI device. Currently the scldiv calculation could violate this constraint. For the example parameters perclk_rate = 24 MHz and speed_hz = 7 MHz, the function fsl_lpspi_set_bitrate will determine perscale = 0 and scldiv = 1, which is a effective SPI clock of 8 MHz. So fix this by rounding up the quotient of perclk_rate and speed_hz. While this never change within the loop, we can pull this out. Fixes: 5314987de5e5 ("spi: imx: add lpspi bus driver") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20240804113611.83613-1-wahrenst@gmx.net Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 32baa14dfd83..be261ac09df8 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -296,7 +296,7 @@ static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; - unsigned int perclk_rate, scldiv; + unsigned int perclk_rate, scldiv, div; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); @@ -313,8 +313,10 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) return -EINVAL; } + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); + for (prescale = 0; prescale < 8; prescale++) { - scldiv = perclk_rate / config.speed_hz / (1 << prescale) - 2; + scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; break; From 0df2ac59bebfac221463ef57ed3554899b41d75f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 7 Aug 2024 13:51:38 +0200 Subject: [PATCH 0633/1523] tracefs: Fix inode allocation The leading comment above alloc_inode_sb() is pretty explicit about it: /* * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ Switch tracefs over to alloc_inode_sb() to make sure inodes are properly linked. Cc: Ajay Kaher Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Al Viro Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20240807115143.45927-2-minipli@grsecurity.net Fixes: ba37ff75e04b ("eventfs: Implement tracefs_inode_cache") Signed-off-by: Mathias Krause Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1028ab6d9a74..21a7e51fc3c1 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) struct tracefs_inode *ti; unsigned long flags; - ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; From 12c20c65d0460cf34f9a665d8f0c0d77d45a3829 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 23 Jul 2024 14:25:21 +0200 Subject: [PATCH 0634/1523] eventfs: Don't return NULL in eventfs_create_dir() Commit 77a06c33a22d ("eventfs: Test for ei->is_freed when accessing ei->dentry") added another check, testing if the parent was freed after we released the mutex. If so, the function returns NULL. However, all callers expect it to either return a valid pointer or an error pointer, at least since commit 5264a2f4bb3b ("tracing: Fix a NULL vs IS_ERR() bug in event_subsystem_dir()"). Returning NULL will therefore fail the error condition check in the caller. Fix this by substituting the NULL return value with a fitting error pointer. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: stable@vger.kernel.org Fixes: 77a06c33a22d ("eventfs: Test for ei->is_freed when accessing ei->dentry") Link: https://lore.kernel.org/20240723122522.2724-1-minipli@grsecurity.net Reviewed-by: Dan Carpenter Reviewed-by: Ajay Kaher Signed-off-by: Mathias Krause Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5d88c184f0fc..a9c28a1d5dc8 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); - ei = NULL; + ei = ERR_PTR(-EBUSY); } return ei; } From 8e556432477e97ad6179c61b61a32bf5f1af2355 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 23 Jul 2024 23:07:53 +0200 Subject: [PATCH 0635/1523] eventfs: Use SRCU for freeing eventfs_inodes To mirror the SRCU lock held in eventfs_iterate() when iterating over eventfs inodes, use call_srcu() to free them too. This was accidentally(?) degraded to RCU in commit 43aa6f97c2d0 ("eventfs: Get rid of dentry pointers without refcounts"). Cc: Ajay Kaher Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20240723210755.8970-1-minipli@grsecurity.net Fixes: 43aa6f97c2d0 ("eventfs: Get rid of dentry pointers without refcounts") Signed-off-by: Mathias Krause Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index a9c28a1d5dc8..01e99e98457d 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -112,7 +112,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - call_rcu(&ei->rcu, free_ei_rcu); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) From 604b72b32522d548f855ed82842d2e49bf384edb Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Sat, 3 Aug 2024 15:09:26 +0200 Subject: [PATCH 0636/1523] function_graph: Fix the ret_stack used by ftrace_graph_ret_addr() When ftrace_graph_ret_addr() is invoked to convert a found stack return address to its original value, the function can end up producing the following crash: [ 95.442712] BUG: kernel NULL pointer dereference, address: 0000000000000028 [ 95.442720] #PF: supervisor read access in kernel mode [ 95.442724] #PF: error_code(0x0000) - not-present page [ 95.442727] PGD 0 P4D 0- [ 95.442731] Oops: Oops: 0000 [#1] PREEMPT SMP PTI [ 95.442736] CPU: 1 UID: 0 PID: 2214 Comm: insmod Kdump: loaded Tainted: G OE K 6.11.0-rc1-default #1 67c62a3b3720562f7e7db5f11c1fdb40b7a2857c [ 95.442747] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE, [K]=LIVEPATCH [ 95.442750] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [ 95.442754] RIP: 0010:ftrace_graph_ret_addr+0x42/0xc0 [ 95.442766] Code: [...] [ 95.442773] RSP: 0018:ffff979b80ff7718 EFLAGS: 00010006 [ 95.442776] RAX: ffffffff8ca99b10 RBX: ffff979b80ff7760 RCX: ffff979b80167dc0 [ 95.442780] RDX: ffffffff8ca99b10 RSI: ffff979b80ff7790 RDI: 0000000000000005 [ 95.442783] RBP: 0000000000000001 R08: 0000000000000005 R09: 0000000000000000 [ 95.442786] R10: 0000000000000005 R11: 0000000000000000 R12: ffffffff8e9491e0 [ 95.442790] R13: ffffffff8d6f70f0 R14: ffff979b80167da8 R15: ffff979b80167dc8 [ 95.442793] FS: 00007fbf83895740(0000) GS:ffff8a0afdd00000(0000) knlGS:0000000000000000 [ 95.442797] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 95.442800] CR2: 0000000000000028 CR3: 0000000005070002 CR4: 0000000000370ef0 [ 95.442806] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 95.442809] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 95.442816] Call Trace: [ 95.442823] [ 95.442896] unwind_next_frame+0x20d/0x830 [ 95.442905] arch_stack_walk_reliable+0x94/0xe0 [ 95.442917] stack_trace_save_tsk_reliable+0x7d/0xe0 [ 95.442922] klp_check_and_switch_task+0x55/0x1a0 [ 95.442931] task_call_func+0xd3/0xe0 [ 95.442938] klp_try_switch_task.part.5+0x37/0x150 [ 95.442942] klp_try_complete_transition+0x79/0x2d0 [ 95.442947] klp_enable_patch+0x4db/0x890 [ 95.442960] do_one_initcall+0x41/0x2e0 [ 95.442968] do_init_module+0x60/0x220 [ 95.442975] load_module+0x1ebf/0x1fb0 [ 95.443004] init_module_from_file+0x88/0xc0 [ 95.443010] idempotent_init_module+0x190/0x240 [ 95.443015] __x64_sys_finit_module+0x5b/0xc0 [ 95.443019] do_syscall_64+0x74/0x160 [ 95.443232] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 95.443236] RIP: 0033:0x7fbf82f2c709 [ 95.443241] Code: [...] [ 95.443247] RSP: 002b:00007fffd5ea3b88 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 95.443253] RAX: ffffffffffffffda RBX: 000056359c48e750 RCX: 00007fbf82f2c709 [ 95.443257] RDX: 0000000000000000 RSI: 000056356ed4efc5 RDI: 0000000000000003 [ 95.443260] RBP: 000056356ed4efc5 R08: 0000000000000000 R09: 00007fffd5ea3c10 [ 95.443263] R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 [ 95.443267] R13: 000056359c48e6f0 R14: 0000000000000000 R15: 0000000000000000 [ 95.443272] [ 95.443274] Modules linked in: [...] [ 95.443385] Unloaded tainted modules: intel_uncore_frequency(E):1 isst_if_common(E):1 skx_edac(E):1 [ 95.443414] CR2: 0000000000000028 The bug can be reproduced with kselftests: cd linux/tools/testing/selftests make TARGETS='ftrace livepatch' (cd ftrace; ./ftracetest test.d/ftrace/fgraph-filter.tc) (cd livepatch; ./test-livepatch.sh) The problem is that ftrace_graph_ret_addr() is supposed to operate on the ret_stack of a selected task but wrongly accesses the ret_stack of the current task. Specifically, the above NULL dereference occurs when task->curr_ret_stack is non-zero, but current->ret_stack is NULL. Correct ftrace_graph_ret_addr() to work with the right ret_stack. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Reported-by: Miroslav Benes Link: https://lore.kernel.org/20240803131211.17255-1-petr.pavlu@suse.com Fixes: 7aa1eaef9f42 ("function_graph: Allow multiple users to attach to function graph") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index fc205ad167a9..d1d5ea2d0a1b 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -902,7 +902,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, i = *idx ? : task->curr_ret_stack; while (i > 0) { - ret_stack = get_ret_stack(current, i, &i); + ret_stack = get_ret_stack(task, i, &i); if (!ret_stack) break; /* From bcf86c01ca4676316557dd482c8416ece8c2e143 Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Mon, 5 Aug 2024 13:59:22 +0800 Subject: [PATCH 0637/1523] tracing: Fix overflow in get_free_elt() "tracing_map->next_elt" in get_free_elt() is at risk of overflowing. Once it overflows, new elements can still be inserted into the tracing_map even though the maximum number of elements (`max_elts`) has been reached. Continuing to insert elements after the overflow could result in the tracing_map containing "tracing_map->max_size" elements, leaving no empty entries. If any attempt is made to insert an element into a full tracing_map using `__tracing_map_insert()`, it will cause an infinite loop with preemption disabled, leading to a CPU hang problem. Fix this by preventing any further increments to "tracing_map->next_elt" once it reaches "tracing_map->max_elt". Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Fixes: 08d43a5fa063e ("tracing: Add lock-free tracing_map") Co-developed-by: Cheng-Jui Wang Link: https://lore.kernel.org/20240805055922.6277-1-Tze-nan.Wu@mediatek.com Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/tracing_map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index a4dcf0f24352..3a56e7c8aa4f 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map) struct tracing_map_elt *elt = NULL; int idx; - idx = atomic_inc_return(&map->next_elt); + idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); if (idx < map->max_elts) { elt = *(TRACING_MAP_ELT(map->elts, idx)); if (map->ops && map->ops->elt_init) @@ -699,7 +699,7 @@ void tracing_map_clear(struct tracing_map *map) { unsigned int i; - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); atomic64_set(&map->hits, 0); atomic64_set(&map->drops, 0); @@ -783,7 +783,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits, map->map_bits = map_bits; map->max_elts = (1 << map_bits); - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); map->map_size = (1 << (map_bits + 1)); map->ops = ops; From 58f7e4d7ba32758b861807e77535853cacc1f426 Mon Sep 17 00:00:00 2001 From: Jianhui Zhou <912460177@qq.com> Date: Mon, 5 Aug 2024 19:36:31 +0800 Subject: [PATCH 0638/1523] ring-buffer: Remove unused function ring_buffer_nr_pages() Because ring_buffer_nr_pages() is not an inline function and user accesses buffer->buffers[cpu]->nr_pages directly, the function ring_buffer_nr_pages is removed. Signed-off-by: Jianhui Zhou <912460177@qq.com> Link: https://lore.kernel.org/tencent_F4A7E9AB337F44E0F4B858D07D19EF460708@qq.com Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 1 - kernel/trace/ring_buffer.c | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..fd35d4ec12e1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 28853966aa9a..cebd879a30cb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -692,18 +692,6 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, return ts; } -/** - * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer - * @buffer: The ring_buffer to get the number of pages from - * @cpu: The cpu of the ring_buffer to get the number of pages from - * - * Returns the number of pages used by a per_cpu buffer of the ring buffer. - */ -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) -{ - return buffer->buffers[cpu]->nr_pages; -} - /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from From 0b6743bd60a56a701070b89fb80c327a44b7b3e2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 Aug 2024 18:54:02 -0400 Subject: [PATCH 0639/1523] tracefs: Use generic inode RCU for synchronizing freeing With structure layout randomization enabled for 'struct inode' we need to avoid overlapping any of the RCU-used / initialized-only-once members, e.g. i_lru or i_sb_list to not corrupt related list traversals when making use of the rcu_head. For an unlucky structure layout of 'struct inode' we may end up with the following splat when running the ftrace selftests: [<...>] list_del corruption, ffff888103ee2cb0->next (tracefs_inode_cache+0x0/0x4e0 [slab object]) is NULL (prev is tracefs_inode_cache+0x78/0x4e0 [slab object]) [<...>] ------------[ cut here ]------------ [<...>] kernel BUG at lib/list_debug.c:54! [<...>] invalid opcode: 0000 [#1] PREEMPT SMP KASAN [<...>] CPU: 3 PID: 2550 Comm: mount Tainted: G N 6.8.12-grsec+ #122 ed2f536ca62f28b087b90e3cc906a8d25b3ddc65 [<...>] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [<...>] RIP: 0010:[] __list_del_entry_valid_or_report+0x138/0x3e0 [<...>] Code: 48 b8 99 fb 65 f2 ff ff ff ff e9 03 5c d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff e9 33 5a d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff <0f> 0b 4c 89 e9 48 89 ea 48 89 ee 48 c7 c7 60 8f dd 89 31 c0 e8 2f [<...>] RSP: 0018:fffffe80416afaf0 EFLAGS: 00010283 [<...>] RAX: 0000000000000098 RBX: ffff888103ee2cb0 RCX: 0000000000000000 [<...>] RDX: ffffffff84655fe8 RSI: ffffffff89dd8b60 RDI: 0000000000000001 [<...>] RBP: ffff888103ee2cb0 R08: 0000000000000001 R09: fffffbd0082d5f25 [<...>] R10: fffffe80416af92f R11: 0000000000000001 R12: fdf99c16731d9b6d [<...>] R13: 0000000000000000 R14: ffff88819ad4b8b8 R15: 0000000000000000 [<...>] RBX: tracefs_inode_cache+0x0/0x4e0 [slab object] [<...>] RDX: __list_del_entry_valid_or_report+0x108/0x3e0 [<...>] RSI: __func__.47+0x4340/0x4400 [<...>] RBP: tracefs_inode_cache+0x0/0x4e0 [slab object] [<...>] RSP: process kstack fffffe80416afaf0+0x7af0/0x8000 [mount 2550 2550] [<...>] R09: kasan shadow of process kstack fffffe80416af928+0x7928/0x8000 [mount 2550 2550] [<...>] R10: process kstack fffffe80416af92f+0x792f/0x8000 [mount 2550 2550] [<...>] R14: tracefs_inode_cache+0x78/0x4e0 [slab object] [<...>] FS: 00006dcb380c1840(0000) GS:ffff8881e0600000(0000) knlGS:0000000000000000 [<...>] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [<...>] CR2: 000076ab72b30e84 CR3: 000000000b088004 CR4: 0000000000360ef0 shadow CR4: 0000000000360ef0 [<...>] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [<...>] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [<...>] ASID: 0003 [<...>] Stack: [<...>] ffffffff818a2315 00000000f5c856ee ffffffff896f1840 ffff888103ee2cb0 [<...>] ffff88812b6b9750 0000000079d714b6 fffffbfff1e9280b ffffffff8f49405f [<...>] 0000000000000001 0000000000000000 ffff888104457280 ffffffff8248b392 [<...>] Call Trace: [<...>] [<...>] [] ? lock_release+0x175/0x380 fffffe80416afaf0 [<...>] [] list_lru_del+0x152/0x740 fffffe80416afb48 [<...>] [] list_lru_del_obj+0x113/0x280 fffffe80416afb88 [<...>] [] ? _atomic_dec_and_lock+0x119/0x200 fffffe80416afb90 [<...>] [] iput_final+0x1c4/0x9a0 fffffe80416afbb8 [<...>] [] dentry_unlink_inode+0x44b/0xaa0 fffffe80416afbf8 [<...>] [] __dentry_kill+0x23c/0xf00 fffffe80416afc40 [<...>] [] ? __this_cpu_preempt_check+0x1f/0xa0 fffffe80416afc48 [<...>] [] ? shrink_dentry_list+0x1c5/0x760 fffffe80416afc70 [<...>] [] ? shrink_dentry_list+0x51/0x760 fffffe80416afc78 [<...>] [] shrink_dentry_list+0x288/0x760 fffffe80416afc80 [<...>] [] shrink_dcache_sb+0x155/0x420 fffffe80416afcc8 [<...>] [] ? debug_smp_processor_id+0x23/0xa0 fffffe80416afce0 [<...>] [] ? do_one_tree+0x140/0x140 fffffe80416afcf8 [<...>] [] ? do_remount+0x329/0xa00 fffffe80416afd18 [<...>] [] ? security_sb_remount+0x81/0x1c0 fffffe80416afd38 [<...>] [] reconfigure_super+0x856/0x14e0 fffffe80416afd70 [<...>] [] ? ns_capable_common+0xe7/0x2a0 fffffe80416afd90 [<...>] [] do_remount+0x416/0xa00 fffffe80416afdd0 [<...>] [] path_mount+0x5c4/0x900 fffffe80416afe28 [<...>] [] ? finish_automount+0x13a0/0x13a0 fffffe80416afe60 [<...>] [] ? user_path_at_empty+0xb2/0x140 fffffe80416afe88 [<...>] [] do_mount+0x115/0x1c0 fffffe80416afeb8 [<...>] [] ? path_mount+0x900/0x900 fffffe80416afed8 [<...>] [] ? __kasan_check_write+0x1c/0xa0 fffffe80416afee0 [<...>] [] __do_sys_mount+0x12f/0x280 fffffe80416aff30 [<...>] [] __x64_sys_mount+0xcd/0x2e0 fffffe80416aff70 [<...>] [] ? syscall_trace_enter+0x218/0x380 fffffe80416aff88 [<...>] [] x64_sys_call+0x5d5e/0x6720 fffffe80416affa8 [<...>] [] do_syscall_64+0xcd/0x3c0 fffffe80416affb8 [<...>] [] entry_SYSCALL_64_safe_stack+0x4c/0x87 fffffe80416affe8 [<...>] [<...>] [<...>] RIP: 0033:[<00006dcb382ff66a>] vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] [<...>] Code: 48 8b 0d 29 18 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d f6 17 0d 00 f7 d8 64 89 01 48 [<...>] RSP: 002b:0000763d68192558 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [<...>] RAX: ffffffffffffffda RBX: 00006dcb38433264 RCX: 00006dcb382ff66a [<...>] RDX: 000017c3e0d11210 RSI: 000017c3e0d1a5a0 RDI: 000017c3e0d1ae70 [<...>] RBP: 000017c3e0d10fb0 R08: 000017c3e0d11260 R09: 00006dcb383d1be0 [<...>] R10: 000000000020002e R11: 0000000000000246 R12: 0000000000000000 [<...>] R13: 000017c3e0d1ae70 R14: 000017c3e0d11210 R15: 000017c3e0d10fb0 [<...>] RBX: vm_area_struct[mount 2550 2550 file 6dcb38433000-6dcb38434000 5b 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] RCX: vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] [<...>] RDX: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] RSI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] RDI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] RBP: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] RSP: vm_area_struct[mount 2550 2550 anon 763d68173000-763d68195000 7ffffffdd 100133(read|write|mayread|maywrite|growsdown|account)]+0x0/0xb8 [userland map] [<...>] R08: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] R09: vm_area_struct[mount 2550 2550 file 6dcb383d1000-6dcb383d3000 1cd 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] R13: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] R14: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] R15: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] [<...>] [<...>] Modules linked in: [<...>] ---[ end trace 0000000000000000 ]--- The list debug message as well as RBX's symbolic value point out that the object in question was allocated from 'tracefs_inode_cache' and that the list's '->next' member is at offset 0. Dumping the layout of the relevant parts of 'struct tracefs_inode' gives the following: struct tracefs_inode { union { struct inode { struct list_head { struct list_head * next; /* 0 8 */ struct list_head * prev; /* 8 8 */ } i_lru; [...] } vfs_inode; struct callback_head { void (*func)(struct callback_head *); /* 0 8 */ struct callback_head * next; /* 8 8 */ } rcu; }; [...] }; Above shows that 'vfs_inode.i_lru' overlaps with 'rcu' which will destroy the 'i_lru' list as soon as the 'rcu' member gets used, e.g. in call_rcu() or later when calling the RCU callback. This will disturb concurrent list traversals as well as object reuse which assumes these list heads will keep their integrity. For reproduction, the following diff manually overlays 'i_lru' with 'rcu' as, otherwise, one would require some good portion of luck for gambling an unlucky RANDSTRUCT seed: --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -629,6 +629,7 @@ struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; + struct list_head i_lru; /* inode LRU list */ kgid_t i_gid; unsigned int i_flags; @@ -690,7 +691,6 @@ struct inode { u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif - struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_wb_list; /* backing dev writeback list */ union { The tracefs inode does not need to supply its own RCU delayed destruction of its inode. The inode code itself offers both a "destroy_inode()" callback that gets called when the last reference of the inode is released, and the "free_inode()" which is called after a RCU synchronization period from the "destroy_inode()". The tracefs code can unlink the inode from its list in the destroy_inode() callback, and the simply free it from the free_inode() callback. This should provide the same protection. Link: https://lore.kernel.org/all/20240807115143.45927-3-minipli@grsecurity.net/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Ajay Kaher Cc: Ilkka =?utf-8?b?TmF1bGFww6TDpA==?= Link: https://lore.kernel.org/20240807185402.61410544@gandalf.local.home Fixes: baa23a8d4360 ("tracefs: Reset permissions on remount if permissions are options") Reported-by: Mathias Krause Reported-by: Brad Spengler Suggested-by: Al Viro Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 10 ++++------ fs/tracefs/internal.h | 5 +---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 21a7e51fc3c1..1748dff58c3b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) return &ti->vfs_inode; } -static void tracefs_free_inode_rcu(struct rcu_head *rcu) +static void tracefs_free_inode(struct inode *inode) { - struct tracefs_inode *ti; + struct tracefs_inode *ti = get_tracefs(inode); - ti = container_of(rcu, struct tracefs_inode, rcu); kmem_cache_free(tracefs_inode_cachep, ti); } -static void tracefs_free_inode(struct inode *inode) +static void tracefs_destroy_inode(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); unsigned long flags; @@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode) spin_lock_irqsave(&tracefs_inode_lock, flags); list_del_rcu(&ti->list); spin_unlock_irqrestore(&tracefs_inode_lock, flags); - - call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode) static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .show_options = tracefs_show_options, diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index f704d8348357..d83c2a25f288 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -10,10 +10,7 @@ enum { }; struct tracefs_inode { - union { - struct inode vfs_inode; - struct rcu_head rcu; - }; + struct inode vfs_inode; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ struct list_head list; unsigned long flags; From 6d496e02b4a70926c3bd4e7ab6249ff262eb3bc0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 18:03:54 -0400 Subject: [PATCH 0640/1523] bcachefs: Add missing path_traverse() to btree_iter_next_node() This fixes a bug exposed by the next path - we pop an assert in path_set_should_be_locked(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b..aa8a049071f4 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ From cecf72798b25fcb00303392407fccf500a746747 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 13:58:57 -0400 Subject: [PATCH 0641/1523] bcachefs: Make allocator stuck timeout configurable, ratelimit messages Limit these messages to once every 2 minutes to avoid spamming logs; with multiple devices the output can be quite significant. Also, up the default timeout to 30 seconds from 10 seconds. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 26 ++++++++++++++++++++++++-- fs/bcachefs/alloc_foreground.h | 7 ++++++- fs/bcachefs/bcachefs.h | 2 ++ fs/bcachefs/bcachefs_format.h | 2 ++ fs/bcachefs/io_misc.c | 6 +----- fs/bcachefs/io_write.c | 5 +---- fs/bcachefs/opts.h | 5 +++++ fs/bcachefs/super-io.c | 4 ++++ 8 files changed, 45 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 8683fe4fae5b..02de5ad2be2c 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1758,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1792,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index c78a64ec0553..386d231ceca3 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd..eedf2d6045e7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -893,6 +893,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddf..ad893684db52 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8..177ed331c00b 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d97..1d4761d15002 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f..cda1725702ea 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc819832790..c8c2ccbdfbb5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { From 73dc1656f41a42849e43b945fe44d4e3d55eb6c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 16:40:14 -0400 Subject: [PATCH 0642/1523] bcachefs: Use bch2_wait_on_allocator() in btree node alloc path If the allocator gets stuck, we need to know why. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2..e61f9695771e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } From 30b651c8bc788c068a978dc760e9d5f824f7019e Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Wed, 24 Jul 2024 15:35:17 -0600 Subject: [PATCH 0643/1523] selftests: mm: add s390 to ARCH check commit 0518dbe97fe6 ("selftests/mm: fix cross compilation with LLVM") changed the env variable for the architecture from MACHINE to ARCH. This is preventing 3 required TEST_GEN_FILES from being included when cross compiling s390x and errors when trying to run the test suite. This is due to the ARCH variable already being set and the arch folder name being s390. Add "s390" to the filtered list to cover this case and have the 3 files included in the build. Link: https://lkml.kernel.org/r/20240724213517.23918-1-npache@redhat.com Fixes: 0518dbe97fe6 ("selftests/mm: fix cross compilation with LLVM") Signed-off-by: Nico Pache Cc: Mark Brown Cc: Albert Ou Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 901e0d07765b..7b8a5def54a1 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -110,7 +110,7 @@ endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs From 37bf7fbe1db27792b27345871aa5f8ae52cd396c Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Fri, 26 Jul 2024 09:20:09 +0200 Subject: [PATCH 0644/1523] MAINTAINERS: Update LTP members and web LTP project uses now readthedocs.org instance instead of GitHub wiki. LTP maintainers are listed in alphabetical order. Link: https://lkml.kernel.org/r/20240726072009.1021599-1-pvorel@suse.cz Signed-off-by: Petr Vorel Reviewed-by: Li Wang Reviewed-by: Cyril Hrubis Cc: Jan Stancek Cc: Xiao Yang Cc: Yang Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..31805b6e98a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13324,14 +13324,16 @@ F: Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt F: drivers/i2c/muxes/i2c-mux-ltc4306.c LTP (Linux Test Project) +M: Andrea Cervesato M: Cyril Hrubis M: Jan Stancek M: Petr Vorel M: Li Wang M: Yang Xu +M: Xiao Yang L: ltp@lists.linux.it (subscribers-only) S: Maintained -W: http://linux-test-project.github.io/ +W: https://linux-test-project.readthedocs.io/ T: git https://github.com/linux-test-project/ltp.git LTR390 AMBIENT/UV LIGHT SENSOR DRIVER From 7d4df2dad312f270d62fecb0e5c8b086c6d7dcfc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 29 Jul 2024 04:21:58 +0200 Subject: [PATCH 0645/1523] kcov: properly check for softirq context When collecting coverage from softirqs, KCOV uses in_serving_softirq() to check whether the code is running in the softirq context. Unfortunately, in_serving_softirq() is > 0 even when the code is running in the hardirq or NMI context for hardirqs and NMIs that happened during a softirq. As a result, if a softirq handler contains a remote coverage collection section and a hardirq with another remote coverage collection section happens during handling the softirq, KCOV incorrectly detects a nested softirq coverate collection section and prints a WARNING, as reported by syzbot. This issue was exposed by commit a7f3813e589f ("usb: gadget: dummy_hcd: Switch to hrtimer transfer scheduler"), which switched dummy_hcd to using hrtimer and made the timer's callback be executed in the hardirq context. Change the related checks in KCOV to account for this behavior of in_serving_softirq() and make KCOV ignore remote coverage collection sections in the hardirq and NMI contexts. This prevents the WARNING printed by syzbot but does not fix the inability of KCOV to collect coverage from the __usb_hcd_giveback_urb when dummy_hcd is in use (caused by a7f3813e589f); a separate patch is required for that. Link: https://lkml.kernel.org/r/20240729022158.92059-1-andrey.konovalov@linux.dev Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts") Signed-off-by: Andrey Konovalov Reported-by: syzbot+2388cdaeb6b10f0c13ac@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac Acked-by: Marco Elver Cc: Alan Stern Cc: Aleksandr Nogikh Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Marcello Sylvester Bauer Cc: Signed-off-by: Andrew Morton --- kernel/kcov.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index f0a69d402066..274b6b7c718d 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area, kmsan_unpoison_memory(&area->list, sizeof(area->list)); } +/* + * Unlike in_serving_softirq(), this function returns false when called during + * a hardirq or an NMI that happened in the softirq context. + */ +static inline bool in_softirq_really(void) +{ + return in_serving_softirq() && !in_hardirq() && !in_nmi(); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * so we ignore code executed in interrupts, unless we are in a remote * coverage collection section in a softirq. */ - if (!in_task() && !(in_serving_softirq() && t->kcov_softirq)) + if (!in_task() && !(in_softirq_really() && t->kcov_softirq)) return false; mode = READ_ONCE(t->kcov_mode); /* @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle) if (WARN_ON(!kcov_check_handle(handle, true, true, true))) return; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); @@ -991,7 +1000,7 @@ void kcov_remote_stop(void) int sequence; unsigned long flags; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); From 5161b48712dcd08ec427c450399d4d1483e21dea Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 18 Jul 2024 16:36:07 +0800 Subject: [PATCH 0646/1523] mm: list_lru: fix UAF for memory cgroup The mem_cgroup_from_slab_obj() is supposed to be called under rcu lock or cgroup_mutex or others which could prevent returned memcg from being freed. Fix it by adding missing rcu read lock. Found by code inspection. [songmuchun@bytedance.com: only grab rcu lock when necessary, per Vlastimil] Link: https://lkml.kernel.org/r/20240801024603.1865-1-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20240718083607.42068-1-songmuchun@bytedance.com Fixes: 0a97c01cd20b ("list_lru: allow explicit memcg and NUMA node selection") Signed-off-by: Muchun Song Acked-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Nhat Pham Cc: Signed-off-by: Andrew Morton --- mm/list_lru.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index a29d96929d7c..9b7ff06e9d32 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -85,6 +85,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) } #endif /* CONFIG_MEMCG */ +/* The caller must ensure the memcg lifetime. */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -109,14 +110,22 @@ EXPORT_SYMBOL_GPL(list_lru_add); bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_add(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_add(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_add_obj); +/* The caller must ensure the memcg lifetime. */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -139,11 +148,18 @@ EXPORT_SYMBOL_GPL(list_lru_del); bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_del(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_del(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_del_obj); From b66b1b71d7ff5464d23a0ac6f73fae461b7264fd Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 31 Jul 2024 13:46:19 +0800 Subject: [PATCH 0647/1523] mm: shmem: avoid allocating huge pages larger than MAX_PAGECACHE_ORDER for shmem Similar to commit d659b715e94ac ("mm/huge_memory: avoid PMD-size page cache if needed"), ARM64 can support 512MB PMD-sized THP when the base page size is 64KB, which is larger than the maximum supported page cache size MAX_PAGECACHE_ORDER. This is not expected. To fix this issue, use THP_ORDERS_ALL_FILE_DEFAULT for shmem to filter allowable huge orders. [baolin.wang@linux.alibaba.com: remove comment, per Barry] Link: https://lkml.kernel.org/r/c55d7ef7-78aa-4ed6-b897-c3e03a3f3ab7@linux.alibaba.com [wangkefeng.wang@huawei.com: remove local `orders'] Link: https://lkml.kernel.org/r/87769ae8-b6c6-4454-925d-1864364af9c8@huawei.com Link: https://lkml.kernel.org/r/117121665254442c3c7f585248296495e5e2b45c.1722404078.git.baolin.wang@linux.alibaba.com Fixes: e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem") Signed-off-by: Baolin Wang Signed-off-by: Kefeng Wang Reviewed-by: Barry Song Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: Gavin Shan Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/shmem.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 2faa9daaf54b..b5be73b04329 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1629,11 +1629,6 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long vm_flags = vma->vm_flags; - /* - * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that - * are enabled for this vma. - */ - unsigned long orders = BIT(PMD_ORDER + 1) - 1; loff_t i_size; int order; @@ -1678,7 +1673,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, if (global_huge) mask |= READ_ONCE(huge_shmem_orders_inherit); - return orders & mask; + return THP_ORDERS_ALL_FILE_DEFAULT & mask; } static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf, From 4cbf320b1500fe64fcef8c96ed74dfc1ae2c9e2c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 31 Jul 2024 13:46:20 +0800 Subject: [PATCH 0648/1523] mm: shmem: fix incorrect aligned index when checking conflicts In the shmem_suitable_orders() function, xa_find() is used to check for conflicts in the pagecache to select suitable huge orders. However, when checking each huge order in every loop, the aligned index is calculated from the previous iteration, which may cause suitable huge orders to be missed. We should use the original index each time in the loop to calculate a new aligned index for checking conflicts to avoid this issue. Link: https://lkml.kernel.org/r/07433b0f16a152bffb8cee34934a5c040e8e2ad6.1722404078.git.baolin.wang@linux.alibaba.com Fixes: e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem") Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Gavin Shan Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Zi Yan Cc: Barry Song Cc: Kefeng Wang Signed-off-by: Andrew Morton --- mm/shmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b5be73b04329..5a77acf6ac6a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1681,6 +1681,7 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault unsigned long orders) { struct vm_area_struct *vma = vmf->vma; + pgoff_t aligned_index; unsigned long pages; int order; @@ -1692,9 +1693,9 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault order = highest_order(orders); while (orders) { pages = 1UL << order; - index = round_down(index, pages); - if (!xa_find(&mapping->i_pages, &index, - index + pages - 1, XA_PRESENT)) + aligned_index = round_down(index, pages); + if (!xa_find(&mapping->i_pages, &aligned_index, + aligned_index + pages - 1, XA_PRESENT)) break; order = next_order(&orders, order); } From 9972605a238339b85bd16b084eed5f18414d22db Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 2 Aug 2024 16:58:22 -0700 Subject: [PATCH 0649/1523] memcg: protect concurrent access to mem_cgroup_idr Commit 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") decoupled the memcg IDs from the CSS ID space to fix the cgroup creation failures. It introduced IDR to maintain the memcg ID space. The IDR depends on external synchronization mechanisms for modifications. For the mem_cgroup_idr, the idr_alloc() and idr_replace() happen within css callback and thus are protected through cgroup_mutex from concurrent modifications. However idr_remove() for mem_cgroup_idr was not protected against concurrency and can be run concurrently for different memcgs when they hit their refcnt to zero. Fix that. We have been seeing list_lru based kernel crashes at a low frequency in our fleet for a long time. These crashes were in different part of list_lru code including list_lru_add(), list_lru_del() and reparenting code. Upon further inspection, it looked like for a given object (dentry and inode), the super_block's list_lru didn't have list_lru_one for the memcg of that object. The initial suspicions were either the object is not allocated through kmem_cache_alloc_lru() or somehow memcg_list_lru_alloc() failed to allocate list_lru_one() for a memcg but returned success. No evidence were found for these cases. Looking more deeply, we started seeing situations where valid memcg's id is not present in mem_cgroup_idr and in some cases multiple valid memcgs have same id and mem_cgroup_idr is pointing to one of them. So, the most reasonable explanation is that these situations can happen due to race between multiple idr_remove() calls or race between idr_alloc()/idr_replace() and idr_remove(). These races are causing multiple memcgs to acquire the same ID and then offlining of one of them would cleanup list_lrus on the system for all of them. Later access from other memcgs to the list_lru cause crashes due to missing list_lru_one. Link: https://lkml.kernel.org/r/20240802235822.1830976-1-shakeel.butt@linux.dev Fixes: 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Shakeel Butt Acked-by: Muchun Song Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 960371788687..f29157288b7d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3386,11 +3386,28 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) #define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1) static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&memcg_idr_lock); + ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1, + GFP_NOWAIT); + spin_unlock(&memcg_idr_lock); + idr_preload_end(); + return ret; +} static void mem_cgroup_id_remove(struct mem_cgroup *memcg) { if (memcg->id.id > 0) { + spin_lock(&memcg_idr_lock); idr_remove(&mem_cgroup_idr, memcg->id.id); + spin_unlock(&memcg_idr_lock); + memcg->id.id = 0; } } @@ -3524,8 +3541,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) if (!memcg) return ERR_PTR(error); - memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, - 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL); + memcg->id.id = mem_cgroup_alloc_id(); if (memcg->id.id < 0) { error = memcg->id.id; goto fail; @@ -3667,7 +3683,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) * publish it here at the end of onlining. This matches the * regular ID destruction during offlining. */ + spin_lock(&memcg_idr_lock); idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + spin_unlock(&memcg_idr_lock); return 0; offline_kmem: From f2087995fb7165a88b50dde02f3909e448522e0a Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Sun, 4 Aug 2024 14:45:54 +0900 Subject: [PATCH 0650/1523] mailmap: update entry for David Heidelberg Link my old gmail address to my active email. Link: https://lkml.kernel.org/r/20240804054704.859503-1-david@ixit.cz Signed-off-by: David Heidelberg Cc: David S. Miller Cc: Jiri Kosina Cc: Manivannan Sadhasivam Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e51d76df75c2..8ee01d9d7046 100644 --- a/.mailmap +++ b/.mailmap @@ -166,6 +166,7 @@ Daniel Borkmann Daniel Borkmann David Brownell David Collins +David Heidelberg David Rheinsberg David Rheinsberg David Rheinsberg From 6d45e1c948a8b7ed6ceddb14319af69424db730c Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 6 Aug 2024 13:46:47 -0400 Subject: [PATCH 0651/1523] padata: Fix possible divide-by-0 panic in padata_mt_helper() We are hit with a not easily reproducible divide-by-0 panic in padata.c at bootup time. [ 10.017908] Oops: divide error: 0000 1 PREEMPT SMP NOPTI [ 10.017908] CPU: 26 PID: 2627 Comm: kworker/u1666:1 Not tainted 6.10.0-15.el10.x86_64 #1 [ 10.017908] Hardware name: Lenovo ThinkSystem SR950 [7X12CTO1WW]/[7X12CTO1WW], BIOS [PSE140J-2.30] 07/20/2021 [ 10.017908] Workqueue: events_unbound padata_mt_helper [ 10.017908] RIP: 0010:padata_mt_helper+0x39/0xb0 : [ 10.017963] Call Trace: [ 10.017968] [ 10.018004] ? padata_mt_helper+0x39/0xb0 [ 10.018084] process_one_work+0x174/0x330 [ 10.018093] worker_thread+0x266/0x3a0 [ 10.018111] kthread+0xcf/0x100 [ 10.018124] ret_from_fork+0x31/0x50 [ 10.018138] ret_from_fork_asm+0x1a/0x30 [ 10.018147] Looking at the padata_mt_helper() function, the only way a divide-by-0 panic can happen is when ps->chunk_size is 0. The way that chunk_size is initialized in padata_do_multithreaded(), chunk_size can be 0 when the min_chunk in the passed-in padata_mt_job structure is 0. Fix this divide-by-0 panic by making sure that chunk_size will be at least 1 no matter what the input parameters are. Link: https://lkml.kernel.org/r/20240806174647.1050398-1-longman@redhat.com Fixes: 004ed42638f4 ("padata: add basic support for multithreaded jobs") Signed-off-by: Waiman Long Cc: Daniel Jordan Cc: Steffen Klassert Cc: Waiman Long Cc: Signed-off-by: Andrew Morton --- kernel/padata.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/padata.c b/kernel/padata.c index 53f4bc912712..0fa6c2895460 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -517,6 +517,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) if (job->numa_aware) { int old_node = atomic_read(&last_used_nid); From d27a835f41d947f62e6a95e89ba523299c9e6437 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 5 Aug 2024 12:38:56 +0800 Subject: [PATCH 0652/1523] net/smc: add the max value of fallback reason count The number of fallback reasons defined in the smc_clc.h file has reached 36. For historical reasons, some are no longer quoted, and there's 33 actually in use. So, add the max value of fallback reason count to 36. Fixes: 6ac1e6563f59 ("net/smc: support smc v2.x features validate") Fixes: 7f0620b9940b ("net/smc: support max connections per lgr negotiation") Fixes: 69b888e3bb4b ("net/smc: support max links per lgr negotiation in clc handshake") Signed-off-by: Zhengchao Shao Reviewed-by: Wenjia Zhang Reviewed-by: D. Wythe Link: https://patch.msgid.link/20240805043856.565677-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/smc/smc_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 9d32058db2b5..e19177ce4092 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -19,7 +19,7 @@ #include "smc_clc.h" -#define SMC_MAX_FBACK_RSN_CNT 30 +#define SMC_MAX_FBACK_RSN_CNT 36 enum { SMC_BUF_8K, From e3862093ee93fcfbdadcb7957f5f8974fffa806a Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Tue, 6 Aug 2024 10:13:27 +0900 Subject: [PATCH 0653/1523] net: dsa: bcm_sf2: Fix a possible memory leak in bcm_sf2_mdio_register() bcm_sf2_mdio_register() calls of_phy_find_device() and then phy_device_remove() in a loop to remove existing PHY devices. of_phy_find_device() eventually calls bus_find_device(), which calls get_device() on the returned struct device * to increment the refcount. The current implementation does not decrement the refcount, which causes memory leak. This commit adds the missing phy_device_free() call to decrement the refcount via put_device() to balance the refcount. Fixes: 771089c2a485 ("net: dsa: bcm_sf2: Ensure that MDIO diversion is used") Signed-off-by: Joe Hattori Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240806011327.3817861-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ed1e6560df25..0e663ec0c12a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -675,8 +675,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->user_mii_bus); From da03f5d1b2c319a2b74fe76edeadcd8fa5f44376 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 5 Aug 2024 22:37:42 -0700 Subject: [PATCH 0654/1523] bnxt_en : Fix memory out-of-bounds in bnxt_fill_hw_rss_tbl() A recent commit has modified the code in __bnxt_reserve_rings() to set the default RSS indirection table to default only when the number of RX rings is changing. While this works for newer firmware that requires RX ring reservations, it causes the regression on older firmware not requiring RX ring resrvations (BNXT_NEW_RM() returns false). With older firmware, RX ring reservations are not required and so hw_resc->resv_rx_rings is not always set to the proper value. The comparison: if (old_rx_rings != bp->hw_resc.resv_rx_rings) in __bnxt_reserve_rings() may be false even when the RX rings are changing. This will cause __bnxt_reserve_rings() to skip setting the default RSS indirection table to default to match the current number of RX rings. This may later cause bnxt_fill_hw_rss_tbl() to use an out-of-range index. We already have bnxt_check_rss_tbl_no_rmgr() to handle exactly this scenario. We just need to move it up in bnxt_need_reserve_rings() to be called unconditionally when using older firmware. Without the fix, if the TX rings are changing, we'll skip the bnxt_check_rss_tbl_no_rmgr() call and __bnxt_reserve_rings() may also skip the bnxt_set_dflt_rss_indir_tbl() call for the reason explained in the last paragraph. Without setting the default RSS indirection table to default, it causes the regression: BUG: KASAN: slab-out-of-bounds in __bnxt_hwrm_vnic_set_rss+0xb79/0xe40 Read of size 2 at addr ffff8881c5809618 by task ethtool/31525 Call Trace: __bnxt_hwrm_vnic_set_rss+0xb79/0xe40 bnxt_hwrm_vnic_rss_cfg_p5+0xf7/0x460 __bnxt_setup_vnic_p5+0x12e/0x270 __bnxt_open_nic+0x2262/0x2f30 bnxt_open_nic+0x5d/0xf0 ethnl_set_channels+0x5d4/0xb30 ethnl_default_set_doit+0x2f1/0x620 Reported-by: Breno Leitao Closes: https://lore.kernel.org/netdev/ZrC6jpghA3PWVWSB@gmail.com/ Fixes: 98ba1d931f61 ("bnxt_en: Fix RSS logic in __bnxt_reserve_rings()") Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Tested-by: Breno Leitao Link: https://patch.msgid.link/20240806053742.140304-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 23f74c6c88b9..e27e1082ee33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7591,19 +7591,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. */ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); From f01032a2ca099ec8d619aaa916c3762aa62495df Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 6 Aug 2024 15:09:20 -0700 Subject: [PATCH 0655/1523] idpf: fix memory leaks and crashes while performing a soft reset The second tagged commit introduced a UAF, as it removed restoring q_vector->vport pointers after reinitializating the structures. This is due to that all queue allocation functions are performed here with the new temporary vport structure and those functions rewrite the backpointers to the vport. Then, this new struct is freed and the pointers start leading to nowhere. But generally speaking, the current logic is very fragile. It claims to be more reliable when the system is low on memory, but in fact, it consumes two times more memory as at the moment of running this function, there are two vports allocated with their queues and vectors. Moreover, it claims to prevent the driver from running into "bad state", but in fact, any error during the rebuild leaves the old vport in the partially allocated state. Finally, if the interface is down when the function is called, it always allocates a new queue set, but when the user decides to enable the interface later on, vport_open() allocates them once again, IOW there's a clear memory leak here. Just don't allocate a new queue set when performing a reset, that solves crashes and memory leaks. Readd the old queue number and reopen the interface on rollback - that solves limbo states when the device is left disabled and/or without HW queues enabled. Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Fixes: e4891e4687c8 ("idpf: split &idpf_queue into 4 strictly-typed queue structures") Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b0..10b884dd3475 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1335,9 +1335,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,11 +1349,9 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } + err = idpf_vport_queues_alloc(vport); + if (err) + return err; err = idpf_vport_intr_alloc(vport); if (err) { @@ -1539,7 +1536,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1895,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1926,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2171,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); From 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Tue, 6 Aug 2024 15:09:21 -0700 Subject: [PATCH 0656/1523] idpf: fix memleak in vport interrupt configuration The initialization of vport interrupt consists of two functions: 1) idpf_vport_intr_init() where a generic configuration is done 2) idpf_vport_intr_req_irq() where the irq for each q_vector is requested. The first function used to create a base name for each interrupt using "kasprintf()" call. Unfortunately, although that call allocated memory for a text buffer, that memory was never released. Fix this by removing creating the interrupt base name in 1). Instead, always create a full interrupt name in the function 2), because there is no need to create a base name separately, considering that the function 2) is never called out of idpf_vport_intr_init() context. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Cc: stable@vger.kernel.org # 6.7 Signed-off-by: Michal Kubiak Reviewed-by: Pavan Kumar Linga Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8d..a2f9f252694a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3780,13 +3780,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3806,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4326,7 +4328,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4341,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; From 290f1c033281c1a502a3cd1c53c3a549259c491f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 6 Aug 2024 15:09:22 -0700 Subject: [PATCH 0657/1523] idpf: fix UAFs when destroying the queues The second tagged commit started sometimes (very rarely, but possible) throwing WARNs from net/core/page_pool.c:page_pool_disable_direct_recycling(). Turned out idpf frees interrupt vectors with embedded NAPIs *before* freeing the queues making page_pools' NAPI pointers lead to freed memory before these pools are destroyed by libeth. It's not clear whether there are other accesses to the freed vectors when destroying the queues, but anyway, we usually free queue/interrupt vectors only when the queues are destroyed and the NAPIs are guaranteed to not be referenced anywhere. Invert the allocation and freeing logic making queue/interrupt vectors be allocated first and freed last. Vectors don't require queues to be present, so this is safe. Additionally, this change allows to remove that useless queue->q_vector pointer cleanup, as vectors are still valid when freeing the queues (+ both are freed within one function, so it's not clear why nullify the pointers at all). Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Fixes: 90912f9f4f2d ("idpf: convert header split mode to libeth + napi_build_skb()") Reported-by: Michal Kubiak Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 24 ++++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 24 +-------------------- 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 10b884dd3475..0b6c8fd5bc90 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -900,8 +900,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1349,43 +1349,43 @@ static int idpf_vport_open(struct idpf_vport *vport) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1452,10 +1452,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index a2f9f252694a..585c3dadd9bf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3576,9 +3576,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3591,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } From 74b0666f97f9455bc799405b7874df62fcb66bae Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 6 Aug 2024 13:35:33 +0200 Subject: [PATCH 0658/1523] i2c: testunit: match HostNotify test name with docs Ensure the test has the same name in the code as it has in the docs. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-slave-testunit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 4e03b75f9ad7..4c550306f3ec 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -18,7 +18,7 @@ enum testunit_cmds { TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ - TU_CMD_HOST_NOTIFY, + TU_CMD_SMBUS_HOST_NOTIFY, TU_CMD_SMBUS_BLOCK_PROC_CALL, TU_NUM_CMDS }; @@ -60,7 +60,7 @@ static void i2c_slave_testunit_work(struct work_struct *work) msg.len = tu->regs[TU_REG_DATAH]; break; - case TU_CMD_HOST_NOTIFY: + case TU_CMD_SMBUS_HOST_NOTIFY: msg.addr = 0x08; msg.flags = 0; msg.len = 3; From 03898691d42e0170e7d00f07cbe21ce0e9f3a8fa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 8 Aug 2024 10:18:01 +0200 Subject: [PATCH 0659/1523] ALSA: usb-audio: Re-add ScratchAmp quirk entries At the code refactoring of USB-audio quirk handling, I assumed that the quirk entries of Stanton ScratchAmp devices were only about the device name, and moved them completely into the rename table. But it seems that the device requires the quirk entry so that it's probed by the driver itself. This re-adds back the quirk entries of ScratchAmp, but in a minimalistic manner. Fixes: 5436f59bc5bc ("ALSA: usb-audio: Move device rename and profile quirks to an internal table") Link: https://patch.msgid.link/20240808081803.22300-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 73abc38a5400..f13a8d63a019 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2594,6 +2594,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Stanton ScratchAmp */ +{ USB_DEVICE(0x103d, 0x0100) }, +{ USB_DEVICE(0x103d, 0x0101) }, + /* Novation EMS devices */ { USB_DEVICE_VENDOR_SPEC(0x1235, 0x0001), From cecab0bb737fb74813733f2472761fc3a93857b1 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 7 Aug 2024 11:45:53 +0100 Subject: [PATCH 0660/1523] drm/i915/gem: Improve pfn calculation readability in vm_fault_gtt() By moving the pfn calculation to the set_address_limits() function we improve code readability. This way, set_address_limits() is responsible for calculating all memory mapping paramenters: "start", "end" and "pfn". This suggestion from Jonathan was made during the review of commit 8bdd9ef7e9b1 ("drm/i915/gem: Fix Virtual Memory mapping boundaries calculation"), which I liked, but it got lost on the way. Suggested-by: Jonathan Cavitt Signed-off-by: Andi Shyti Reviewed-by: Krzysztof Niemiec Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240807104553.481763-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index cac6d4184506..e9b2424156f0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -293,8 +293,10 @@ out: static void set_address_limits(struct vm_area_struct *area, struct i915_vma *vma, unsigned long obj_offset, + resource_size_t gmadr_start, unsigned long *start_vaddr, - unsigned long *end_vaddr) + unsigned long *end_vaddr, + unsigned long *pfn) { unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ long start, end; /* memory boundaries */ @@ -323,6 +325,10 @@ static void set_address_limits(struct vm_area_struct *area, /* Let's move back into the "<< PAGE_SHIFT" domain */ *start_vaddr = (unsigned long)start << PAGE_SHIFT; *end_vaddr = (unsigned long)end << PAGE_SHIFT; + + *pfn = (gmadr_start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + *pfn += (*start_vaddr - area->vm_start) >> PAGE_SHIFT; + *pfn += obj_offset - vma->gtt_view.partial.offset; } static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) @@ -441,11 +447,13 @@ retry: if (ret) goto err_unpin; - set_address_limits(area, vma, obj_offset, &start, &end); - - pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; - pfn += (start - area->vm_start) >> PAGE_SHIFT; - pfn += obj_offset - vma->gtt_view.partial.offset; + /* + * Dump all the necessary parameters in this function to perform the + * arithmetic calculation for the virtual address start and end and + * the PFN (Page Frame Number). + */ + set_address_limits(area, vma, obj_offset, ggtt->gmadr.start, + &start, &end, &pfn); /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); From 06ce0af34177a110d6a5cf71f924965b9b230691 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 29 May 2024 00:11:23 +0100 Subject: [PATCH 0661/1523] soc: fsl: qbman: remove unused struct 'cgr_comp' 'cgr_comp' has been unused since commit 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Sean Anderson Signed-off-by: Michael Ellerman Link: https://msgid.link/20240528231123.136664-1-linux@treblig.org --- drivers/soc/fsl/qbman/qman.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad2..4dc8aba33d9b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); From b7b930d104c38a545d862896759863d649be8252 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 7 Aug 2024 14:05:15 +0100 Subject: [PATCH 0662/1523] drm/i915: Replace double blank with single blank after comma in gem/ and gt/ Do not use double blanks, ", " in function parameters where it's not required by any alignment purpose. Replase it with a single blank, ", ". Signed-off-by: Andi Shyti Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240807130516.491053-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 2 +- drivers/gpu/drm/i915/gt/selftest_migrate.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 1495b6074492..68413c05c812 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -535,7 +535,7 @@ struct drm_i915_gem_object { * I915_CACHE_NONE. The only exception is userptr objects, where we * instead force I915_CACHE_LLC, but we also don't allow userspace to * ever change the @cache_level for such objects. Another special case - * is dma-buf, which doesn't rely on @cache_dirty, but there we + * is dma-buf, which doesn't rely on @cache_dirty, but there we * always do a forced flush when acquiring the pages, if there is a * chance that the pages can be read directly from main memory with * the GPU. diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3eff364ccf3a..ca460cee4f8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate, if (vaddr[x] != val) { pr_err("%ps failed, (%u != %u), offset: %zu\n", - fn, vaddr[x], val, x * sizeof(u32)); + fn, vaddr[x], val, x * sizeof(u32)); igt_hexdump(vaddr + i * 1024, 4096); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 7a63abf8f644..5b8080ec5315 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -99,7 +99,7 @@ static void __confirm_options(struct intel_uc *uc) } if (!intel_uc_supports_guc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", + gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ae6070b5bf07..f08f6674911e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -517,7 +517,7 @@ static int igt_mock_max_segment(void *arg) if (!IS_ALIGNED(daddr, ps)) { pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", - __func__, &daddr, ps); + __func__, &daddr, ps); err = -EINVAL; goto out_close; } From 6600c55ba0d46c6f2f74eaa6ddfe2b66cb8df852 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 7 Aug 2024 14:05:16 +0100 Subject: [PATCH 0663/1523] drm/i915: Replace double blank with single blank after comma Do not use double blanks, ", " in function parameters where it's not required by any alignment purpose. Replase it with a single blank, ", ". Signed-off-by: Andi Shyti Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240807130516.491053-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/display/i9xx_wm.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 2 +- drivers/gpu/drm/i915/gvt/trace.h | 2 +- drivers/gpu/drm/i915/i915_driver.c | 6 +++--- drivers/gpu/drm/i915/i915_sysfs.c | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 2b7c3d270b17..15cda57fbc91 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -4028,7 +4028,7 @@ void i9xx_wm_init(struct drm_i915_private *dev_priv) dev_priv->display.funcs.wm = &g4x_wm_funcs; } else if (IS_PINEVIEW(dev_priv)) { if (!pnv_get_cxsr_latency(dev_priv)) { - drm_info(&dev_priv->drm, "Unknown FSB/MEM, disabling CxSR\n"); + drm_info(&dev_priv->drm, "Unknown FSB/MEM, disabling CxSR\n"); /* Disable CxSR and never update its watermark again */ intel_set_memory_cxsr(dev_priv, false); dev_priv->display.funcs.wm = &nop_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 11ee4406dce8..85c3f806c821 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4973,7 +4973,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, } else { drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", port_name(port), phy_name(phy)); + "DDI %c/PHY %c", port_name(port), phy_name(phy)); } intel_encoder_link_check_init(encoder, intel_ddi_link_check); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2755ebbbb9d2..3d695688b6b5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5111,7 +5111,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (current_config->name != pipe_config->name) { \ BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \ __stringify(name) " is not bool"); \ - pipe_config_mismatch(&p, fastset, crtc, __stringify(name), \ + pipe_config_mismatch(&p, fastset, crtc, __stringify(name), \ "(expected %s, found %s)", \ str_yes_no(current_config->name), \ str_yes_no(pipe_config->name)); \ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 9c5a3f5beda2..a0a3b3e180f4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -493,7 +493,7 @@ static void crtc_updates_info(struct seq_file *m, seq_printf(m, "%sMax update: %lluns\n", hdr, crtc->debug.vbl.max); seq_printf(m, "%sAverage update: %lluns\n", - hdr, div64_u64(crtc->debug.vbl.sum, count)); + hdr, div64_u64(crtc->debug.vbl.sum, count)); seq_printf(m, "%sOverruns > %uus: %u\n", hdr, VBLANK_EVASION_TIME_US, crtc->debug.vbl.over); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 47f51a5ab493..ec357d2670f1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -150,7 +150,7 @@ int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, ssize_t ret; u8 bcaps; - ret = intel_dp_hdcp_read_bcaps(&dig_port->dp.aux, i915, &bcaps); + ret = intel_dp_hdcp_read_bcaps(&dig_port->dp.aux, i915, &bcaps); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 020f1aa28322..63874d385c6f 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -227,7 +227,7 @@ TRACE_EVENT(oos_sync, #define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, - u32 cmd_len, u32 buf_type, u32 buf_addr_type, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, void *workload, const char *cmd_name), TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index fb8e9c2fcea5..202b0e494256 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1693,9 +1693,9 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, drm_noop, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 613decd47760..8775beab9cb8 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -191,8 +191,8 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) i915_gpu_error_sysfs_teardown(dev_priv); - device_remove_bin_file(kdev, &dpf_attrs_1); - device_remove_bin_file(kdev, &dpf_attrs); + device_remove_bin_file(kdev, &dpf_attrs_1); + device_remove_bin_file(kdev, &dpf_attrs); kobject_put(dev_priv->sysfs_gt); } From 78296429e20052b029211b0aca64aadc5052d581 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 30 Jul 2024 19:53:16 +0530 Subject: [PATCH 0664/1523] platform/x86/amd/pmf: Fix to Update HPD Data When ALS is Disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the Ambient Light Sensor (ALS) is disabled, the current code in the PMF driver does not query for Human Presence Detection (HPD) data in amd_pmf_get_sensor_info(). As a result, stale HPD data is used by PMF-TA to evaluate policy conditions, leading to unexpected behavior in the policy output actions. To resolve this issue, modify the PMF driver to query HPD data independently of ALS. Since user_present is a boolean, modify the current code to return true if the user is present and false if the user is away or if the sensor is not detected, and report this status to the PMF TA firmware accordingly. With this change, amd_pmf_get_sensor_info() now returns void instead of int. Fixes: cedecdba60f4 ("platform/x86/amd/pmf: Get ambient light information from AMD SFH driver") Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20240730142316.3846259-1-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/spc.c | 32 ++++++++++-------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a3dec14c3004..3c153fb1425e 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -150,36 +150,26 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } -static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +static void amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { struct amd_sfh_info sfh_info; - int ret; + + /* Get the latest information from SFH */ + in->ev_info.user_present = false; /* Get ALS data */ - ret = amd_get_sfh_info(&sfh_info, MT_ALS); - if (!ret) + if (!amd_get_sfh_info(&sfh_info, MT_ALS)) in->ev_info.ambient_light = sfh_info.ambient_light; else - return ret; + dev_dbg(dev->dev, "ALS is not enabled/detected\n"); /* get HPD data */ - ret = amd_get_sfh_info(&sfh_info, MT_HPD); - if (ret) - return ret; - - switch (sfh_info.user_present) { - case SFH_NOT_DETECTED: - in->ev_info.user_present = 0xff; /* assume no sensors connected */ - break; - case SFH_USER_PRESENT: - in->ev_info.user_present = 1; - break; - case SFH_USER_AWAY: - in->ev_info.user_present = 0; - break; + if (!amd_get_sfh_info(&sfh_info, MT_HPD)) { + if (sfh_info.user_present == SFH_USER_PRESENT) + in->ev_info.user_present = true; + } else { + dev_dbg(dev->dev, "HPD is not enabled/detected\n"); } - - return 0; } void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) From 613e3900c24bb1379d994f44d75d31c3223cc263 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 25 Jul 2024 11:21:07 +0200 Subject: [PATCH 0665/1523] platform/x86: ideapad-laptop: introduce a generic notification chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several cases where a notification chain can simplify Lenovo WMI drivers. Add a generic notification chain into ideapad-laptop. Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/c5a43efae8a32bd034c3d19c0a686941347575a7.1721898747.git.soyer@irl.hu Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 37 +++++++++++++++++++++++++++ drivers/platform/x86/ideapad-laptop.h | 5 ++++ 2 files changed, 42 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1ace711f7442..866b32bfe2c9 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1592,6 +1592,39 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ priv->r_touchpad_val = value; } +static int ideapad_laptop_nb_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + switch (action) { + } + + return 0; +} + +static struct notifier_block ideapad_laptop_notifier = { + .notifier_call = ideapad_laptop_nb_notify, +}; + +static BLOCKING_NOTIFIER_HEAD(ideapad_laptop_chain_head); + +int ideapad_laptop_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_register_notifier, IDEAPAD_LAPTOP); + +int ideapad_laptop_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_unregister_notifier, IDEAPAD_LAPTOP); + +void ideapad_laptop_call_notifier(unsigned long action, void *data) +{ + blocking_notifier_call_chain(&ideapad_laptop_chain_head, action, data); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_call_notifier, IDEAPAD_LAPTOP); + static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) { struct ideapad_private *priv = data; @@ -1974,6 +2007,8 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (err) goto shared_init_failed; + ideapad_laptop_register_notifier(&ideapad_laptop_notifier); + return 0; shared_init_failed: @@ -2006,6 +2041,8 @@ static void ideapad_acpi_remove(struct platform_device *pdev) struct ideapad_private *priv = dev_get_drvdata(&pdev->dev); int i; + ideapad_laptop_unregister_notifier(&ideapad_laptop_notifier); + ideapad_shared_exit(priv); acpi_remove_notify_handler(priv->adev->handle, diff --git a/drivers/platform/x86/ideapad-laptop.h b/drivers/platform/x86/ideapad-laptop.h index 4498a96de597..3eb0dcd6bf7b 100644 --- a/drivers/platform/x86/ideapad-laptop.h +++ b/drivers/platform/x86/ideapad-laptop.h @@ -12,6 +12,11 @@ #include #include #include +#include + +int ideapad_laptop_register_notifier(struct notifier_block *nb); +int ideapad_laptop_unregister_notifier(struct notifier_block *nb); +void ideapad_laptop_call_notifier(unsigned long action, void *data); enum { VPCCMD_R_VPC1 = 0x10, From cde7886b35176d56e72bfc68dc104fa08e7b072c Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 25 Jul 2024 11:21:08 +0200 Subject: [PATCH 0666/1523] platform/x86: ideapad-laptop: move ymc_trigger_ec from lenovo-ymc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some models need to trigger the EC after each YMC event for the yoga mode control to work properly. EC triggering consist of a VPC call from the lenovo-ymc module. Except for this, all VPC calls are in the ideapad-laptop module. Since ideapad-laptop has a notification chain, a new YMC_EVENT action can be added and triggered from the lenovo-ymc module. Then the ideapad-laptop can trigger the EC. If the triggering is in the ideapad-laptop module, then the ec_trigger module parameter should be there as well. Move the ymc_trigger_ec functionality and the ec_trigger module parameter to the ideapad-laptop module. Signed-off-by: Gergo Koteles Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/d980ab3ac32b5e554f456b0ff17279bfdbe2a203.1721898747.git.soyer@irl.hu Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/Kconfig | 1 + drivers/platform/x86/ideapad-laptop.c | 49 ++++++++++++++++++++++ drivers/platform/x86/ideapad-laptop.h | 4 ++ drivers/platform/x86/lenovo-ymc.c | 60 +-------------------------- 4 files changed, 56 insertions(+), 58 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 665fa9524986..ddfccc226751 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -477,6 +477,7 @@ config LENOVO_YMC tristate "Lenovo Yoga Tablet Mode Control" depends on ACPI_WMI depends on INPUT + depends on IDEAPAD_LAPTOP select INPUT_SPARSEKMAP help This driver maps the Tablet Mode Control switch to SW_TABLET_MODE input diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 866b32bfe2c9..9fc1bb990e47 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -146,6 +146,7 @@ struct ideapad_private { bool touchpad_ctrl_via_ec : 1; bool ctrl_ps2_aux_port : 1; bool usb_charging : 1; + bool ymc_ec_trigger : 1; } features; struct { bool initialized; @@ -194,6 +195,12 @@ MODULE_PARM_DESC(touchpad_ctrl_via_ec, "Enable registering a 'touchpad' sysfs-attribute which can be used to manually " "tell the EC to enable/disable the touchpad. This may not work on all models."); +static bool ymc_ec_trigger __read_mostly; +module_param(ymc_ec_trigger, bool, 0444); +MODULE_PARM_DESC(ymc_ec_trigger, + "Enable EC triggering work-around to force emitting tablet mode events. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + /* * shared data */ @@ -1592,10 +1599,50 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ priv->r_touchpad_val = value; } +static const struct dmi_system_id ymc_ec_trigger_quirk_dmi_table[] = { + { + /* Lenovo Yoga 7 14ARB7 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + }, + }, + { + /* Lenovo Yoga 7 14ACN6 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), + }, + }, + { } +}; + +static void ideapad_laptop_trigger_ec(void) +{ + struct ideapad_private *priv; + int ret; + + guard(mutex)(&ideapad_shared_mutex); + + priv = ideapad_shared; + if (!priv) + return; + + if (!priv->features.ymc_ec_trigger) + return; + + ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); + if (ret) + dev_warn(&priv->platform_device->dev, "Could not write YMC: %d\n", ret); +} + static int ideapad_laptop_nb_notify(struct notifier_block *nb, unsigned long action, void *data) { switch (action) { + case IDEAPAD_LAPTOP_YMC_EVENT: + ideapad_laptop_trigger_ec(); + break; } return 0; @@ -1761,6 +1808,8 @@ static void ideapad_check_features(struct ideapad_private *priv) priv->features.ctrl_ps2_aux_port = ctrl_ps2_aux_port || dmi_check_system(ctrl_ps2_aux_port_list); priv->features.touchpad_ctrl_via_ec = touchpad_ctrl_via_ec; + priv->features.ymc_ec_trigger = + ymc_ec_trigger || dmi_check_system(ymc_ec_trigger_quirk_dmi_table); if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) priv->features.fan_mode = true; diff --git a/drivers/platform/x86/ideapad-laptop.h b/drivers/platform/x86/ideapad-laptop.h index 3eb0dcd6bf7b..948cc61800a9 100644 --- a/drivers/platform/x86/ideapad-laptop.h +++ b/drivers/platform/x86/ideapad-laptop.h @@ -14,6 +14,10 @@ #include #include +enum ideapad_laptop_notifier_actions { + IDEAPAD_LAPTOP_YMC_EVENT, +}; + int ideapad_laptop_register_notifier(struct notifier_block *nb); int ideapad_laptop_unregister_notifier(struct notifier_block *nb); void ideapad_laptop_call_notifier(unsigned long action, void *data); diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c index e1fbc35504d4..e0bbd6a14a89 100644 --- a/drivers/platform/x86/lenovo-ymc.c +++ b/drivers/platform/x86/lenovo-ymc.c @@ -20,32 +20,10 @@ #define LENOVO_YMC_QUERY_INSTANCE 0 #define LENOVO_YMC_QUERY_METHOD 0x01 -static bool ec_trigger __read_mostly; -module_param(ec_trigger, bool, 0444); -MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events"); - static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type"); -static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { - { - /* Lenovo Yoga 7 14ARB7 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), - }, - }, - { - /* Lenovo Yoga 7 14ACN6 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), - }, - }, - { } -}; - static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { { .matches = { @@ -62,21 +40,8 @@ static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { struct lenovo_ymc_private { struct input_dev *input_dev; - struct acpi_device *ec_acpi_dev; }; -static void lenovo_ymc_trigger_ec(struct wmi_device *wdev, struct lenovo_ymc_private *priv) -{ - int err; - - if (!priv->ec_acpi_dev) - return; - - err = write_ec_cmd(priv->ec_acpi_dev->handle, VPCCMD_W_YMC, 1); - if (err) - dev_warn(&wdev->dev, "Could not write YMC: %d\n", err); -} - static const struct key_entry lenovo_ymc_keymap[] = { /* Laptop */ { KE_SW, 0x01, { .sw = { SW_TABLET_MODE, 0 } } }, @@ -125,11 +90,9 @@ static void lenovo_ymc_notify(struct wmi_device *wdev, union acpi_object *data) free_obj: kfree(obj); - lenovo_ymc_trigger_ec(wdev, priv); + ideapad_laptop_call_notifier(IDEAPAD_LAPTOP_YMC_EVENT, &code); } -static void acpi_dev_put_helper(void *p) { acpi_dev_put(p); } - static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) { struct lenovo_ymc_private *priv; @@ -143,29 +106,10 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) return -ENODEV; } - ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table); - priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (ec_trigger) { - pr_debug("Lenovo YMC enable EC triggering.\n"); - priv->ec_acpi_dev = acpi_dev_get_first_match_dev("VPC2004", NULL, -1); - - if (!priv->ec_acpi_dev) { - dev_err(&wdev->dev, "Could not find EC ACPI device.\n"); - return -ENODEV; - } - err = devm_add_action_or_reset(&wdev->dev, - acpi_dev_put_helper, priv->ec_acpi_dev); - if (err) { - dev_err(&wdev->dev, - "Could not clean up EC ACPI device: %d\n", err); - return err; - } - } - input_dev = devm_input_allocate_device(&wdev->dev); if (!input_dev) return -ENOMEM; @@ -192,7 +136,6 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) dev_set_drvdata(&wdev->dev, priv); /* Report the state for the first time on probe */ - lenovo_ymc_trigger_ec(wdev, priv); lenovo_ymc_notify(wdev, NULL); return 0; } @@ -217,3 +160,4 @@ module_wmi_driver(lenovo_ymc_driver); MODULE_AUTHOR("Gergo Koteles "); MODULE_DESCRIPTION("Lenovo Yoga Mode Control driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IDEAPAD_LAPTOP); From 7cc06e729460a209b84d3db4db56c9f85f048cc2 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 25 Jul 2024 11:21:10 +0200 Subject: [PATCH 0667/1523] platform/x86: ideapad-laptop: add a mutex to synchronize VPC commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling VPC commands consists of several VPCW and VPCR ACPI calls. These calls and their results can get mixed up if they are called simultaneously from different threads, like acpi notify handler, sysfs, debugfs, notification chain. The commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs") made the race issues much worse than before it but some races were possible even before that commit. Add a mutex to synchronize VPC commands. Fixes: e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs") Fixes: e82882cdd241 ("platform/x86: Add driver for Yoga Tablet Mode switch") Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/f26782fa1194ad11ed5d9ba121a804e59b58b026.1721898747.git.soyer@irl.hu Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 64 ++++++++++++++++++++------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 9fc1bb990e47..98ec30fce9fd 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -126,6 +126,7 @@ struct ideapad_rfk_priv { struct ideapad_private { struct acpi_device *adev; + struct mutex vpc_mutex; /* protects the VPC calls */ struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct ideapad_rfk_priv rfk_priv[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; @@ -301,6 +302,8 @@ static int debugfs_status_show(struct seq_file *s, void *data) struct ideapad_private *priv = s->private; unsigned long value; + guard(mutex)(&priv->vpc_mutex); + if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL_MAX, &value)) seq_printf(s, "Backlight max: %lu\n", value); if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL, &value)) @@ -419,7 +422,8 @@ static ssize_t camera_power_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); if (err) return err; @@ -438,7 +442,8 @@ static ssize_t camera_power_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); if (err) return err; @@ -491,7 +496,8 @@ static ssize_t fan_mode_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); if (err) return err; @@ -513,7 +519,8 @@ static ssize_t fan_mode_store(struct device *dev, if (state > 4 || state == 3) return -EINVAL; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); if (err) return err; @@ -598,7 +605,8 @@ static ssize_t touchpad_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); if (err) return err; @@ -619,7 +627,8 @@ static ssize_t touchpad_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); if (err) return err; @@ -1012,6 +1021,8 @@ static int ideapad_rfk_set(void *data, bool blocked) struct ideapad_rfk_priv *priv = data; int opcode = ideapad_rfk_data[priv->dev].opcode; + guard(mutex)(&priv->priv->vpc_mutex); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } @@ -1025,6 +1036,8 @@ static void ideapad_sync_rfk_state(struct ideapad_private *priv) int i; if (priv->features.hw_rfkill_switch) { + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_RF, &hw_blocked)) return; hw_blocked = !hw_blocked; @@ -1198,8 +1211,9 @@ static void ideapad_input_novokey(struct ideapad_private *priv) { unsigned long long_pressed; - if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) + return; if (long_pressed) ideapad_input_report(priv, 17); @@ -1211,8 +1225,9 @@ static void ideapad_check_special_buttons(struct ideapad_private *priv) { unsigned long bit, value; - if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) + return; for_each_set_bit (bit, &value, 16) { switch (bit) { @@ -1245,6 +1260,8 @@ static int ideapad_backlight_get_brightness(struct backlight_device *blightdev) unsigned long now; int err; + guard(mutex)(&priv->vpc_mutex); + err = read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); if (err) return err; @@ -1257,6 +1274,8 @@ static int ideapad_backlight_update_status(struct backlight_device *blightdev) struct ideapad_private *priv = bl_get_data(blightdev); int err; + guard(mutex)(&priv->vpc_mutex); + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_BL, blightdev->props.brightness); if (err) @@ -1334,6 +1353,8 @@ static void ideapad_backlight_notify_power(struct ideapad_private *priv) if (!blightdev) return; + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_BL_POWER, &power)) return; @@ -1346,7 +1367,8 @@ static void ideapad_backlight_notify_brightness(struct ideapad_private *priv) /* if we control brightness via acpi video driver */ if (!priv->blightdev) - read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); + scoped_guard(mutex, &priv->vpc_mutex) + read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); else backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY); } @@ -1571,7 +1593,8 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ int ret; /* Without reading from EC touchpad LED doesn't switch state */ - ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); + scoped_guard(mutex, &priv->vpc_mutex) + ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); if (ret) return; @@ -1631,7 +1654,8 @@ static void ideapad_laptop_trigger_ec(void) if (!priv->features.ymc_ec_trigger) return; - ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); + scoped_guard(mutex, &priv->vpc_mutex) + ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); if (ret) dev_warn(&priv->platform_device->dev, "Could not write YMC: %d\n", ret); } @@ -1677,11 +1701,13 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) struct ideapad_private *priv = data; unsigned long vpc1, vpc2, bit; - if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) - return; + scoped_guard(mutex, &priv->vpc_mutex) { + if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) + return; - if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) - return; + if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) + return; + } vpc1 = (vpc2 << 8) | vpc1; @@ -1988,6 +2014,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->adev = adev; priv->platform_device = pdev; + err = devm_mutex_init(&pdev->dev, &priv->vpc_mutex); + if (err) + return err; + ideapad_check_features(priv); err = ideapad_sysfs_init(priv); From b635066c2e0fcf74428b0864c249c1dbcaa4b5bc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 6 Aug 2024 16:38:30 +0300 Subject: [PATCH 0668/1523] drm/i915: remove a few __i915_printk() uses __i915_printk() does nothing special for notice/info levels. Just use the regular drm_notice() and drm_info() calls. Reviewed-by: Andi Shyti Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/82857a0c04d3c11ca6758f05c13a3cec4f1a2f01.1722951405.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_utils.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 6f9e7b354b54..bee32222f0fd 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -54,8 +54,8 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint) { - __i915_printk(i915, KERN_NOTICE, "CI tainted:%#x by %pS\n", - taint, (void *)_RET_IP_); + drm_notice(&i915->drm, "CI tainted: %#x by %pS\n", + taint, __builtin_return_address(0)); /* Failures that occur during fault injection testing are expected */ if (!i915_error_injected()) @@ -74,9 +74,9 @@ int __i915_inject_probe_error(struct drm_i915_private *i915, int err, if (++i915_probe_fail_count < i915_modparams.inject_probe_failure) return 0; - __i915_printk(i915, KERN_INFO, - "Injecting failure %d at checkpoint %u [%s:%d]\n", - err, i915_modparams.inject_probe_failure, func, line); + drm_info(&i915->drm, "Injecting failure %d at checkpoint %u [%s:%d]\n", + err, i915_modparams.inject_probe_failure, func, line); + i915_modparams.inject_probe_failure = 0; return err; } From 94a438a7595bb5a7a2efb94de2dc818845254e4f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 6 Aug 2024 16:38:31 +0300 Subject: [PATCH 0669/1523] drm/i915: remove i915_report_error() i915_report_error() presently acts as a wrapper for __i915_printk(). In practice, it would be better to use drm level error reporting wherever possible, so replace all uses of i915_report_error() with the equivalent drm_err() call. These cases are not worth having a dedicated wrapper to also print bug reporting info. Replacing the calls leaves i915_report_error() with no users, so remove it. Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/19eab020c57c0fa45acacf4e4a8077e57cd4d561.1722951405.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_driver.c | 8 ++++---- drivers/gpu/drm/i915/i915_utils.h | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 202b0e494256..b518d29596e9 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -451,8 +451,8 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (HAS_PPGTT(dev_priv)) { if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_full_ppgtt(dev_priv)) { - i915_report_error(dev_priv, - "incompatible vGPU found, support for isolated ppGTT required\n"); + drm_err(&dev_priv->drm, + "incompatible vGPU found, support for isolated ppGTT required\n"); return -ENXIO; } } @@ -465,8 +465,8 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) */ if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_hwsp_emulation(dev_priv)) { - i915_report_error(dev_priv, - "old vGPU host found, support for HWSP emulation required\n"); + drm_err(&dev_priv->drm, + "old vGPU host found, support for HWSP emulation required\n"); return -ENXIO; } } diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 06ec6ceb61d5..feb078ae246f 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -49,9 +49,6 @@ void __printf(3, 4) __i915_printk(struct drm_i915_private *dev_priv, const char *level, const char *fmt, ...); -#define i915_report_error(dev_priv, fmt, ...) \ - __i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__) - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int __i915_inject_probe_error(struct drm_i915_private *i915, int err, From 372f244b01784b5ee233cdfd732b4c1929ddd71e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 6 Aug 2024 16:38:32 +0300 Subject: [PATCH 0670/1523] drm/i915: remove __i915_printk() With the previous cleanups, the last remaining user of __i915_printk() is i915_probe_error(). Switch that to use drm_dbg() and drm_err() instead, dropping the request to report bugs in the few remaining specific cases. It's not common for drivers to log bug filing requests to begin with, but these cases are in init, which is most likely to be tested in CI and least likely to be hit by end users anyway. Acked-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/be9baeab281f75999e96cc7ad1c06c6680494bc1.1722951405.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_utils.c | 41 ------------------------------- drivers/gpu/drm/i915/i915_utils.h | 13 +++++----- 2 files changed, 6 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index bee32222f0fd..b34a2d3d331d 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -11,47 +11,6 @@ #include "i915_reg.h" #include "i915_utils.h" -#define FDO_BUG_MSG "Please file a bug on drm/i915; see " FDO_BUG_URL " for details." - -void -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...) -{ - static bool shown_bug_once; - struct device *kdev = dev_priv->drm.dev; - bool is_error = level[1] <= KERN_ERR[1]; - bool is_debug = level[1] == KERN_DEBUG[1]; - struct va_format vaf; - va_list args; - - if (is_debug && !drm_debug_enabled(DRM_UT_DRIVER)) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (is_error) - dev_printk(level, kdev, "%pV", &vaf); - else - dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV", - __builtin_return_address(0), &vaf); - - va_end(args); - - if (is_error && !shown_bug_once) { - /* - * Ask the user to file a bug report for the error, except - * if they may have caused the bug by fiddling with unsafe - * module parameters. - */ - if (!test_taint(TAINT_USER)) - dev_notice(kdev, "%s", FDO_BUG_MSG); - shown_bug_once = true; - } -} - void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint) { drm_notice(&i915->drm, "CI tainted: %#x by %pS\n", diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index feb078ae246f..71bdc89bd621 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -45,10 +45,6 @@ struct timer_list; #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) -void __printf(3, 4) -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...); - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int __i915_inject_probe_error(struct drm_i915_private *i915, int err, @@ -66,9 +62,12 @@ bool i915_error_injected(void); #define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV) -#define i915_probe_error(i915, fmt, ...) \ - __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ - fmt, ##__VA_ARGS__) +#define i915_probe_error(i915, fmt, ...) ({ \ + if (i915_error_injected()) \ + drm_dbg(&(i915)->drm, fmt, ##__VA_ARGS__); \ + else \ + drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \ +}) #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ From 919f18f961c03d6694aa726c514184f2311a4614 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 7 Aug 2024 17:02:44 -0700 Subject: [PATCH 0671/1523] x86/mtrr: Check if fixed MTRRs exist before saving them MTRRs have an obsolete fixed variant for fine grained caching control of the 640K-1MB region that uses separate MSRs. This fixed variant has a separate capability bit in the MTRR capability MSR. So far all x86 CPUs which support MTRR have this separate bit set, so it went unnoticed that mtrr_save_state() does not check the capability bit before accessing the fixed MTRR MSRs. Though on a CPU that does not support the fixed MTRR capability this results in a #GP. The #GP itself is harmless because the RDMSR fault is handled gracefully, but results in a WARN_ON(). Add the missing capability check to prevent this. Fixes: 2b1f6278d77c ("[PATCH] x86: Save the MTRRs of the BSP before booting an AP") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240808000244.946864-1-ak@linux.intel.com --- arch/x86/kernel/cpu/mtrr/mtrr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 767bf1c71aad..2a2fc14955cd 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -609,7 +609,7 @@ void mtrr_save_state(void) { int first_cpu; - if (!mtrr_enabled()) + if (!mtrr_enabled() || !mtrr_state.have_fixed) return; first_cpu = cpumask_first(cpu_online_mask); From 85ba108a529d99c82e814eaf782a9443acf5eaed Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 6 Aug 2024 14:08:41 +0100 Subject: [PATCH 0672/1523] net: stmmac: dwmac4: fix PCS duplex mode decode dwmac4 was decoding the duplex mode from the GMAC_PHYIF_CONTROL_STATUS register incorrectly, using GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK (value 1) rather than GMAC_PHYIF_CTRLSTATUS_LNKMOD (bit 16). Fix this. Fixes: 70523e639bf8c ("drivers: net: stmmac: reworking the PCS code.") Reviewed-by: Andrew Halaney Reviewed-by: Andrew Lunn Reviewed-by: Serge Semin Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1sbJvd-001rGD-E3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 -- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41..93a78fd0737b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607e..31c387cc5f26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -786,7 +786,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? "Full" : "Half"); From 86a41ea9fd79ddb6145cb8ebf5aeafceabca6f7d Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 6 Aug 2024 17:06:26 +0100 Subject: [PATCH 0673/1523] l2tp: fix lockdep splat When l2tp tunnels use a socket provided by userspace, we can hit lockdep splats like the below when data is transmitted through another (unrelated) userspace socket which then gets routed over l2tp. This issue was previously discussed here: https://lore.kernel.org/netdev/87sfialu2n.fsf@cloudflare.com/ The solution is to have lockdep treat socket locks of l2tp tunnel sockets separately than those of standard INET sockets. To do so, use a different lockdep subclass where lock nesting is possible. ============================================ WARNING: possible recursive locking detected 6.10.0+ #34 Not tainted -------------------------------------------- iperf3/771 is trying to acquire lock: ffff8881027601d8 (slock-AF_INET/1){+.-.}-{2:2}, at: l2tp_xmit_skb+0x243/0x9d0 but task is already holding lock: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(slock-AF_INET/1); lock(slock-AF_INET/1); *** DEADLOCK *** May be due to missing lock nesting notation 10 locks held by iperf3/771: #0: ffff888102650258 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendmsg+0x1a/0x40 #1: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0 #2: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130 #3: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x28b/0x9f0 #4: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0xf9/0x260 #5: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10 #6: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0 #7: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130 #8: ffffffff822ac1e0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0xcc/0x1450 #9: ffff888101f33258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock#2){+...}-{2:2}, at: __dev_queue_xmit+0x513/0x1450 stack backtrace: CPU: 2 UID: 0 PID: 771 Comm: iperf3 Not tainted 6.10.0+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x69/0xa0 dump_stack+0xc/0x20 __lock_acquire+0x135d/0x2600 ? srso_alias_return_thunk+0x5/0xfbef5 lock_acquire+0xc4/0x2a0 ? l2tp_xmit_skb+0x243/0x9d0 ? __skb_checksum+0xa3/0x540 _raw_spin_lock_nested+0x35/0x50 ? l2tp_xmit_skb+0x243/0x9d0 l2tp_xmit_skb+0x243/0x9d0 l2tp_eth_dev_xmit+0x3c/0xc0 dev_hard_start_xmit+0x11e/0x420 sch_direct_xmit+0xc3/0x640 __dev_queue_xmit+0x61c/0x1450 ? ip_finish_output2+0xf4c/0x1130 ip_finish_output2+0x6b6/0x1130 ? srso_alias_return_thunk+0x5/0xfbef5 ? __ip_finish_output+0x217/0x380 ? srso_alias_return_thunk+0x5/0xfbef5 __ip_finish_output+0x217/0x380 ip_output+0x99/0x120 __ip_queue_xmit+0xae4/0xbc0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? tcp_options_write.constprop.0+0xcb/0x3e0 ip_queue_xmit+0x34/0x40 __tcp_transmit_skb+0x1625/0x1890 __tcp_send_ack+0x1b8/0x340 tcp_send_ack+0x23/0x30 __tcp_ack_snd_check+0xa8/0x530 ? srso_alias_return_thunk+0x5/0xfbef5 tcp_rcv_established+0x412/0xd70 tcp_v4_do_rcv+0x299/0x420 tcp_v4_rcv+0x1991/0x1e10 ip_protocol_deliver_rcu+0x50/0x220 ip_local_deliver_finish+0x158/0x260 ip_local_deliver+0xc8/0xe0 ip_rcv+0xe5/0x1d0 ? __pfx_ip_rcv+0x10/0x10 __netif_receive_skb_one_core+0xce/0xe0 ? process_backlog+0x28b/0x9f0 __netif_receive_skb+0x34/0xd0 ? process_backlog+0x28b/0x9f0 process_backlog+0x2cb/0x9f0 __napi_poll.constprop.0+0x61/0x280 net_rx_action+0x332/0x670 ? srso_alias_return_thunk+0x5/0xfbef5 ? find_held_lock+0x2b/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 handle_softirqs+0xda/0x480 ? __dev_queue_xmit+0xa2c/0x1450 do_softirq+0xa1/0xd0 __local_bh_enable_ip+0xc8/0xe0 ? __dev_queue_xmit+0xa2c/0x1450 __dev_queue_xmit+0xa48/0x1450 ? ip_finish_output2+0xf4c/0x1130 ip_finish_output2+0x6b6/0x1130 ? srso_alias_return_thunk+0x5/0xfbef5 ? __ip_finish_output+0x217/0x380 ? srso_alias_return_thunk+0x5/0xfbef5 __ip_finish_output+0x217/0x380 ip_output+0x99/0x120 __ip_queue_xmit+0xae4/0xbc0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? tcp_options_write.constprop.0+0xcb/0x3e0 ip_queue_xmit+0x34/0x40 __tcp_transmit_skb+0x1625/0x1890 tcp_write_xmit+0x766/0x2fb0 ? __entry_text_end+0x102ba9/0x102bad ? srso_alias_return_thunk+0x5/0xfbef5 ? __might_fault+0x74/0xc0 ? srso_alias_return_thunk+0x5/0xfbef5 __tcp_push_pending_frames+0x56/0x190 tcp_push+0x117/0x310 tcp_sendmsg_locked+0x14c1/0x1740 tcp_sendmsg+0x28/0x40 inet_sendmsg+0x5d/0x90 sock_write_iter+0x242/0x2b0 vfs_write+0x68d/0x800 ? __pfx_sock_write_iter+0x10/0x10 ksys_write+0xc8/0xf0 __x64_sys_write+0x3d/0x50 x64_sys_call+0xfaf/0x1f50 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f4d143af992 Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 e9 01 cc ff ff 41 54 b8 02 00 00 0 RSP: 002b:00007ffd65032058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4d143af992 RDX: 0000000000000025 RSI: 00007f4d143f3bcc RDI: 0000000000000005 RBP: 00007f4d143f2b28 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4d143f3bcc R13: 0000000000000005 R14: 0000000000000000 R15: 00007ffd650323f0 Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Suggested-by: Eric Dumazet Reported-by: syzbot+6acef9e0a4d1f46c83d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6acef9e0a4d1f46c83d4 CC: gnault@redhat.com CC: cong.wang@bytedance.com Signed-off-by: James Chapman Signed-off-by: Tom Parkin Link: https://patch.msgid.link/20240806160626.1248317-1-jchapman@katalix.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084..2e86f520f799 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -86,6 +86,11 @@ /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -1124,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock_nested(sk); + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); + if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1176,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); return ret; } From 9ee09edc05f20422e7ced84b1f8a5d3359926ac8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Aug 2024 10:56:59 -0700 Subject: [PATCH 0674/1523] net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC, while others might be only supported by the PHY. Make sure that the .get_wol() returns the union of both rather than only that of the PHY if the PHY supports Wake-on-LAN. Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN") Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20240806175659.3232204-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1248792d7fd4..0715ea5bf13e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - if (dev->phydev) { + if (dev->phydev) phy_ethtool_get_wol(dev->phydev, wol); - if (wol->supported) - return; - } - if (!device_can_wakeup(kdev)) { - wol->supported = 0; - wol->wolopts = 0; + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) return; - } - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); From 8fee6d5ad5fa18c270eedb2a2cdf58dbadefb94b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Wed, 7 Aug 2024 10:09:56 +0200 Subject: [PATCH 0675/1523] net: fec: Stop PPS on driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPS was not stopped in `fec_ptp_stop()`, called when the adapter was removed. Consequentially, you couldn't safely reload the driver with the PPS signal on. Fixes: 32cba57ba74b ("net: fec: introduce fec_ptp_stop and use in probe fail path") Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/netdev/CAOMZO5BzcZR8PwKKwBssQq_wAGzVgf1ffwe_nhpQJjviTdxy-w@mail.gmail.com/T/#m01dcb810bfc451a492140f6797ca77443d0cb79f Signed-off-by: Csókás, Bence Reviewed-by: Andrew Lunn Reviewed-by: Frank Li Link: https://patch.msgid.link/20240807080956.2556602-1-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f568..2e4f3e1782a2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -775,6 +775,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) From a70b637db15b4de25af3c5946c4399144b3bc241 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Aug 2024 09:54:22 +0200 Subject: [PATCH 0676/1523] net: pse-pd: tps23881: include missing bitfield.h header Using FIELD_GET() fails in configurations that don't already include the header file indirectly: drivers/net/pse-pd/tps23881.c: In function 'tps23881_i2c_probe': drivers/net/pse-pd/tps23881.c:755:13: error: implicit declaration of function 'FIELD_GET' [-Wimplicit-function-declaration] 755 | if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { | ^~~~~~~~~ Fixes: 89108cb5c285 ("net: pse-pd: tps23881: Fix the device ID check") Signed-off-by: Arnd Bergmann Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20240807075455.2055224-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/tps23881.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index f90db758554b..2ea75686a319 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 Bootlin, Kory Maincent */ +#include #include #include #include From b54de55990b0467538c6bb33523b28816384958a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 7 Aug 2024 17:06:12 +0100 Subject: [PATCH 0677/1523] net: ethtool: fix off-by-one error in max RSS context IDs Both ethtool_ops.rxfh_max_context_id and the default value used when it's not specified are supposed to be exclusive maxima (the former is documented as such; the latter, U32_MAX, cannot be used as an ID since it equals ETH_RXFH_CONTEXT_ALLOC), but xa_alloc() expects an inclusive maximum. Subtract one from 'limit' to produce an inclusive maximum, and pass that to xa_alloc(). Increase bnxt's max by one to prevent a (very minor) regression, as BNXT_MAX_ETH_RSS_CTX is an inclusive max. This is safe since bnxt is not actually hard-limited; BNXT_MAX_ETH_RSS_CTX is just a leftover from old driver code that managed context IDs itself. Rename rxfh_max_context_id to rxfh_max_num_contexts to make its semantics (hopefully) more obvious. Fixes: 847a8ab18676 ("net: ethtool: let the core choose RSS context IDs") Signed-off-by: Edward Cree Link: https://patch.msgid.link/5a2d11a599aa5b0cc6141072c01accfb7758650c.1723045898.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- include/linux/ethtool.h | 10 +++++----- net/ethtool/ioctl.c | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ab8e3f197e7b..9dadc89378f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -5290,7 +5290,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17..989c94eddb2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 8ca13208d240..a8e276ecf723 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1449,12 +1449,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; u32 ctx_id; /* driver uses new API, core allocates ID */ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + XA_LIMIT(1, limit - 1), + GFP_KERNEL_ACCOUNT); if (ret < 0) { kfree(ctx); goto out; From 4d7c3c1aba3ca12fad2e90163b8d5153363f93e5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Aug 2024 20:33:52 +0300 Subject: [PATCH 0678/1523] ethtool: Fix context creation with no parameters The 'at least one change' requirement is not applicable for context creation, skip the check in such case. This allows a command such as 'ethtool -X eth0 context new' to work. The command works by mistake when using older versions of userspace ethtool due to an incompatibility issue where rxfh.input_xfrm is passed as zero (unset) instead of RXH_XFRM_NO_CHANGE as done with recent userspace. This patch does not try to solve the incompatibility issue. Link: https://lore.kernel.org/netdev/05ae8316-d3aa-4356-98c6-55ed4253c8a7@nvidia.com/ Fixes: 84a1d9c48200 ("net: ethtool: extend RXNFC API to support RSS spreading of filter matches") Reviewed-by: Dragos Tatulea Reviewed-by: Jianbo Liu Signed-off-by: Gal Pressman Reviewed-by: Edward Cree Link: https://patch.msgid.link/20240807173352.3501746-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index a8e276ecf723..e18823bf2330 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1369,14 +1369,17 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key, function - * or input transformation. - */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.key_size && rxfh.key_size != dev_key_size)) + return -EINVAL; + + /* Must request at least one change: indir size, hash key, function + * or input transformation. + * There's no need for any of it in case of context creation. + */ + if (!create && (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) From 0411f73c13afcf619d7aa7546edbc5710a871cae Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Wed, 7 Aug 2024 21:52:09 +0100 Subject: [PATCH 0679/1523] net: dsa: microchip: disable EEE for KSZ8567/KSZ9567/KSZ9896/KSZ9897. As noted in the device errata [1-8], EEE support is not fully operational in the KSZ8567, KSZ9477, KSZ9567, KSZ9896, and KSZ9897 devices, causing link drops when connected to another device that supports EEE. The patch series "net: add EEE support for KSZ9477 switch family" merged in commit 9b0bf4f77162 caused EEE support to be enabled in these devices. A fix for this regression for the KSZ9477 alone was merged in commit 08c6d8bae48c2. This patch extends this fix to the other affected devices. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ8567R-Errata-DS80000752.pdf [2] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ8567S-Errata-DS80000753.pdf [3] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9477S-Errata-DS80000754.pdf [4] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9567R-Errata-DS80000755.pdf [5] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9567S-Errata-DS80000756.pdf [6] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9896C-Errata-DS80000757.pdf [7] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9897R-Errata-DS80000758.pdf [8] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9897S-Errata-DS80000759.pdf Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ8567/KSZ9567/KSZ9896/KSZ9897 Link: https://lore.kernel.org/netdev/137ce1ee-0b68-4c96-a717-c8164b514eec@martin-whitaker.me.uk/ Signed-off-by: Martin Whitaker Acked-by: Arun Ramadoss Reviewed-by: Oleksij Rempel Reviewed-by: Lukasz Majewski Link: https://patch.msgid.link/20240807205209.21464-1-foss@martin-whitaker.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b120e66d5669..1491099528be 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2578,7 +2578,11 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; + case KSZ8567_CHIP_ID: case KSZ9477_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: /* KSZ9477 Errata DS80000754C * * Module 4: Energy Efficient Ethernet (EEE) feature select must @@ -2588,6 +2592,13 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) * controls. If not disabled, the PHY ports can auto-negotiate * to enable EEE, and this feature can cause link drops when * linked to another device supporting EEE. + * + * The same item appears in the errata for the KSZ9567, KSZ9896, + * and KSZ9897. + * + * A similar item appears in the errata for the KSZ8567, but + * provides an alternative workaround. For now, use the simple + * workaround of disabling the EEE feature for this device too. */ return MICREL_NO_EEE; } From 1b5487aefb1ce7a6b1f15a33297d1231306b4122 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 31 Jul 2024 21:38:50 -0500 Subject: [PATCH 0680/1523] smb3: fix setting SecurityFlags when encryption is required Setting encryption as required in security flags was broken. For example (to require all mounts to be encrypted by setting): "echo 0x400c5 > /proc/fs/cifs/SecurityFlags" Would return "Invalid argument" and log "Unsupported security flags" This patch fixes that (e.g. allowing overriding the default for SecurityFlags 0x00c5, including 0x40000 to require seal, ie SMB3.1.1 encryption) so now that works and forces encryption on subsequent mounts. Acked-by: Bharath SM Cc: stable@vger.kernel.org Signed-off-by: Steve French --- Documentation/admin-guide/cifs/usage.rst | 2 +- fs/smb/client/cifs_debug.c | 2 +- fs/smb/client/cifsglob.h | 8 ++++---- fs/smb/client/smb2pdu.c | 3 +++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index fd4b56c0996f..c09674a75a9e 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -742,7 +742,7 @@ SecurityFlags Flags which control security negotiation and may use NTLMSSP 0x00080 must use NTLMSSP 0x80080 seal (packet encryption) 0x00040 - must seal (not implemented yet) 0x40040 + must seal 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. This field diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c71ae5c04306..4a20e92474b2 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) static void cifs_security_flags_handle_must_flags(unsigned int *flags) { - unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL); if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) *flags = CIFSSEC_MUST_KRB5; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index f6d1f075987f..b9f46d29a441 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1881,7 +1881,7 @@ static inline bool is_replayable_error(int error) #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 -#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 @@ -1891,11 +1891,11 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ -#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ +#define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9a06b5594669..83facb54276a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) if (tcon->seal && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) return 1; + if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + return 1; return 0; } From bdcffe4be7cb90ccd12c49924dad9e2eda11b57a Mon Sep 17 00:00:00 2001 From: Xiaxi Shen Date: Wed, 7 Aug 2024 09:53:20 -0700 Subject: [PATCH 0681/1523] Fix spelling errors in Server Message Block Fixed typos in various files under fs/smb/client/ Signed-off-by: Xiaxi Shen Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 4 ++-- fs/smb/client/misc.c | 2 +- fs/smb/client/smbdirect.c | 8 ++++---- fs/smb/client/transport.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index b9f46d29a441..5c9b3e6cd95f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -345,7 +345,7 @@ struct smb_version_operations { /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); - /* close tree connecion */ + /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, @@ -816,7 +816,7 @@ struct TCP_Server_Info { * Protected by @refpath_lock and @srv_lock. The @refpath_lock is * mostly used for not requiring a copy of @leaf_fullpath when getting * cached or new DFS referrals (which might also sleep during I/O). - * While @srv_lock is held for making string and NULL comparions against + * While @srv_lock is held for making string and NULL comparisons against * both fields as in mount(2) and cache refresh. * * format: \\HOST\SHARE[\OPTIONAL PATH] diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index b28ff62f1f15..3fe5bfc389d0 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) * on simple responses (wct, bcc both zero) * in particular have seen this on * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized + * one byte of bcc potentially uninitialized */ /* zero rest of bcc */ tmp[sizeof(struct smb_hdr)+1] = 0; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d74e829de51c..7bcc379014ca 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work) else response = get_empty_queue_buffer(info); if (!response) { - /* now switch to emtpy packet queue */ + /* now switch to empty packet queue */ if (use_receive_queue) { use_receive_queue = 0; continue; @@ -618,7 +618,7 @@ out: /* * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requries FRWR on RDMA read/write + * This implementation requires FRWR on RDMA read/write * return value: true if it is supported */ static bool frwr_is_supported(struct ib_device_attr *attrs) @@ -2177,7 +2177,7 @@ cleanup_entries: * MR available in the list. It may access the list while the * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock * as they never modify the same places. However, there may be several CPUs - * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ static struct smbd_mr *get_mr(struct smbd_connection *info) @@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* * There is no need for waiting for complemtion on ib_post_send * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actaully send I/O to remote peer + * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index adfe0d058701..6e68aaf5bd20 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, out: /* * This will dequeue all mids. After this it is important that the - * demultiplex_thread will not process any of these mids any futher. + * demultiplex_thread will not process any of these mids any further. * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ From 1cb6ab446424649f03c82334634360c2e3043684 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 23 Jul 2024 17:15:44 +0800 Subject: [PATCH 0682/1523] MIPS: Loongson64: Set timer mode in cpu-probe Loongson64 C and G processors have EXTIMER feature which is conflicting with CP0 counter. Although the processor resets in EXTIMER disabled & INTIMER enabled mode, which is compatible with MIPS CP0 compare, firmware may attempt to enable EXTIMER and interfere CP0 compare. Set timer mode back to MIPS compatible mode to fix booting on systems with such firmware before we have an actual driver for EXTIMER. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/cpu-probe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index bda7f193baab..af7412549e6e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1724,12 +1724,16 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; case PRID_IMP_LOONGSON_64G: __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); decode_cpucfg(c); + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; default: panic("Unknown Loongson Processor ID!"); From 9eb18136af9fe4dd688724070f2bfba271bd1542 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Aug 2024 10:15:46 +0100 Subject: [PATCH 0683/1523] KVM: arm64: vgic: Hold config_lock while tearing down a CPU interface Tearing down a vcpu CPU interface involves freeing the private interrupt array. If we don't hold the lock, we may race against another thread trying to configure it. Yeah, fuzzers do wonderful things... Taking the lock early solves this particular problem. Fixes: 03b3d00a70b5 ("KVM: arm64: vgic: Allocate private interrupts on demand") Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240808091546.3262111-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 7f68cf58b978..41feb858ff9a 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -438,14 +438,13 @@ void kvm_vgic_destroy(struct kvm *kvm) unsigned long i; mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) __kvm_vgic_vcpu_destroy(vcpu); - mutex_lock(&kvm->arch.config_lock); - kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); From 5819e464a17587e6830cfab05f3e91a9a8753a41 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 8 Aug 2024 14:08:08 +1000 Subject: [PATCH 0684/1523] cpumask: Fix crash on updating CPU enabled mask The CPU enabled mask instead of the CPU possible mask should be used by set_cpu_enabled(). Otherwise, we run into crash due to write to the read-only CPU possible mask when vCPU is hot added on ARM64. (qemu) device_add host-arm-cpu,id=cpu1,socket-id=1 Unable to handle kernel write to read-only memory at virtual address ffff800080fa7190 : Call trace: register_cpu+0x1a4/0x2e8 arch_register_cpu+0x84/0xd8 acpi_processor_add+0x480/0x5b0 acpi_bus_attach+0x1c4/0x300 acpi_dev_for_one_check+0x3c/0x50 device_for_each_child+0x68/0xc8 acpi_dev_for_each_child+0x48/0x80 acpi_bus_attach+0x84/0x300 acpi_bus_scan+0x74/0x220 acpi_scan_rescan_bus+0x54/0x88 acpi_device_hotplug+0x208/0x478 acpi_hotplug_work_fn+0x2c/0x50 process_one_work+0x15c/0x3c0 worker_thread+0x2ec/0x400 kthread+0x120/0x130 ret_from_fork+0x10/0x20 Fix it by passing the CPU enabled mask instead of the CPU possible mask to set_cpu_enabled(). Fixes: 51c4767503d5 ("Merge tag 'bitmap-6.11-rc1' of https://github.com:/norov/linux") Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Signed-off-by: Yury Norov --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..53158de44b83 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1037,7 +1037,7 @@ void init_cpu_online(const struct cpumask *src); assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) -#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_possible_mask, (enabled)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) #define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) #define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) From ae02c7b7fea3e034fbd724c21d88406f71ccc2f8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 25 Jul 2024 23:43:35 -0700 Subject: [PATCH 0685/1523] drm/xe/rtp: Fix off-by-one when processing rules Gustavo noticed an odd "+ 2" in rtp_mark_active() while processing rtp rules and pointed that it should be "+ 1". In fact, while processing entries without actions (OOB workarounds), if the WA is activated and has OR rules, it will also inadvertently activate the very next workaround. Test in a LNL B0 platform by moving 18024947630 on top of 16020292621, makes the latter become active: $ cat /sys/kernel/debug/dri/0/gt0/workarounds ... OOB Workarounds 18024947630 16020292621 14018094691 16022287689 13011645652 22019338487_display In future a kunit test will be added to cover the rtp checks for entries without actions. Fixes: fe19328b900c ("drm/xe/rtp: Add support for entries with no action") Cc: Gustavo Sousa Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240726064337.797576-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit fd6797ec50c561f085bc94e3ee26f484a52af79e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 02e28274282f..5efe83cc82ab 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe, if (first == last) bitmap_set(ctx->active_entries, first, 1); else - bitmap_set(ctx->active_entries, first, last - first + 2); + bitmap_set(ctx->active_entries, first, last - first + 1); } /** From 4f854a8b1b85d46abd5ce206936d23f87ac5e0c9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 26 Jul 2024 18:22:16 -0700 Subject: [PATCH 0686/1523] drm/xe: Use dma_fence_chain_free in chain fence unused as a sync A chain fence is uninitialized if not installed in a drm sync obj. Thus if xe_sync_entry_cleanup is called and sync->chain_fence is non-NULL the proper cleanup is dma_fence_chain_free rather than a dma-fence put. Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2411 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2261 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240727012216.2118276-1-matthew.brost@intel.com (cherry picked from commit 7f7a2da3bf8bc0e0f6c239af495b7050056e889c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2883d9aca404..c4e018aa2982 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -263,7 +263,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } From ac3191c5cf47e2d5220a1ed7353a2e498a1f415e Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Thu, 1 Aug 2024 16:54:24 +0530 Subject: [PATCH 0687/1523] drm/xe/hwmon: Fix PL1 disable flow in xe_hwmon_power_max_write In xe_hwmon_power_max_write, for PL1 disable supported case, instead of returning after PL1 disable, PL1 enable path was also being run. Fixed it by returning after disable. v2: Correct typo and grammar in commit message. (Jonathan) Signed-off-by: Karthik Poosa Fixes: fef6dd12b45a ("drm/xe/hwmon: Protect hwmon rw attributes with hwmon_lock") Reviewed-by: Jonathan Cavitt Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240801112424.1841766-1-karthik.poosa@intel.com (cherry picked from commit 146458645e505f5eac498759bcd865cf7c0dfd9a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..832ea81faeee 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ From 642dfc9d5964b26f66fa6c28ce2861e11f9232aa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:16 -0700 Subject: [PATCH 0688/1523] drm/xe: Take ref to VM in delayed snapshot Kernel BO's don't take a ref to the VM, we need the VM for the delayed snapshot, so take a ref to the VM in delayed snapshot. v2: - Check for lrc_bo before taking a VM ref (CI) - Check lrc_bo->vm before taking / dropping a VM ref (CI) - Drop VM in xe_lrc_snapshot_free v5: - Fix commit message wording (Johnathan) Fixes: 47058633d9c5 ("drm/xe: Move lrc snapshot capturing to xe_lrc.c") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-2-matthew.brost@intel.com (cherry picked from commit c3bc97d2f102ddd5a8341eeb2dbae2a3e98bb46a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..58121821f081 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } From cd9aae921ab6b614e56ce690dedfe82e79db9354 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 8 Aug 2024 11:44:07 -0700 Subject: [PATCH 0689/1523] dt-bindings: display: panel: samsung,atna45dc02: Fix indentation The yaml had indentation errors: ./Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml:21:9: [warning] wrong indentation: expected 10 but found 8 (indentation) ./Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml:23:11: [warning] wrong indentation: expected 12 but found 10 (indentation) Fix them. Reported-by: Rob Herring Closes: https://lore.kernel.org/r/CAL_JsqLRTgQRPcfXy4G9hLoHMd-Uax4_C90BV_OZn4mK+-82kw@mail.gmail.com Fixes: 1c4a057d01f4 ("dt-bindings: display: panel: samsung,atna45dc02: Document ATNA45DC02") Reviewed-by: Rob Clark Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240808114407.1.I099e8e9e36407a0785d846b953031d40ea71e559@changeid --- .../bindings/display/panel/samsung,atna33xc20.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 87c601bcf20a..032f783eefc4 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -18,12 +18,12 @@ properties: # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel - const: samsung,atna33xc20 - items: - - enum: - # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel - - samsung,atna45af01 - # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel - - samsung,atna45dc02 - - const: samsung,atna33xc20 + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 + - const: samsung,atna33xc20 enable-gpios: true port: true From f39bae2e028b841732ca81d8131d27b48a6051ad Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 15:42:23 -0400 Subject: [PATCH 0690/1523] bcachefs: Switch to .get_inode_acl() .set_acl() requires a dentry, and if one isn't passed it marks the VFS inode as not having an ACL. This has been causing inodes with ACLs to have them "disappear" on bcachefs filesystem, depending on which path those inodes get pulled into the cache from. Switching to .get_inode_acl(), like other local filesystems, fixes this. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 11 +++++++---- fs/bcachefs/acl.h | 2 +- fs/bcachefs/fs.c | 8 ++++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0..331a17f3f113 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278..fe730a6bf0c1 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a..15fc41e63b6c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; From cb5b81bc9a448f8db817566f60f92e2ea788ea0f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 8 Aug 2024 12:29:40 -0700 Subject: [PATCH 0691/1523] module: warn about excessively long module waits Russell King reported that the arm cbc(aes) crypto module hangs when loaded, and Herbert Xu bisected it to commit 9b9879fc0327 ("modules: catch concurrent module loads, treat them as idempotent"), and noted: "So what's happening here is that the first modprobe tries to load a fallback CBC implementation, in doing so it triggers a load of the exact same module due to module aliases. IOW we're loading aes-arm-bs which provides cbc(aes). However, this needs a fallback of cbc(aes) to operate, which is made out of the generic cbc module + any implementation of aes, or ecb(aes). The latter happens to also be provided by aes-arm-cb so that's why it tries to load the same module again" So loading the aes-arm-bs module ends up wanting to recursively load itself, and the recursive load then ends up waiting for the original module load to complete. This is a regression, in that it used to be that we just tried to load the module multiple times, and then as we went on to install it the second time we would instead just error out because the module name already existed. That is actually also exactly what the original "catch concurrent loads" patch did in commit 9828ed3f695a ("module: error out early on concurrent load of the same module file"), but it turns out that it ends up being racy, in that erroring out before the module has been fully initialized will cause failures in dependent module loading. See commit ac2263b588df (which was the revert of that "error out early") commit for details about why erroring out before the module has been initialized is actually fundamentally racy. Now, for the actual recursive module load (as opposed to just concurrently loading the same module twice), the race is not an issue. At the same time it's hard for the kernel to see that this is recursion, because the module load is always done from a usermode helper, so the recursion is not some simple callchain within the kernel. End result: this is not the real fix, but this at least adds a warning for the situation (admittedly much too late for all the debugging pain that Russell and Herbert went through) and if we can come to a resolution on how to detect the recursion properly, this re-organizes the code to make that easier. Link: https://lore.kernel.org/all/ZrFHLqvFqhzykuYw@shell.armlinux.org.uk/ Reported-by: Russell King Debugged-by: Herbert Xu Signed-off-by: Linus Torvalds --- kernel/module/main.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index d9592195c5bb..6f4ec857bdef 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3183,15 +3183,28 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; - /* See if somebody else is doing the operation? */ - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; + /* Are we the winners of the race and get to do this? */ + if (!idempotent(&idem, file_inode(f))) { + int ret = init_module_from_file(f, uargs, flags); + return idempotent_complete(&idem, ret); } - /* Otherwise, we'll do it and complete others */ - return idempotent_complete(&idem, - init_module_from_file(f, uargs, flags)); + /* + * Somebody else won the race and is loading the module. + * + * We have to wait for it forever, since our 'idem' is + * on the stack and the list entry stays there until + * completed (but we could fix it under the idem_lock) + * + * It's also unclear what a real timeout might be, + * but we could maybe at least make this killable + * and remove the idem entry in that case? + */ + for (;;) { + if (wait_for_completion_timeout(&idem.complete, 10*HZ)) + return idem.ret; + pr_warn_once("module '%pD' taking a long time to load", f); + } } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) From 23a58b782f864951485d7a0018549729e007cb43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20St=C4=99pniak?= Date: Wed, 7 Aug 2024 02:12:19 +0200 Subject: [PATCH 0692/1523] ASoC: amd: yc: Support mic on Lenovo Thinkpad E14 Gen 6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lenovo Thinkpad E14 Gen 6 (model type 21M3) needs a quirk entry for internal mic to work. Signed-off-by: Krzysztof Stępniak Link: https://patch.msgid.link/20240807001219.1147-1-kfs.szk@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f4bbfffe9fcb..d30752c0dab2 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -220,6 +220,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M3"), + } + }, { .driver_data = &acp6x_card, .matches = { From 4684a2df9c5b3fc914377127faf2515aa9049093 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 7 Aug 2024 10:53:55 +0800 Subject: [PATCH 0693/1523] ASoC: codecs: ES8326: button detect issue We find that we need to set snd_jack_types to 0. If not, there will be a probability of button detection errors Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20240807025356.24904-2-zhangyi@everest-semi.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b246694ebb4f..be3c79232a31 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -843,6 +843,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326_disable_micbias(es8326->component); if (es8326->jack->status & SND_JACK_HEADPHONE) { dev_dbg(comp->dev, "Report hp remove event\n"); + snd_soc_jack_report(es8326->jack, 0, + SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2); snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET); /* mute adc when mic path switch */ regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44); From 6675e76a5c441b52b1b983ebb714122087020ebe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 19:02:27 +0200 Subject: [PATCH 0694/1523] ASoC: amd: yc: Add quirk entry for OMEN by HP Gaming Laptop 16-n0xxx Fix the missing mic on OMEN by HP Gaming Laptop 16-n0xxx by adding the quirk entry with the board ID 8A44. Cc: stable@vger.kernel.org Link: https://bugzilla.suse.com/show_bug.cgi?id=1227182 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807170249.16490-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index d30752c0dab2..0523c16305db 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -416,6 +416,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A43"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A44"), + } + }, { .driver_data = &acp6x_card, .matches = { From 2f3e2c9eaafc272266344d777f8de44f8632e247 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 13:49:28 +0200 Subject: [PATCH 0695/1523] ASoC: dt-bindings: qcom,wcd937x: Correct reset GPIO polarity in example The reset GPIO of WCD9370/WCD9375 is active low and that's how it is routed on typical boards, so correct the example DTS to use expected polarity. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806114931.40090-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml index de397d879acc..f94203798f24 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml @@ -42,7 +42,7 @@ examples: pinctrl-names = "default", "sleep"; pinctrl-0 = <&wcd_reset_n>; pinctrl-1 = <&wcd_reset_n_sleep>; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p8>; vdd-rxtx-supply = <&vreg_l18b_1p8>; vdd-px-supply = <&vreg_l18b_1p8>; From 55922275702e112652d314a9b6a6ca31d4b7252e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 13:49:29 +0200 Subject: [PATCH 0696/1523] ASoC: dt-bindings: qcom,wcd934x: Correct reset GPIO polarity in example The reset GPIO of WCD9340/WCD9341 is active low and that's how it is routed on typical boards, so correct the example DTS to use expected polarity. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806114931.40090-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml index beb0ff0245b0..a65b1d1d5fdd 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml @@ -199,10 +199,11 @@ additionalProperties: false examples: - | + #include codec@1,0{ compatible = "slim217,250"; reg = <1 0>; - reset-gpios = <&tlmm 64 0>; + reset-gpios = <&tlmm 64 GPIO_ACTIVE_LOW>; slim-ifc-dev = <&wcd9340_ifd>; #sound-dai-cells = <1>; interrupt-parent = <&tlmm>; From 871f1a16fa3506487de24b05d68be45e9185e77a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 13:49:30 +0200 Subject: [PATCH 0697/1523] ASoC: dt-bindings: qcom,wcd938x: Correct reset GPIO polarity in example The reset GPIO of WCD9380/WCD9385 is active low and that's how it is routed on typical boards, so correct the example DTS to use expected polarity. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806114931.40090-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml index cf6c3787adfe..10531350c336 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml @@ -34,9 +34,10 @@ unevaluatedProperties: false examples: - | + #include codec { compatible = "qcom,wcd9380-codec"; - reset-gpios = <&tlmm 32 0>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd938x_tx>; qcom,rx-device = <&wcd938x_rx>; From 81f88fddef9cddae6b4e5d9359022c7a2a3e3b6a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 13:49:31 +0200 Subject: [PATCH 0698/1523] ASoC: dt-bindings: qcom,wcd939x: Correct reset GPIO polarity in example The reset GPIO of WCD9390/WCD9395 is active low and that's how it is routed on typical boards, so correct the example DTS to use expected polarity, instead of IRQ flag (which is a logical mistake on its own). Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806114931.40090-4-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml index 6e76f6a8634f..c69291f4d575 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml @@ -52,10 +52,10 @@ unevaluatedProperties: false examples: - | - #include + #include codec { compatible = "qcom,wcd9390-codec"; - reset-gpios = <&tlmm 32 IRQ_TYPE_NONE>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd939x_tx>; qcom,rx-device = <&wcd939x_rx>; From 2f11f61f9d4d5692bcebb9d089429ee0c046e08a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 7 Aug 2024 15:01:40 +0100 Subject: [PATCH 0699/1523] MAINTAINERS: Update Cirrus Logic parts to linux-sound mailing list Now that most kernel work on sound has moved over to the linux-sound mailing list so should the Cirrus Logic audio parts. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240807140140.421359-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..d304054d661e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5306,7 +5306,7 @@ F: drivers/media/cec/i2c/ch7322.c CIRRUS LOGIC AUDIO CODEC DRIVERS M: David Rhodes M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Maintained F: Documentation/devicetree/bindings/sound/cirrus,cs* @@ -5375,7 +5375,7 @@ F: sound/soc/codecs/lochnagar-sc.c CIRRUS LOGIC MADERA CODEC DRIVERS M: Charles Keepax M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Supported W: https://github.com/CirrusLogic/linux-drivers/wiki From 5003d0ce5c7da3a02c0aff771f516f99731e7390 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 7 Aug 2024 18:27:03 +0200 Subject: [PATCH 0700/1523] ASoC: meson: axg-fifo: fix irq scheduling issue with PREEMPT_RT With PREEMPT_RT enabled a spinlock_t becomes a sleeping lock. This is usually not a problem with spinlocks used in IRQ context since IRQ handlers get threaded. However, if IRQF_ONESHOT is set, the primary handler won't be force-threaded and runs always in hardirq context. This is a problem because spinlock_t requires a preemptible context on PREEMPT_RT. In this particular instance, regmap mmio uses spinlock_t to protect the register access and IRQF_ONESHOT is set on the IRQ. In this case, it is actually better to do everything in threaded handler and it solves the problem with PREEMPT_RT. Reported-by: Arseniy Krasnov Closes: https://lore.kernel.org/linux-amlogic/20240729131652.3012327-1-avkrasnov@salutedevices.com Suggested-by: Sebastian Andrzej Siewior Fixes: b11d26660dff ("ASoC: meson: axg-fifo: use threaded irq to check periods") Signed-off-by: Jerome Brunet Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240807162705.4024136-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-fifo.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 7e6090af720b..75909196b769 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -207,25 +207,18 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) status = FIELD_GET(STATUS1_INT_STS, status); axg_fifo_ack_irq(fifo, status); - /* Use the thread to call period elapsed on nonatomic links */ - if (status & FIFO_INT_COUNT_REPEAT) - return IRQ_WAKE_THREAD; + if (status & ~FIFO_INT_COUNT_REPEAT) + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + if (status & FIFO_INT_COUNT_REPEAT) { + snd_pcm_period_elapsed(ss); + return IRQ_HANDLED; + } return IRQ_NONE; } -static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) -{ - struct snd_pcm_substream *ss = dev_id; - - snd_pcm_period_elapsed(ss); - - return IRQ_HANDLED; -} - int axg_fifo_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *ss) { @@ -251,8 +244,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, - axg_fifo_pcm_irq_block_thread, + /* Use the threaded irq handler only with non-atomic links */ + ret = request_threaded_irq(fifo->irq, NULL, + axg_fifo_pcm_irq_block, IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; From 72776774b55bb59b7b1b09117e915a5030110304 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Wed, 7 Aug 2024 14:26:48 +0000 Subject: [PATCH 0701/1523] ASoC: cs35l56: Patch CS35L56_IRQ1_MASK_18 to the default value Device tuning files made with early revision tooling may contain configuration that can unmask IRQ signals that are owned by the host. Adding a safe default to the regmap patch ensures that the hardware matches the driver expectations. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20240807142648.46932-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e7e8d617da94..bd74fef33d49 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -36,6 +36,7 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + { CS35L56_IRQ1_MASK_18, 0x1f7df0ff }, /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, From 0c84bde4f37ba27d50e4c70ecacd33fe4a57030d Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 8 Aug 2024 10:35:19 +0200 Subject: [PATCH 0702/1523] media: Revert "media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control()" This reverts commit 2052138b7da52ad5ccaf74f736d00f39a1c9198c. This breaks the TeVii s480 dual DVB-S2 S660. The device has a bulk in endpoint but no corresponding out endpoint, so the device does not pass the "has both receive and send bulk endpoint" test. Seemingly this device does not use dvb_usb_generic_rw() so I have tried removing the generic_bulk_ctrl_endpoint entry, but this resulted in different problems. As we have no explanation yet, revert. $ dmesg | grep -i -e dvb -e dw21 -e usb\ 4 [ 0.999122] usb 1-1: new high-speed USB device number 2 using ehci-pci [ 1.023123] usb 4-1: new high-speed USB device number 2 using ehci-pci [ 1.130247] usb 1-1: New USB device found, idVendor=9022, idProduct=d482, +bcdDevice= 0.01 [ 1.130257] usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 [ 1.152323] usb 4-1: New USB device found, idVendor=9022, idProduct=d481, +bcdDevice= 0.01 [ 1.152329] usb 4-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 [ 6.701033] dvb-usb: found a 'TeVii S480.2 USB' in cold state, will try to +load a firmware [ 6.701178] dvb-usb: downloading firmware from file 'dvb-usb-s660.fw' [ 6.701179] dw2102: start downloading DW210X firmware [ 6.703715] dvb-usb: found a 'Microsoft Xbox One Digital TV Tuner' in cold +state, will try to load a firmware [ 6.703974] dvb-usb: downloading firmware from file 'dvb-usb-dib0700-1.20.fw' [ 6.756432] usb 1-1: USB disconnect, device number 2 [ 6.862119] dvb-usb: found a 'TeVii S480.2 USB' in warm state. [ 6.862194] dvb-usb: TeVii S480.2 USB error while loading driver (-22) [ 6.862209] dvb-usb: found a 'TeVii S480.1 USB' in cold state, will try to +load a firmware [ 6.862244] dvb-usb: downloading firmware from file 'dvb-usb-s660.fw' [ 6.862245] dw2102: start downloading DW210X firmware [ 6.914811] usb 4-1: USB disconnect, device number 2 [ 7.014131] dvb-usb: found a 'TeVii S480.1 USB' in warm state. [ 7.014487] dvb-usb: TeVii S480.1 USB error while loading driver (-22) [ 7.014538] usbcore: registered new interface driver dw2102 Closes: https://lore.kernel.org/stable/20240801165146.38991f60@mir/ Fixes: 2052138b7da5 ("media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control()") Reported-by: Stefan Lippers-Hollmann Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dvb-usb-init.c | 35 +++--------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index 22d83ac18eb7..fbf58012becd 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); -static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - int ret; - - ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - } - return 0; -} - -static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - } -} - static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint); - if (ret) - return ret; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response); - if (ret) - return ret; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; @@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint); - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response); + if (d->props.generic_bulk_ctrl_endpoint != 0) { + usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + } return 0; From 05a3d6e9307250a5911d75308e4363466794ab21 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 8 Aug 2024 11:57:38 -0400 Subject: [PATCH 0703/1523] selinux: revert our use of vma_is_initial_heap() Unfortunately it appears that vma_is_initial_heap() is currently broken for applications that do not currently have any heap allocated, e.g. brk == start_brk. The breakage is such that it will cause SELinux to check for the process/execheap permission on memory regions that cross brk/start_brk even when there is no heap. The proper fix would be to correct vma_is_initial_heap(), but as there are multiple callers I am hesitant to unilaterally modify the helper out of concern that I would end up breaking some other subsystem. The mm developers have been made aware of the situation and hopefully they will have a fix at some point in the future, but we need a fix soon so we are simply going to revert our use of vma_is_initial_heap() in favor of our old logic/code which works as expected, even in the face of a zero size heap. We can return to using vma_is_initial_heap() at some point in the future when it is fixed. Cc: stable@vger.kernel.org Reported-by: Marc Reisner Closes: https://lore.kernel.org/all/ZrPmoLKJEf1wiFmM@marcreisner.com Fixes: 68df1baf158f ("selinux: use vma_is_initial_stack() and vma_is_initial_heap()") Signed-off-by: Paul Moore --- security/selinux/hooks.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd..bfa61e005aac 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3852,7 +3852,17 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || From 636cdf6fbddff4780ff4452a82afb099292b88b1 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Mon, 5 Aug 2024 13:54:35 -0700 Subject: [PATCH 0704/1523] drm/xe/guc: Enable w/a 14022293748 and 22019794406 Enable workarounds for HW bug where render engine reset fails. Given that we're bumping the minimum required GuC version to 70.29, we're guaranteed to always have support for this KLV in the GuC. v2: Enable KLV correctly for either workaround (Lucas) v4: Add check for minimum supported GuC firmware version. Enable w/a for hw version 20.01 too. (Daniele) v5 (Daniele): remove now unneeded fw type and version checks (JohnH) Signed-off-by: Julia Filipchuk Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240805205435.921921-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 6 ++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 8f9f60b28306..6b30743a2f6c 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -351,6 +351,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1c60b685dbc6..d1902a8581ca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc_fw.h" #include "xe_wa.h" /* Slack of a few additional entries per engine */ @@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0xC40, &offset, &remain); + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 540d38603f32..5cf27ff27ce6 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,6 +27,10 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +14022293748 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE) From e422c0bfd9e47e399e86bcc483f49d8b54064fc2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 7 Aug 2024 16:53:32 -0700 Subject: [PATCH 0705/1523] drm/xe: fix WA 14018094691 This WA is applied while initializing the media GT, but it a primary GT WA (because it modifies a register on the primary GT), so the XE_WA macro is returning false even when the WA should be applied. Fix this by using the primary GT in the macro. Note that this WA only applies to PXP and we don't yet support that in Xe, so there are no negative effects to this bug, which is why we didn't see any errors in testing. v2: use the primary GT in the macro instead of marking the WA as platform-wide (Lucas, Matt). Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240807235333.1370915-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..77ce44e845c5 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) From 36bb22a08a69d9984a8399c07310d18b115eae20 Mon Sep 17 00:00:00 2001 From: Gleb Korobeynikov Date: Thu, 8 Aug 2024 18:47:48 +0300 Subject: [PATCH 0706/1523] cifs: cifs_inval_name_dfs_link_error: correct the check for fullpath Replace the always-true check tcon->origin_fullpath with check of server->leaf_fullpath See https://bugzilla.kernel.org/show_bug.cgi?id=219083 The check of the new @tcon will always be true during mounting, since @tcon->origin_fullpath will only be set after the tree is connected to the latest common resource, as well as checking if the prefix paths from it are fully accessible. Fixes: 3ae872de4107 ("smb: client: fix shared DFS root mounts with different prefixes") Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Gleb Korobeynikov Signed-off-by: Steve French --- fs/smb/client/misc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 3fe5bfc389d0..c6f11e6f9eb9 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -1234,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, const char *full_path, bool *islink) { + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_ses *ses = tcon->ses; size_t len; char *path; @@ -1250,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, !is_tcon_dfs(tcon)) return 0; - spin_lock(&tcon->tc_lock); - if (!tcon->origin_fullpath) { - spin_unlock(&tcon->tc_lock); + spin_lock(&server->srv_lock); + if (!server->leaf_fullpath) { + spin_unlock(&server->srv_lock); return 0; } - spin_unlock(&tcon->tc_lock); + spin_unlock(&server->srv_lock); /* * Slow path - tcon is DFS and @full_path has prefix path, so attempt From a018c1b636e79b60149b41151ded7c2606d8606e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 5 Aug 2024 08:56:18 +0900 Subject: [PATCH 0707/1523] ksmbd: override fsids for share path check Sangsoo reported that a DAC denial error occurred when accessing files through the ksmbd thread. This patch override fsids for share path check. Reported-by: Sangsoo Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/share_config.c | 15 ++++++++++++--- fs/smb/server/mgmt/share_config.h | 4 +++- fs/smb/server/mgmt/tree_connect.c | 9 +++++---- fs/smb/server/mgmt/tree_connect.h | 4 ++-- fs/smb/server/smb2pdu.c | 2 +- fs/smb/server/smb_common.c | 9 +++++++-- fs/smb/server/smb_common.h | 2 ++ 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index e0a6b758094f..d8d03070ae44 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -15,6 +15,7 @@ #include "share_config.h" #include "user_config.h" #include "user_session.h" +#include "../connection.h" #include "../transport_ipc.h" #include "../misc.h" @@ -120,12 +121,13 @@ static int parse_veto_list(struct ksmbd_share_config *share, return 0; } -static struct ksmbd_share_config *share_config_request(struct unicode_map *um, +static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config_response *resp; struct ksmbd_share_config *share = NULL; struct ksmbd_share_config *lookup; + struct unicode_map *um = work->conn->um; int ret; resp = ksmbd_ipc_share_config_request(name); @@ -181,7 +183,14 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, KSMBD_SHARE_CONFIG_VETO_LIST(resp), resp->veto_list_sz); if (!ret && share->path) { + if (__ksmbd_override_fsids(work, share)) { + kill_share(share); + share = NULL; + goto out; + } + ret = kern_path(share->path, 0, &share->vfs_path); + ksmbd_revert_fsids(work); if (ret) { ksmbd_debug(SMB, "failed to access '%s'\n", share->path); @@ -214,7 +223,7 @@ out: return share; } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config *share; @@ -227,7 +236,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, if (share) return share; - return share_config_request(um, name); + return share_config_request(work, name); } bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h index 5f591751b923..d4ac2dd4de20 100644 --- a/fs/smb/server/mgmt/share_config.h +++ b/fs/smb/server/mgmt/share_config.h @@ -11,6 +11,8 @@ #include #include +struct ksmbd_work; + struct ksmbd_share_config { char *name; char *path; @@ -68,7 +70,7 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share) __ksmbd_share_config_put(share); } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name); bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, const char *filename); diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index d2c81a8a11dd..94a52a75014a 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -16,17 +16,18 @@ #include "user_session.h" struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name) +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) { struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; struct sockaddr *peer_addr; + struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; int ret; - sc = ksmbd_share_config_get(conn->um, share_name); + sc = ksmbd_share_config_get(work, share_name); if (!sc) return status; @@ -61,7 +62,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, struct ksmbd_share_config *new_sc; ksmbd_share_config_del(sc); - new_sc = ksmbd_share_config_get(conn->um, share_name); + new_sc = ksmbd_share_config_get(work, share_name); if (!new_sc) { pr_err("Failed to update stale share config\n"); status.ret = -ESTALE; diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 6377a70b811c..a42cdd051041 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -13,6 +13,7 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; +struct ksmbd_work; enum { TREE_NEW = 0, @@ -50,8 +51,7 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn, struct ksmbd_session; struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name); +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name); void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 37a39ab4ee65..54154d36ea2f 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1955,7 +1955,7 @@ int smb2_tree_connect(struct ksmbd_work *work) ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n", name, treename); - status = ksmbd_tree_conn_connect(conn, sess, name); + status = ksmbd_tree_conn_connect(work, name); if (status.ret == KSMBD_TREE_CONN_STATUS_OK) rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id); else diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 474dadf6b7b8..13818ecb6e1b 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -732,10 +732,10 @@ bool is_asterisk(char *p) return p && p[0] == '*'; } -int ksmbd_override_fsids(struct ksmbd_work *work) +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share) { struct ksmbd_session *sess = work->sess; - struct ksmbd_share_config *share = work->tcon->share_conf; struct cred *cred; struct group_info *gi; unsigned int uid; @@ -775,6 +775,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work) return 0; } +int ksmbd_override_fsids(struct ksmbd_work *work) +{ + return __ksmbd_override_fsids(work, work->tcon->share_conf); +} + void ksmbd_revert_fsids(struct ksmbd_work *work) { const struct cred *cred; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index f1092519c0c2..4a3148b0167f 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -447,6 +447,8 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command); int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp); +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share); int ksmbd_override_fsids(struct ksmbd_work *work); void ksmbd_revert_fsids(struct ksmbd_work *work); From f6bd41280a44dcc2e0a25ed72617d25f586974a7 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 5 Aug 2024 08:57:03 +0900 Subject: [PATCH 0708/1523] ksmbd: override fsids for smb2_query_info() Sangsoo reported that a DAC denial error occurred when accessing files through the ksmbd thread. This patch override fsids for smb2_query_info(). Reported-by: Sangsoo Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 54154d36ea2f..2df1354288e6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5596,6 +5596,11 @@ int smb2_query_info(struct ksmbd_work *work) ksmbd_debug(SMB, "GOT query info request\n"); + if (ksmbd_override_fsids(work)) { + rc = -ENOMEM; + goto err_out; + } + switch (req->InfoType) { case SMB2_O_INFO_FILE: ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n"); @@ -5614,6 +5619,7 @@ int smb2_query_info(struct ksmbd_work *work) req->InfoType); rc = -EOPNOTSUPP; } + ksmbd_revert_fsids(work); if (!rc) { rsp->StructureSize = cpu_to_le16(9); @@ -5623,6 +5629,7 @@ int smb2_query_info(struct ksmbd_work *work) le32_to_cpu(rsp->OutputBufferLength)); } +err_out: if (rc < 0) { if (rc == -EACCES) rsp->hdr.Status = STATUS_ACCESS_DENIED; From 5bdacb0907c1f531995b6ba47b832ac3a0182ae9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 6 Aug 2024 20:05:16 +0200 Subject: [PATCH 0709/1523] drm/xe/pf: Fix VF config validation on multi-GT platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When validating VF config on the media GT, we may wrongly report that VF is already partially configured on it, as we consider GGTT and LMEM provisioning done on the primary GT (since both GGTT and LMEM are tile-level resources, not a GT-level). This will cause skipping a VF auto-provisioning on the media-GT and in result will block a VF from successfully initialize that GT. Fix that by considering GGTT and LMEM configurations only when checking if a VF provisioning is complete, and omit GGTT and LMEM when reporting empty/partial provisioning. Fixes: 234670cea9a2 ("drm/xe/pf: Skip fair VFs provisioning if already provisioned") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240806180516.618-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 52c7277d243d..227527785afd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1928,6 +1928,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1936,13 +1937,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } From a857add73e61277fb8f688234c9d383130313ea7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 7 Aug 2024 10:10:14 +0100 Subject: [PATCH 0710/1523] drm/i915/gt: Mark the GT as dead when mmio is unreliable After we detect that mmio is returning all 0xff, we believe that the GPU has dropped off the pci bus and is dead. Mark the device as wedged such that we can propagate the failure back to userspace and wait for recovery. Signed-off-by: Chris Wilson Signed-off-by: Andi Shyti Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240807091014.469992-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.h | 6 ++++++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 ++ drivers/gpu/drm/i915/gt/intel_reset.c | 12 +++++++++++- drivers/gpu/drm/i915/intel_uncore.c | 7 +++++-- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index b5e114d284ad..b73555889d50 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -208,4 +208,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, void intel_gt_bind_context_set_ready(struct intel_gt *gt); void intel_gt_bind_context_set_unready(struct intel_gt *gt); bool intel_gt_is_bind_context_ready(struct intel_gt *gt); + +static inline void intel_gt_set_wedged_async(struct intel_gt *gt) +{ + queue_work(system_highpri_wq, >->wedge); +} + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index cfdd2ad5e954..bcee084b1f27 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -292,6 +292,8 @@ struct intel_gt { struct gt_defaults defaults; struct kobject *sysfs_defaults; + struct work_struct wedge; + struct i915_perf_gt perf; /** link: &ggtt.gt_list */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 6161f7a3ff70..76810ebb5e83 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1013,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) GT_TRACE(gt, "end\n"); } +static void set_wedged_work(struct work_struct *w) +{ + struct intel_gt *gt = container_of(w, struct intel_gt, wedge); + intel_wakeref_t wf; + + with_intel_runtime_pm(gt->uncore->rpm, wf) + __intel_gt_set_wedged(gt); +} + void intel_gt_set_wedged(struct intel_gt *gt) { intel_wakeref_t wakeref; @@ -1614,6 +1623,7 @@ void intel_gt_init_reset(struct intel_gt *gt) init_waitqueue_head(>->reset.queue); mutex_init(>->reset.mutex); init_srcu_struct(>->reset.backoff_srcu); + INIT_WORK(>->wedge, set_wedged_work); /* * While undesirable to wait inside the shrinker, complain anyway. @@ -1640,7 +1650,7 @@ static void intel_wedge_me(struct work_struct *work) struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name); - intel_gt_set_wedged(w->gt); + set_wedged_work(&w->gt->wedge); } void __intel_init_wedge(struct intel_wedge_me *w, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2eba289d88ad..6aa179a3e92a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -24,6 +24,7 @@ #include #include +#include "gt/intel_gt.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" @@ -180,14 +181,16 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) if (!wait_ack_clear(d, FORCEWAKE_KERNEL)) return; - if (fw_ack(d) == ~0) + if (fw_ack(d) == ~0) { drm_err(&d->uncore->i915->drm, "%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n", intel_uncore_forcewake_domain_to_str(d->id)); - else + intel_gt_set_wedged_async(d->uncore->gt); + } else { drm_err(&d->uncore->i915->drm, "%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); + } add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ } From e5876b088ba03a62124266fa20d00e65533c7269 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Aug 2024 19:28:05 +0200 Subject: [PATCH 0711/1523] usbnet: ipheth: race between ipheth_close and error handling ipheth_sndbulk_callback() can submit carrier_work as a part of its error handling. That means that the driver must make sure that the work is cancelled after it has made sure that no more URB can terminate with an error condition. Hence the order of actions in ipheth_close() needs to be inverted. Signed-off-by: Oliver Neukum Signed-off-by: Foster Snowhill Tested-by: Georgi Valkov Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 687d70cfc556..6eeef10edada 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -475,8 +475,8 @@ static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); - cancel_delayed_work_sync(&dev->carrier_work); netif_stop_queue(net); + cancel_delayed_work_sync(&dev->carrier_work); return 0; } From 655b46d7a39ac6f049698b27c1568c0f7ff85d1e Mon Sep 17 00:00:00 2001 From: Foster Snowhill Date: Tue, 6 Aug 2024 19:28:06 +0200 Subject: [PATCH 0712/1523] usbnet: ipheth: remove extraneous rx URB length check Rx URB length was already checked in ipheth_rcvbulk_callback_legacy() and ipheth_rcvbulk_callback_ncm(), depending on the current mode. The check in ipheth_rcvbulk_callback() was thus mostly a duplicate. The only place in ipheth_rcvbulk_callback() where we care about the URB length is for the initial control frame. These frames are always 4 bytes long. This has been checked as far back as iOS 4.2.1 on iPhone 3G. Remove the extraneous URB length check. For control frames, check for the specific 4-byte length instead. Signed-off-by: Foster Snowhill Tested-by: Georgi Valkov Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 6eeef10edada..017255615508 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -286,11 +286,6 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - if (urb->actual_length <= IPHETH_IP_ALIGN) { - dev->net->stats.rx_length_errors++; - return; - } - /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and * they don't affect driver functionality, okay to drop them. @@ -298,7 +293,8 @@ static void ipheth_rcvbulk_callback(struct urb *urb) * URB received from the bulk IN endpoint. */ if (unlikely - (((char *)urb->transfer_buffer)[0] == 0 && + (urb->actual_length == 4 && + ((char *)urb->transfer_buffer)[0] == 0 && ((char *)urb->transfer_buffer)[1] == 1)) goto rx_submit; From 94d7eeb6c0ef0310992944f0d0296929816a2cb0 Mon Sep 17 00:00:00 2001 From: Foster Snowhill Date: Tue, 6 Aug 2024 19:28:07 +0200 Subject: [PATCH 0713/1523] usbnet: ipheth: drop RX URBs with no payload On iPhone 15 Pro Max one can observe periodic URBs with no payload on the "bulk in" (RX) endpoint. These don't seem to do anything meaningful. Reproduced on iOS 17.5.1 and 17.6. This behaviour isn't observed on iPhone 11 on the same iOS version. The nature of these zero-length URBs is so far unknown. Drop RX URBs with no payload. Signed-off-by: Foster Snowhill Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 017255615508..f04c7bf79665 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -286,6 +286,12 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } + /* iPhone may periodically send URBs with no payload + * on the "bulk in" endpoint. It is safe to ignore them. + */ + if (urb->actual_length == 0) + goto rx_submit; + /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and * they don't affect driver functionality, okay to drop them. From 74efed51e0a4d62f998f806c307778b47fc73395 Mon Sep 17 00:00:00 2001 From: Foster Snowhill Date: Tue, 6 Aug 2024 19:28:08 +0200 Subject: [PATCH 0714/1523] usbnet: ipheth: do not stop RX on failing RX callback RX callbacks can fail for multiple reasons: * Payload too short * Payload formatted incorrecly (e.g. bad NCM framing) * Lack of memory None of these should cause the driver to seize up. Make such failures non-critical and continue processing further incoming URBs. Signed-off-by: Foster Snowhill Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index f04c7bf79665..cdc72559790a 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -308,7 +308,6 @@ static void ipheth_rcvbulk_callback(struct urb *urb) if (retval != 0) { dev_err(&dev->intf->dev, "%s: callback retval: %d\n", __func__, retval); - return; } rx_submit: From 67927a1b255d883881be9467508e0af9a5e0be9d Mon Sep 17 00:00:00 2001 From: Foster Snowhill Date: Tue, 6 Aug 2024 19:28:09 +0200 Subject: [PATCH 0715/1523] usbnet: ipheth: fix carrier detection in modes 1 and 4 Apart from the standard "configurations", "interfaces" and "alternate interface settings" in USB, iOS devices also have a notion of "modes". In different modes, the device exposes a different set of available configurations. Depending on the iOS version, and depending on the current mode, the length and contents of the carrier state control message differs: * 1 byte (seen on iOS 4.2.1, 8.4): * 03: carrier off (mode 0) * 04: carrier on (mode 0) * 3 bytes (seen on iOS 10.3.4, 15.7.6): * 03 03 03: carrier off (mode 0) * 04 04 03: carrier on (mode 0) * 4 bytes (seen on iOS 16.5, 17.6): * 03 03 03 00: carrier off (mode 0) * 04 03 03 00: carrier off (mode 1) * 06 03 03 00: carrier off (mode 4) * 04 04 03 04: carrier on (mode 0 and 1) * 06 04 03 04: carrier on (mode 4) Before this change, the driver always used the first byte of the response to determine carrier state. From this larger sample, the first byte seems to indicate the number of available USB configurations in the current mode (with the exception of the default mode 0), and in some cases (namely mode 1 and 4) does not correlate with the carrier state. Previous logic erroneously counted `04 03 03 00` as "carrier on" and `06 04 03 04` as "carrier off" on iOS versions that support mode 1 and mode 4 respectively. Only modes 0, 1 and 4 expose the USB Ethernet interfaces necessary for the ipheth driver. Check the second byte of the control message where possible, and fall back to checking the first byte on older iOS versions. Signed-off-by: Foster Snowhill Tested-by: Georgi Valkov Signed-off-by: David S. Miller --- drivers/net/usb/ipheth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index cdc72559790a..46afb95ffabe 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -355,13 +355,14 @@ static int ipheth_carrier_set(struct ipheth_device *dev) 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); - if (retval < 0) { + if (retval <= 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } - if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) { + if ((retval == 1 && dev->ctrl_buf[0] == IPHETH_CARRIER_ON) || + (retval >= 2 && dev->ctrl_buf[1] == IPHETH_CARRIER_ON)) { netif_carrier_on(dev->net); if (dev->tx_urb->status != -EINPROGRESS) netif_wake_queue(dev->net); From 2124d84db293ba164059077944e6b429ba530495 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Aug 2024 08:33:28 -0700 Subject: [PATCH 0716/1523] module: make waiting for a concurrent module loader interruptible The recursive aes-arm-bs module load situation reported by Russell King is getting fixed in the crypto layer, but this in the meantime fixes the "recursive load hangs forever" by just making the waiting for the first module load be interruptible. This should now match the old behavior before commit 9b9879fc0327 ("modules: catch concurrent module loads, treat them as idempotent"), which used the different "wait for module to be ready" code in module_patient_check_exists(). End result: a recursive module load will still block, but now a signal will interrupt it and fail the second module load, at which point the first module will successfully complete loading. Fixes: 9b9879fc0327 ("modules: catch concurrent module loads, treat them as idempotent") Cc: Russell King Cc: Herbert Xu Signed-off-by: Linus Torvalds --- kernel/module/main.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 6f4ec857bdef..71396e297499 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3104,7 +3104,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) struct idempotent *existing; bool first; - u->ret = 0; + u->ret = -EINTR; u->cookie = cookie; init_completion(&u->complete); @@ -3140,7 +3140,7 @@ static int idempotent_complete(struct idempotent *u, int ret) hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; - hlist_del(&pos->entry); + hlist_del_init(&pos->entry); pos->ret = ret; complete(&pos->complete); } @@ -3148,6 +3148,28 @@ static int idempotent_complete(struct idempotent *u, int ret) return ret; } +/* + * Wait for the idempotent worker. + * + * If we get interrupted, we need to remove ourselves from the + * the idempotent list, and the completion may still come in. + * + * The 'idem_lock' protects against the race, and 'idem.ret' was + * initialized to -EINTR and is thus always the right return + * value even if the idempotent work then completes between + * the wait_for_completion and the cleanup. + */ +static int idempotent_wait_for_completion(struct idempotent *u) +{ + if (wait_for_completion_interruptible(&u->complete)) { + spin_lock(&idem_lock); + if (!hlist_unhashed(&u->entry)) + hlist_del(&u->entry); + spin_unlock(&idem_lock); + } + return u->ret; +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; @@ -3191,20 +3213,8 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int /* * Somebody else won the race and is loading the module. - * - * We have to wait for it forever, since our 'idem' is - * on the stack and the list entry stays there until - * completed (but we could fix it under the idem_lock) - * - * It's also unclear what a real timeout might be, - * but we could maybe at least make this killable - * and remove the idem entry in that case? */ - for (;;) { - if (wait_for_completion_timeout(&idem.complete, 10*HZ)) - return idem.ret; - pr_warn_once("module '%pD' taking a long time to load", f); - } + return idempotent_wait_for_completion(&idem); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) From d0949cd44a62c4c41b30ea7ae94d8c887f586882 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 8 Aug 2024 23:57:30 -0400 Subject: [PATCH 0717/1523] tracing: Return from tracing_buffers_read() if the file has been closed When running the following: # cd /sys/kernel/tracing/ # echo 1 > events/sched/sched_waking/enable # echo 1 > events/sched/sched_switch/enable # echo 0 > tracing_on # dd if=per_cpu/cpu0/trace_pipe_raw of=/tmp/raw0.dat The dd task would get stuck in an infinite loop in the kernel. What would happen is the following: When ring_buffer_read_page() returns -1 (no data) then a check is made to see if the buffer is empty (as happens when the page is not full), it will call wait_on_pipe() to wait until the ring buffer has data. When it is it will try again to read data (unless O_NONBLOCK is set). The issue happens when there's a reader and the file descriptor is closed. The wait_on_pipe() will return when that is the case. But this loop will continue to try again and wait_on_pipe() will again return immediately and the loop will continue and never stop. Simply check if the file was closed before looping and exit out if it is. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240808235730.78bf63e5@rorschach.local.home Fixes: 2aa043a55b9a7 ("tracing/ring-buffer: Fix wait_on_pipe() race") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10cd38bce2f1..ebe7ce2f5f4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; From 90574d2a675947858b47008df8d07f75ea50d0d0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2024 15:34:30 +0300 Subject: [PATCH 0718/1523] rtla/osnoise: Prevent NULL dereference in error handling If the "tool->data" allocation fails then there is no need to call osnoise_free_top() and, in fact, doing so will lead to a NULL dereference. Cc: stable@vger.kernel.org Cc: John Kacur Cc: "Luis Claudio R. Goncalves" Cc: Clark Williams Fixes: 1eceb2fc2ca5 ("rtla/osnoise: Add osnoise top mode") Link: https://lore.kernel.org/f964ed1f-64d2-4fde-ad3e-708331f8f358@stanley.mountain Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/osnoise_top.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840..2f756628613d 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; From 077e47372309dcbe3a150754ea9c6f15cc838d6b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 23:19:59 -0400 Subject: [PATCH 0719/1523] bcachefs: bch2_accounting_invalid() Implement bch2_accounting_invalid(); check for junk at the end, and replicas accounting entries in particular need to be checked or we'll pop asserts later. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 65 +++++++++++++++++++++++++++- fs/bcachefs/disk_accounting_format.h | 9 ++-- fs/bcachefs/replicas.c | 1 - fs/bcachefs/sb-errors_format.h | 6 ++- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23..046ac92b6639 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,74 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } +static inline bool is_zero(char *start, char *end) +{ + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags, struct printbuf *err) { - return 0; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, err, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, err, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + c, err, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, err, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b33..848f06cc809d 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -124,10 +124,6 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; @@ -140,6 +136,9 @@ struct bch_acct_btree { __u32 id; }; +struct bch_acct_rebalance_work { +}; + struct disk_accounting_pos { union { struct { @@ -149,10 +148,10 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; + struct bch_acct_rebalance_work rebalance_work; }; }; struct bpos _pad; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047a..1223b710755d 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a..d3a498617303 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 1a9e219db15e62760cfcc107ab6df3796d353605 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 23:44:00 -0400 Subject: [PATCH 0720/1523] bcachefs: improve bch2_dev_usage_to_text() Add a line for capacity Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/buckets.c | 12 ++++++++---- fs/bcachefs/buckets.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 02de5ad2be2c..8563c2d26847 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1740,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d24663..9f7004e941ce 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d..edbdffd508fc 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { From d5240fa65db071909e9d1d5adcc5fd1abc8e96fe Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 9 Aug 2024 11:11:55 +0800 Subject: [PATCH 0721/1523] nvdimm/pmem: Set dax flag for all 'PFN_MAP' cases The dax is only supported on pfn type pmem devices since commit f467fee48da4 ("block: move the dax flag to queue_limits"). Trying to mount DAX filesystem fails with this error: mount: : wrong fs type, bad option, bad superblock on /dev/pmem7, missing codepage or helper program, or other error. dmesg(1) may have more information after failed mount system call. dmesg: EXT4-fs (pmem7): DAX unsupported by block device. Fix the problem by adding dax flag setting for the missed case. Fixes: f467fee48da4 ("block: move the dax flag to queue_limits") Signed-off-by: Zhihao Cheng Reviewed-by: Christoph Hellwig Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Tested-by: Ira Weiny Tested-by: Alison Schofield Link: https://patch.msgid.link/20240809031155.2837271-1-chengzhihao1@huawei.com Signed-off-by: Ira Weiny --- drivers/nvdimm/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1ae8b2351654..210fb77f51ba 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -498,7 +498,7 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; - if (is_nd_pfn(dev)) + if (is_nd_pfn(dev) || pmem_should_map_pages(dev)) lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), From 549dd786b61cd3db903f5d94d07fc5a89ccdbeb9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 08:28:31 -0700 Subject: [PATCH 0722/1523] drm/xe: Move VM dma-resv lock from xe_exec_queue_create to __xe_exec_queue_init The critical section which requires the VM dma-resv is the call xe_lrc_create in __xe_exec_queue_init. Move this lock to __xe_exec_queue_init holding it just around xe_lrc_create. Not only is good practice, this also fixes a locking double of the VM dma-resv in the error paths of __xe_exec_queue_init as xe_lrc_put tries to acquire this too resulting in a deadlock. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240724152831.1848325-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 956dc15b432a..971e1234b8ea 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -105,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; From 869b5016e94eced02f2cf99bf53c69b49adcee32 Mon Sep 17 00:00:00 2001 From: Zehui Xu Date: Wed, 31 Jul 2024 16:43:46 +0300 Subject: [PATCH 0723/1523] kbuild: rust: skip -fmin-function-alignment in bindgen flags GCC 14 recently added -fmin-function-alignment option and the root Makefile uses it to replace -falign-functions when available. However, this flag can cause issues when passed to the Rust Makefile and affect the bindgen process. Bindgen relies on libclang to parse C code, and currently does not support the -fmin-function-alignment flag, leading to compilation failures when GCC 14 is used. This patch addresses the issue by adding -fmin-function-alignment to the bindgen_skip_c_flags in rust/Makefile. This prevents the flag from causing compilation issues. [ Matthew and Gary confirm function alignment should not change the ABI in a way that bindgen would care about, thus we did not need the extra logic for bindgen from v2. - Miguel ] Link: https://lore.kernel.org/linux-kbuild/20240222133500.16991-1-petr.pavlu@suse.com/ Signed-off-by: Zehui Xu Reviewed-by: Alice Ryhl Reviewed-by: Neal Gompa Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20240731134346.10630-1-zehuixu@whu.edu.cn [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 6c0644b6090c..c41bae7ca8a3 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -227,7 +227,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ - -fstrict-flex-arrays=% \ + -fstrict-flex-arrays=% -fmin-function-alignment=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. From 02dfd63afe65f7bacad543ba2b10f77083ae7929 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 6 Aug 2024 17:06:19 +0200 Subject: [PATCH 0724/1523] rust: add intrinsics to fix `-Os` builds Alice reported [1] that an arm64 build failed with: ld.lld: error: undefined symbol: __extendsfdf2 >>> referenced by core.a6f5fc5794e7b7b3-cgu.0 >>> rust/core.o:(::midpoint) in archive vmlinux.a >>> referenced by core.a6f5fc5794e7b7b3-cgu.0 >>> rust/core.o:(::midpoint) in archive vmlinux.a ld.lld: error: undefined symbol: __truncdfsf2 >>> referenced by core.a6f5fc5794e7b7b3-cgu.0 >>> rust/core.o:(::midpoint) in archive vmlinux.a Rust 1.80.0 or later together with `CONFIG_CC_OPTIMIZE_FOR_SIZE=y` is what triggers it. In addition, x86_64 builds also fail the same way. Similarly, compiling with Rust 1.82.0 (currently in nightly) makes another one appear, possibly due to the LLVM 19 upgrade there: ld.lld: error: undefined symbol: __eqdf2 >>> referenced by core.20495ea57a9f069d-cgu.0 >>> rust/core.o:(::next_up) in archive vmlinux.a >>> referenced by core.20495ea57a9f069d-cgu.0 >>> rust/core.o:(::next_down) in archive vmlinux.a Gary adds [1]: > Usually the fix on rustc side is to mark those functions as `#[inline]` > > All of {midpoint,next_up,next_down} are indeed unstable functions not > marked as inline... Fix all those by adding those intrinsics to our usual workaround. [ Trevor quickly submitted a fix to upstream Rust [2] that has already been merged, to be released in Rust 1.82.0 (2024-10-17). - Miguel ] Cc: Gary Guo Reported-by: Alice Ryhl Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/x/topic/x/near/455637364 [1] Reviewed-by: Trevor Gross Tested-by: Alice Ryhl Tested-by: Boqun Feng Reviewed-by: Gary Guo Link: https://github.com/rust-lang/rust/pull/128749 [2] Link: https://lore.kernel.org/r/20240806150619.192882-1-ojeda@kernel.org [ Shortened Zulip link. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 4 ++-- rust/compiler_builtins.rs | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index c41bae7ca8a3..8de3ebba9551 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -354,8 +354,8 @@ rust-analyzer: $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ - __addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \ - __adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \ + __addsf3 __eqsf2 __extendsfdf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __truncdfsf2 __unordsf2 \ + __adddf3 __eqdf2 __ledf2 __ltdf2 __muldf3 __unorddf2 \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index bba2922c6ef7..f14b8d7caf89 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -40,16 +40,19 @@ macro_rules! define_panicking_intrinsics( define_panicking_intrinsics!("`f32` should not be used", { __addsf3, __eqsf2, + __extendsfdf2, __gesf2, __lesf2, __ltsf2, __mulsf3, __nesf2, + __truncdfsf2, __unordsf2, }); define_panicking_intrinsics!("`f64` should not be used", { __adddf3, + __eqdf2, __ledf2, __ltdf2, __muldf3, From d734422b7dd7d033fc02e421924e70dabf665b4c Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 7 Aug 2024 01:35:59 +0200 Subject: [PATCH 0725/1523] kbuild: rust-analyzer: mark `rust_is_available.sh` invocation as recursive When calling the `rust_is_available.sh` script, we need to make the jobserver available to it, as commit ecab4115c44c ("kbuild: mark `rustc` (and others) invocations as recursive") explains and did for the others. Otherwise, we get a warning from `rustc` when calling `make rust-analyzer` with parallel jobs, e.g. `-j8`. Using several jobs for that target does not really matter, but developers may call `make` with jobs enabled in all cases. Thus fix it. Fixes: 6dc9d9ca9a72 ("kbuild: rust-analyzer: better error handling") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240806233559.246705-1-ojeda@kernel.org [ Reworded to add a couple more details mentioned in the list. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8ad55d6e7b60..8ed76eb9d438 100644 --- a/Makefile +++ b/Makefile @@ -1963,7 +1963,7 @@ tags TAGS cscope gtags: FORCE # Protocol). PHONY += rust-analyzer rust-analyzer: - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh + +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh $(Q)$(MAKE) $(build)=rust $@ # Script to generate missing namespace dependencies From 0eba65f0310d3c7d5516c7fd4c172d0bfa8b285b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 6 Aug 2024 16:45:58 +0200 Subject: [PATCH 0726/1523] rust: x86: remove `-3dnow{,a}` from target features LLVM 19 is dropping support for 3DNow! in commit f0eb5587ceeb ("Remove support for 3DNow!, both intrinsics and builtins. (#96246)"): Remove support for 3DNow!, both intrinsics and builtins. (#96246) This set of instructions was only supported by AMD chips starting in the K6-2 (introduced 1998), and before the "Bulldozer" family (2011). They were never much used, as they were effectively superseded by the more-widely-implemented SSE (first implemented on the AMD side in Athlon XP in 2001). This is being done as a predecessor towards general removal of MMX register usage. Since there is almost no usage of the 3DNow! intrinsics, and no modern hardware even implements them, simple removal seems like the best option. Thus we should avoid passing these to the backend, since otherwise we get a diagnostic about it: '-3dnow' is not a recognized feature for this target (ignoring feature) '-3dnowa' is not a recognized feature for this target (ignoring feature) We could try to disable them only up to LLVM 19 (not the C side one, but the one used by `rustc`, which may be built with a range of LLVMs). However, to avoid more complexity, we can likely just remove them altogether. According to Nikita [2]: > I don't think it's needed because LLVM should not generate 3dnow > instructions unless specifically asked to, using intrinsics that > Rust does not provide in the first place. Thus do so, like Rust did for one of their builtin targets [3]. For those curious: Clang will warn only about trying to enable them (`-m3dnow{,a}`), but not about disabling them (`-mno-3dnow{,a}`), so there is no change needed there. Cc: Nikita Popov Cc: Nathan Chancellor Cc: x86@kernel.org Link: https://github.com/llvm/llvm-project/commit/f0eb5587ceeb641445b64cb264c822b4751de04a [1] Link: https://github.com/rust-lang/rust/pull/127864#issuecomment-2235898760 [2] Link: https://github.com/rust-lang/rust/pull/127864 [3] Closes: https://github.com/Rust-for-Linux/linux/issues/1094 Tested-by: Benno Lossin Tested-by: Alice Ryhl Link: https://lore.kernel.org/r/20240806144558.114461-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_target.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 87f34925eb7b..404edf7587e0 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -162,7 +162,7 @@ fn main() { "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } @@ -179,7 +179,7 @@ fn main() { "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } From 8c251c5ab1b7cd204231e4ee936bfe078a33f234 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 9 Aug 2024 08:27:49 +0000 Subject: [PATCH 0727/1523] cxl/pci: Get AER capability address from RCRB only for RCH dport cxl_setup_parent_dport() needs to get RCH dport AER capability address from RCRB to disable AER interrupt. The function does not check if dport is RCH dport, it will get a wrong pci_host_bridge structure by dport_dev in VH case because dport_dev points to a pci device(RP or switch DSP) rather than a pci host bridge device. Fixes: f05fd10d138d ("cxl/pci: Add RCH downstream port AER register discovery") Signed-off-by: Li Ming Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Tested-by: Ira Weiny Tested-by: Alison Schofield Link: https://patch.msgid.link/20240809082750.3015641-2-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a663e7566c48..51132a575b27 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); From 2c402bd2e85b44dc00ef85b5c0e217de684b5372 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 9 Aug 2024 08:27:50 +0000 Subject: [PATCH 0728/1523] cxl/test: Skip cxl_setup_parent_dport() for emulated dports The cxl_test unit test environment on qemu always hits below call trace with KASAN enabled: BUG: KASAN: slab-out-of-bounds in cxl_setup_parent_dport+0x480/0x530 [cxl_core] Read of size 1 at addr ff110000676014f8 by task (udev-worker)/676[ 24.424403] CPU: 2 PID: 676 Comm: (udev-worker) Tainted: G O N 6.10.0-qemucxl #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20240214-2.el9 02/14/2024 Call Trace: dump_stack_lvl+0xea/0x150 print_report+0xce/0x610 ? kasan_complete_mode_report_info+0x40/0x200 kasan_report+0xcc/0x110 __asan_report_load1_noabort+0x18/0x20 cxl_setup_parent_dport+0x480/0x530 [cxl_core] cxl_mem_probe+0x49b/0xaa0 [cxl_mem] cxl_test module models a CXL topology for testing, it creates some emulated dports with platform devices in the CXL topology, so the dport_dev of an emulated dport points to a platform device rather than a pci device or a pci host bridge in the case. Currently, cxl_setup_parent_dport() is used to set up RAS and AER capability on the dport connected to the CXL memory device, but cxl_test does not support RAS or AER functionality yet, so the fix is implementing a __wrap_cxl_setup_parent_dport() to filter out all emulated dports, guarantees only real dports can be handled by cxl_setup_parent_dport(). Fixes: f05fd10d138d ("cxl/pci: Add RCH downstream port AER register discovery") Reported-by: Pengfei Xu Closes: https://lore.kernel.org/linux-cxl/ZrHTBp2O+HtUe6kt@xpf.sh.intel.com/T/#t Signed-off-by: Li Ming Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Tested-by: Ira Weiny Tested-by: Alison Schofield Link: https://patch.msgid.link/20240809082750.3015641-3-ming4.li@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mock.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f0..3d1ca9e38b1f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e..d619672faa49 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); From 8a2491db7bea6ad88ec568731eafd583501f1c96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 Aug 2024 00:25:25 -0400 Subject: [PATCH 0729/1523] bcachefs: bcachefs_metadata_version_disk_accounting_v3 bcachefs_metadata_version_disk_accounting_v2 erroneously had padding bytes in disk_accounting_key, which is a problem because we have to guarantee that all unused bytes in disk_accounting_key are zeroed. Fortunately 6.11 isn't out yet, so it's cheap to fix this by spinning a new version. Reported-by: Gabriel de Perthuis Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/disk_accounting_format.h | 8 ++++---- fs/bcachefs/sb-downgrade.c | 27 ++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index ad893684db52..b25f86356728 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,8 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index 848f06cc809d..a93cf26ff4a9 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -130,11 +130,11 @@ struct bch_acct_compression { struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; -}; +} __packed; struct bch_acct_rebalance_work { }; @@ -152,8 +152,8 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; - }; - }; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731..6c4469f53313 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,12 +61,37 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ - BCH_FSCK_ERR_accounting_mismatch) + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ 0) \ x(disk_accounting_v2, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ From a86ee96ce819800a399e0260c5ffad793c9c6ac3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:25 -0700 Subject: [PATCH 0730/1523] drm/xe: Add xe_sched_msg_lock/unlock helper Will help callers to own locking when adding messages to scheduler. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 12 ++++++------ drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10 ++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index e4ad1d6ce1d5..eea71c67cf2a 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) xe_sched_process_msg_queue(sched); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); } static struct xe_sched_msg * @@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) list_del(&msg->link); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); return msg; } @@ -93,9 +93,9 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); list_add_tail(&msg->link, &sched->msgs); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 10c6bb9c9386..a54038fb3094 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -25,6 +25,16 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) +{ + spin_lock(&sched->base.job_list_lock); +} + +static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) +{ + spin_unlock(&sched->base.job_list_lock); +} + static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) { drm_sched_stop(&sched->base, NULL); From fc33077765e9104f84b49d9b9e0702a41d5269d6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:26 -0700 Subject: [PATCH 0731/1523] drm/xe: Reinit msg link when processing a message Will help to avoid adding a static message twice. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index eea71c67cf2a..1c703e8423de 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -31,7 +31,7 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) - list_del(&msg->link); + list_del_init(&msg->link); xe_sched_msg_unlock(sched); return msg; From 17d6abcbf6249b4ef22f41b255de2be2691ec32e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:27 -0700 Subject: [PATCH 0732/1523] drm/xe: Add xe_sched_add_msg_locked helper Will help by allowing callers to own message locking. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 9 ++++++++- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 1c703e8423de..c518d1d16d82 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -94,8 +94,15 @@ void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { xe_sched_msg_lock(sched); - list_add_tail(&msg->link, &sched->msgs); + xe_sched_add_msg_locked(sched, msg); xe_sched_msg_unlock(sched); +} +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index a54038fb3094..cee9c6809fc0 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -24,6 +24,8 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) { From 885c31382509d13fd70f6a9c42637eb72056a6ce Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:28 -0700 Subject: [PATCH 0733/1523] drm/xe: Only enable scheduling upon resume if needed No need to enable scheduling in already enabled. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 460808507947..a9af33ff8aa7 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1374,9 +1374,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - enable_scheduling(q); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } From d79fdaef2b55deea0df3fc6af4d4ac60e81a527c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:29 -0700 Subject: [PATCH 0734/1523] drm/xe: Allow suspend / resume to be safely called multiple times Switching modes between LR and dma-fence can result in multiple calls to suspend / resume. Make these calls safe while still enforcing call order. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 40 ++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a9af33ff8aa7..da63be550d4d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1388,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1432,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); @@ -1444,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, @@ -1506,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 @@ -1584,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_suspend(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q)) return -EINVAL; - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); + xe_sched_msg_lock(sched); + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) + q->guc->suspend_pending = true; + xe_sched_msg_unlock(sched); return 0; } @@ -1624,13 +1647,16 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) static void guc_exec_queue_resume(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); xe_assert(xe, !q->guc->suspend_pending); - guc_exec_queue_add_msg(q, msg, RESUME); + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg(q, msg, RESUME); + xe_sched_msg_unlock(sched); } static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) From 4bbe6002931954bbe82b25f25990b987b0392e18 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jul 2024 16:38:07 -0300 Subject: [PATCH 0735/1523] perf daemon: Fix the build on 32-bit architectures Noticed with: 1 6.22 debian:experimental-x-mipsel : FAIL gcc version 13.2.0 (Debian 13.2.0-25) builtin-daemon.c: In function 'cmd_session_list': builtin-daemon.c:691:35: error: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'time_t' {aka 'long long int'} [-Werror=format=] Use inttypes.h's PRIu64 to deal with that. Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/ZplvH21aQ8pzmza_@x1 Signed-off-by: Namhyung Kim --- tools/perf/builtin-daemon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfb..5c9335fff2d3 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -688,7 +689,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - daemon->start) / 60); @@ -700,7 +701,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - daemon->start) / 60); } } @@ -727,7 +728,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - session->start) / 60); @@ -745,7 +746,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - session->start) / 60); } } From 3eb3cd5992f7a0c37edc8d05b4c38c98758d8671 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Aug 2024 12:51:23 -0700 Subject: [PATCH 0736/1523] binfmt_flat: Fix corruption when not offsetting data start Commit 04d82a6d0881 ("binfmt_flat: allow not offsetting data start") introduced a RISC-V specific variant of the FLAT format which does not allocate any space for the (obsolete) array of shared library pointers. However, it did not disable the code which initializes the array, resulting in the corruption of sizeof(long) bytes before the DATA segment, generally the end of the TEXT segment. Introduce MAX_SHARED_LIBS_UPDATE which depends on the state of CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET to guard the initialization of the shared library pointer region so that it will only be initialized if space is reserved for it. Fixes: 04d82a6d0881 ("binfmt_flat: allow not offsetting data start") Co-developed-by: Stefan O'Rear Signed-off-by: Stefan O'Rear Reviewed-by: Damien Le Moal Acked-by: Greg Ungerer Link: https://lore.kernel.org/r/20240807195119.it.782-kees@kernel.org Signed-off-by: Kees Cook --- fs/binfmt_flat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c26545d71d39..cd6d5bbb4b9d 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -72,8 +72,10 @@ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET #define DATA_START_OFFSET_WORDS (0) +#define MAX_SHARED_LIBS_UPDATE (0) #else #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS) +#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS) #endif struct lib_info { @@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm) return res; /* Update data segment pointers for all libraries */ - for (i = 0; i < MAX_SHARED_LIBS; i++) { + for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) { if (!libinfo.lib_list[i].loaded) continue; for (j = 0; j < MAX_SHARED_LIBS; j++) { From 08b5a4798713f4331317272752b27b1c4f6a246d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 8 Aug 2024 10:11:21 -0700 Subject: [PATCH 0737/1523] drm/xe: Allow to compile out debugfs Use a dummy xe_debugfs_register() if debugfs is not enabled and move all debugfs-related files under `ifeq ($(CONFIG_DEBUG_FS),y)` in the Makefile. This is similar to what was done for display in commit 439987f6f471 ("drm/xe: don't build debugfs files when CONFIG_DEBUG_FS=n"). This removes the following warning while loading xe with CONFIG_DEUBG_FS=n: xe 0000:03:00.0: [drm] Create GT directory failed Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240808171121.2484237-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 16 +++++++++------- drivers/gpu/drm/xe/xe_debugfs.h | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1ff9602a52f6..b846b36a7bfd 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -28,7 +28,6 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ - xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -46,7 +45,6 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ - xe_gt_debugfs.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ @@ -59,7 +57,6 @@ xe-y += xe_bb.o \ xe_guc_ads.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ - xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ @@ -71,7 +68,6 @@ xe-y += xe_bb.o \ xe_hw_engine_class_sysfs.o \ xe_hw_fence.o \ xe_huc.o \ - xe_huc_debugfs.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -107,7 +103,6 @@ xe-y += xe_bb.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ - xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ xe_vram.o \ @@ -124,7 +119,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ - xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o @@ -133,7 +127,6 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ - xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ @@ -281,6 +274,15 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) endif ifeq ($(CONFIG_DEBUG_FS),y) + xe-y += xe_debugfs.o \ + xe_gt_debugfs.o \ + xe_gt_sriov_vf_debugfs.o \ + xe_guc_debugfs.o \ + xe_huc_debugfs.o \ + xe_uc_debugfs.o + + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o + xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_debugfs.o \ i915-display/intel_display_debugfs_params.o \ diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h index 715b8e2e0bd9..17f4c2f1b5e4 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.h +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -8,6 +8,10 @@ struct xe_device; +#ifdef CONFIG_DEBUG_FS void xe_debugfs_register(struct xe_device *xe); +#else +static inline void xe_debugfs_register(struct xe_device *xe) { } +#endif #endif From 3a3be7ff9224f424e485287b54be00d2c6bd9c40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Aug 2024 13:24:55 +0000 Subject: [PATCH 0738/1523] gtp: pull network headers in gtp_dev_xmit() syzbot/KMSAN reported use of uninit-value in get_dev_xmit() [1] We must make sure the IPv4 or Ipv6 header is pulled in skb->head before accessing fields in them. Use pskb_inet_may_pull() to fix this issue. [1] BUG: KMSAN: uninit-value in ipv6_pdp_find drivers/net/gtp.c:220 [inline] BUG: KMSAN: uninit-value in gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline] BUG: KMSAN: uninit-value in gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281 ipv6_pdp_find drivers/net/gtp.c:220 [inline] gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline] gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281 __netdev_start_xmit include/linux/netdevice.h:4913 [inline] netdev_start_xmit include/linux/netdevice.h:4922 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3596 __dev_queue_xmit+0x358c/0x5610 net/core/dev.c:4423 dev_queue_xmit include/linux/netdevice.h:3105 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3145 [inline] packet_sendmsg+0x90e3/0xa3a0 net/packet/af_packet.c:3177 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2204 __do_sys_sendto net/socket.c:2216 [inline] __se_sys_sendto net/socket.c:2212 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212 x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:3994 [inline] slab_alloc_node mm/slub.c:4037 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4080 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:583 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:674 alloc_skb include/linux/skbuff.h:1320 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6526 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2815 packet_alloc_skb net/packet/af_packet.c:2994 [inline] packet_snd net/packet/af_packet.c:3088 [inline] packet_sendmsg+0x749c/0xa3a0 net/packet/af_packet.c:3177 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2204 __do_sys_sendto net/socket.c:2216 [inline] __se_sys_sendto net/socket.c:2212 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212 x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f CPU: 0 UID: 0 PID: 7115 Comm: syz.1.515 Not tainted 6.11.0-rc1-syzkaller-00043-g94ede2a3e913 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024 Fixes: 999cb275c807 ("gtp: add IPv6 support") Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Eric Dumazet Cc: Harald Welte Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20240808132455.3413916-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 427b91aca50d..0696faf60013 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1269,6 +1269,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ From 2b2bc3bab158b7e036508742b16cd8a3c2f59a12 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 8 Aug 2024 11:56:21 +0200 Subject: [PATCH 0739/1523] net: Make USO depend on CSUM offload UDP segmentation offload inherently depends on checksum offload. It should not be possible to disable checksum offload while leaving USO enabled. Enforce this dependency in code. There is a single tx-udp-segmentation feature flag to indicate support for both IPv4/6, hence the devices wishing to support USO must offer checksum offload for both IP versions. Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload") Suggested-by: Willem de Bruijn Signed-off-by: Jakub Sitnicki Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-1-f5c5b4149ab9@cloudflare.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 751d9b70e6ad..f66e61407883 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9912,6 +9912,15 @@ static void netdev_sync_lower_features(struct net_device *upper, } } +static bool netdev_has_ip_or_hw_csum(netdev_features_t features) +{ + netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + bool ip_csum = (features & ip_csum_mask) == ip_csum_mask; + bool hw_csum = features & NETIF_F_HW_CSUM; + + return ip_csum || hw_csum; +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -9993,15 +10002,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_LRO; } - if (features & NETIF_F_HW_TLS_TX) { - bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - bool hw_csum = features & NETIF_F_HW_CSUM; - - if (!ip_csum && !hw_csum) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; - } + if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; } if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { @@ -10009,6 +10012,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_HW_TLS_RX; } + if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n"); + features &= ~NETIF_F_GSO_UDP_L4; + } + return features; } From 30b03f2a0592eee1267298298eac9dd655f55ab2 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 8 Aug 2024 11:56:22 +0200 Subject: [PATCH 0740/1523] udp: Fall back to software USO if IPv6 extension headers are present In commit 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload") we have intentionally allowed UDP GSO packets marked CHECKSUM_NONE to pass to the GSO stack, so that they can be segmented and checksummed by a software fallback when the egress device lacks these features. What was not taken into consideration is that a CHECKSUM_NONE skb can be handed over to the GSO stack also when the egress device advertises the tx-udp-segmentation / NETIF_F_GSO_UDP_L4 feature. This will happen when there are IPv6 extension headers present, which we check for in __ip6_append_data(). Syzbot has discovered this scenario, producing a warning as below: ip6tnl0: caps=(0x00000006401d7869, 0x00000006401d7869) WARNING: CPU: 0 PID: 5112 at net/core/dev.c:3293 skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291 Modules linked in: CPU: 0 PID: 5112 Comm: syz-executor391 Not tainted 6.10.0-rc7-syzkaller-01603-g80ab5445da62 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 RIP: 0010:skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291 [...] Call Trace: __skb_gso_segment+0x3be/0x4c0 net/core/gso.c:127 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x585/0x1120 net/core/dev.c:3661 __dev_queue_xmit+0x17a4/0x3e90 net/core/dev.c:4415 neigh_output include/net/neighbour.h:542 [inline] ip6_finish_output2+0xffa/0x1680 net/ipv6/ip6_output.c:137 ip6_finish_output+0x41e/0x810 net/ipv6/ip6_output.c:222 ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1958 udp_v6_send_skb+0xbf5/0x1870 net/ipv6/udp.c:1292 udpv6_sendmsg+0x23b3/0x3270 net/ipv6/udp.c:1588 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xef/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2725 __do_sys_sendmmsg net/socket.c:2754 [inline] __se_sys_sendmmsg net/socket.c:2751 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2751 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f [...] We are hitting the bad offload warning because when an egress device is capable of handling segmentation offload requested by skb_shinfo(skb)->gso_type, the chain of gso_segment callbacks won't produce any segment skbs and return NULL. See the skb_gso_ok() branch in {__udp,tcp,sctp}_gso_segment helpers. To fix it, force a fallback to software USO when processing a packet with IPv6 extension headers, since we don't know if these can checksummed by all devices which offer USO. Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload") Reported-by: syzbot+e15b7e15b8a751a91d9a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000e1609a061d5330ce@google.com/ Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Sitnicki Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-2-f5c5b4149ab9@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index bc8a9da750fe..b254a5dadfcf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -282,6 +282,12 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, skb_transport_header(gso_skb))) return ERR_PTR(-EINVAL); + /* We don't know if egress device can segment and checksum the packet + * when IPv6 extension headers are present. Fall back to software GSO. + */ + if (gso_skb->ip_summed != CHECKSUM_PARTIAL) + features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), From 1d2c46c1bc5680335f20f64089c161fdfcd3e8ab Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 8 Aug 2024 11:56:23 +0200 Subject: [PATCH 0741/1523] selftests/net: Add coverage for UDP GSO with IPv6 extension headers After enabling UDP GSO for devices not offering checksum offload, we have hit a regression where a bad offload warning can be triggered when sending a datagram with IPv6 extension headers. Extend the UDP GSO IPv6 tests to cover this scenario. Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Sitnicki Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-3-f5c5b4149ab9@cloudflare.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bf..3f2fca02fec5 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -255,6 +258,13 @@ struct testcase testcases_v6[] = { .gso_len = 1, .r_num_mss = 2, }, + { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, { /* send 2B + 2B + 1B segments */ .tlen = 5, @@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; From c31fe2b5095d8c84562ce90db07600f7e9f318df Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 8 Aug 2024 17:41:02 +0300 Subject: [PATCH 0742/1523] net/mlx5: SD, Do not query MPIR register if no sd_group Unconditionally calling the MPIR query on BF separate mode yields the FW syndrome below [1]. Do not call it unless admin clearly specified the SD group, i.e. expressing the intention of using the multi-PF netdev feature. This fix covers cases not covered in commit fca3b4791850 ("net/mlx5: Do not query MPIR on embedded CPU function"). [1] mlx5_cmd_out_err:808:(pid 8267): ACCESS_REG(0x805) op_mod(0x1) failed, status bad system state(0x4), syndrome (0x685f19), err(-5) Fixes: 678eb448055a ("net/mlx5: SD, Implement basic query and instantiation") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Link: https://patch.msgid.link/20240808144107.2095424-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/lib/sd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f6deb5a3f820..eeb0b7ea05f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) } static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, - u8 *host_buses, u8 *sd_group) + u8 *host_buses) { u32 out[MLX5_ST_SZ_DW(mpir_reg)]; int err; @@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, if (err) return err; - err = mlx5_query_nic_vport_sd_group(dev, sd_group); - if (err) - return err; - *sdm = MLX5_GET(mpir_reg, out, sdm); *host_buses = MLX5_GET(mpir_reg, out, host_buses); @@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev) if (mlx5_core_is_ecpf(dev)) return 0; + err = mlx5_query_nic_vport_sd_group(dev, &sd_group); + if (err) + return err; + + if (!sd_group) + return 0; + if (!MLX5_CAP_MCAM_REG(dev, mpir)) return 0; - err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + err = mlx5_query_sd(dev, &sdm, &host_buses); if (err) return err; if (!sdm) return 0; - if (!sd_group) - return 0; - group_id = mlx5_sd_group_id(dev, sd_group); if (!mlx5_sd_is_supported(dev, host_buses)) { From ab6013a59b4d0947fda409c29426dc904959e632 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 8 Aug 2024 17:41:03 +0300 Subject: [PATCH 0743/1523] net/mlx5e: SHAMPO, Increase timeout to improve latency During latency tests (netperf TCP_RR) a 30% degradation of HW GRO vs SW GRO was observed. This is due to SHAMPO triggering timeout filler CQEs instead of delivering the CQE for the packet. Having a short timeout for SHAMPO doesn't bring any benefits as it is the driver that does the merging, not the hardware. On the contrary, it can have a negative impact: additional filler CQEs are generated due to the timeout. As there is no way to disable this timeout, this change sets it to the maximum value. Instead of using the packet_merge.timeout parameter which is also used for LRO, set the value directly when filling in the rest of the SHAMPO parameters in mlx5e_build_rq_param(). Fixes: 99be56171fa9 ("net/mlx5e: SHAMPO, Re-enable HW-GRO") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240808144107.2095424-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/params.c | 16 +++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en/params.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 12 ------------ 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5fd82c67b6ab..bb5da42edc23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -130,7 +130,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_DEFAULT_LRO_TIMEOUT 32 -#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 +#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 6c9ccccca81e..64b62ed17b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_headers_entry_size, mlx5e_shampo_get_log_hd_entry_size(mdev, params)); MLX5_SET(rqc, rqc, reservation_timeout, - params->packet_merge.timeout); + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); MLX5_SET(rqc, rqc, shampo_match_criteria_type, params->packet_merge.shampo.match_criteria_type); MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, @@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 749b2ec0436e..3f8986f9d862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6f686fabed44..f04decca39f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5167,18 +5167,6 @@ const struct net_device_ops mlx5e_netdev_ops = { #endif }; -static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) -{ - int i; - - /* The supported periods are organized in ascending order */ - for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) - if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) - break; - - return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; From e6b5afd30b99b43682a7764e1a74a42fe4d5f4b3 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 8 Aug 2024 17:41:04 +0300 Subject: [PATCH 0744/1523] net/mlx5e: Take state lock during tx timeout reporter mlx5e_safe_reopen_channels() requires the state lock taken. The referenced changed in the Fixes tag removed the lock to fix another issue. This patch adds it back but at a later point (when calling mlx5e_safe_reopen_channels()) to avoid the deadlock referenced in the Fixes tag. Fixes: eab0da38912e ("net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work") Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/all/ZplpKq8FKi3vwfxv@gmail.com/T/ Reviewed-by: Breno Leitao Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240808144107.2095424-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 22918b2ef7f1..09433b91be17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } + mutex_lock(&priv->state_lock); err = mlx5e_safe_reopen_channels(priv); + mutex_unlock(&priv->state_lock); if (!err) { to_ctx->status = 1; /* all channels recovered */ return err; From cbc796be1779c4dbc9a482c7233995e2a8b6bfb3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 8 Aug 2024 17:41:05 +0300 Subject: [PATCH 0745/1523] net/mlx5e: Correctly report errors for ethtool rx flows Previously, an ethtool rx flow with no attrs would not be added to the NIC as it has no rules to configure the hw with, but it would be reported as successful to the caller (return code 0). This is confusing for the user as ethtool then reports "Added rule $num", but no rule was actually added. This change corrects that by instead reporting these wrong rules as -EINVAL. Fixes: b29c61dac3a2 ("net/mlx5e: Ethtool steering flow validation refactoring") Signed-off-by: Cosmin Ratiu Reviewed-by: Saeed Mahameed Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240808144107.2095424-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3eccdadc0357..773624bb2c5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, if (num_tuples <= 0) { netdev_warn(priv->netdev, "%s: flow is not valid %d\n", __func__, num_tuples); - return num_tuples; + return num_tuples < 0 ? num_tuples : -EINVAL; } eth_ft = get_flow_table(priv, fs, num_tuples); From 0b4a4534d083e055831b3bc29c5eafc918ed4d86 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 8 Aug 2024 17:41:06 +0300 Subject: [PATCH 0746/1523] net/mlx5e: Fix queue stats access to non-existing channels splat The queue stats API queries the queues according to the real_num_[tr]x_queues, in case the device is down and channels were not yet created, don't try to query their statistics. To trigger the panic, run this command before the interface is brought up: ./cli.py --spec ../../../Documentation/netlink/specs/netdev.yaml --dump qstats-get --json '{"ifindex": 4}' BUG: kernel NULL pointer dereference, address: 0000000000000c00 PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 3 UID: 0 PID: 977 Comm: python3 Not tainted 6.10.0+ #40 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_get_queue_stats_rx+0x3c/0xb0 [mlx5_core] Code: fc 55 48 63 ee 53 48 89 d3 e8 40 3d 70 e1 85 c0 74 58 4c 89 ef e8 d4 07 04 00 84 c0 75 41 49 8b 84 24 f8 39 00 00 48 8b 04 e8 <48> 8b 90 00 0c 00 00 48 03 90 40 0a 00 00 48 89 53 08 48 8b 90 08 RSP: 0018:ffff888116be37d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888116be3868 RCX: 0000000000000004 RDX: ffff88810ada4000 RSI: 0000000000000000 RDI: ffff888109df09c0 RBP: 0000000000000000 R08: 0000000000000004 R09: 0000000000000004 R10: ffff88813461901c R11: ffffffffffffffff R12: ffff888109df0000 R13: ffff888109df09c0 R14: ffff888116be38d0 R15: 0000000000000000 FS: 00007f4375d5c740(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000c00 CR3: 0000000106ada006 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __die+0x1f/0x60 ? page_fault_oops+0x14e/0x3d0 ? exc_page_fault+0x73/0x130 ? asm_exc_page_fault+0x22/0x30 ? mlx5e_get_queue_stats_rx+0x3c/0xb0 [mlx5_core] netdev_nl_stats_by_netdev+0x2a6/0x4c0 ? __rmqueue_pcplist+0x351/0x6f0 netdev_nl_qstats_get_dumpit+0xc4/0x1b0 genl_dumpit+0x2d/0x80 netlink_dump+0x199/0x410 __netlink_dump_start+0x1aa/0x2c0 genl_family_rcv_msg_dumpit+0x94/0xf0 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 genl_rcv_msg+0x116/0x2b0 ? __pfx_netdev_nl_qstats_get_dumpit+0x10/0x10 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x21a/0x340 netlink_sendmsg+0x1f4/0x440 __sys_sendto+0x1b6/0x1c0 ? do_sock_setsockopt+0xc3/0x180 ? __sys_setsockopt+0x60/0xb0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x50/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f43757132b0 Code: c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 1d 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 68 c3 0f 1f 80 00 00 00 00 41 54 48 83 ec 20 RSP: 002b:00007ffd258da048 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007ffd258da0f8 RCX: 00007f43757132b0 RDX: 000000000000001c RSI: 00007f437464b850 RDI: 0000000000000003 RBP: 00007f4375085de0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f43751a6147 Modules linked in: netconsole xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core zram zsmalloc mlx5_core fuse [last unloaded: netconsole] CR2: 0000000000000c00 ---[ end trace 0000000000000000 ]--- RIP: 0010:mlx5e_get_queue_stats_rx+0x3c/0xb0 [mlx5_core] Code: fc 55 48 63 ee 53 48 89 d3 e8 40 3d 70 e1 85 c0 74 58 4c 89 ef e8 d4 07 04 00 84 c0 75 41 49 8b 84 24 f8 39 00 00 48 8b 04 e8 <48> 8b 90 00 0c 00 00 48 03 90 40 0a 00 00 48 89 53 08 48 8b 90 08 RSP: 0018:ffff888116be37d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888116be3868 RCX: 0000000000000004 RDX: ffff88810ada4000 RSI: 0000000000000000 RDI: ffff888109df09c0 RBP: 0000000000000000 R08: 0000000000000004 R09: 0000000000000004 R10: ffff88813461901c R11: ffffffffffffffff R12: ffff888109df0000 R13: ffff888109df09c0 R14: ffff888116be38d0 R15: 0000000000000000 FS: 00007f4375d5c740(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000c00 CR3: 0000000106ada006 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 7b66ae536a78 ("net/mlx5e: Add per queue netdev-genl stats") Signed-off-by: Gal Pressman Signed-off-by: Tariq Toukan Reviewed-by: Joe Damato Link: https://patch.msgid.link/20240808144107.2095424-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f04decca39f2..5df904639b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5296,7 +5296,7 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *rq_stats; ASSERT_RTNL(); - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; channel_stats = priv->channel_stats[i]; @@ -5316,6 +5316,9 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_sq_stats *sq_stats; ASSERT_RTNL(); + if (!priv->stats_nch) + return; + /* no special case needed for ptp htb etc since txq2sq_stats is kept up * to date for active sq_stats, otherwise get_base_stats takes care of * inactive sqs. From d73f0f49daa84176c3beee1606e73c7ffb6af8b2 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 9 Aug 2024 12:32:24 +0530 Subject: [PATCH 0747/1523] irqchip/xilinx: Fix shift out of bounds The device tree property 'xlnx,kind-of-intr' is sanity checked that the bitmask contains only set bits which are in the range of the number of interrupts supported by the controller. The check is done by shifting the mask right by the number of supported interrupts and checking the result for zero. The data type of the mask is u32 and the number of supported interrupts is up to 32. In case of 32 interrupts the shift is out of bounds, resulting in a mismatch warning. The out of bounds condition is also reported by UBSAN: UBSAN: shift-out-of-bounds in irq-xilinx-intc.c:332:22 shift exponent 32 is too large for 32-bit type 'unsigned int' Fix it by promoting the mask to u64 for the test. Fixes: d50466c90724 ("microblaze: intc: Refactor DT sanity check") Signed-off-by: Radhey Shyam Pandey Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/1723186944-3571957-1-git-send-email-radhey.shyam.pandey@amd.com --- drivers/irqchip/irq-xilinx-intc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 238d3d344949..7e08714d507f 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -189,7 +189,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, irqc->intr_mask = 0; } - if (irqc->intr_mask >> irqc->nr_irq) + if ((u64)irqc->intr_mask >> irqc->nr_irq) pr_warn("irq-xilinx: mismatch in kind-of-intr param\n"); pr_info("irq-xilinx: %pOF: num_irq=%d, edge=0x%x\n", From 03f9885c60adf73488fe32aab628ee3d4a39598e Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 9 Aug 2024 15:10:47 +0800 Subject: [PATCH 0748/1523] irqchip/riscv-aplic: Retrigger MSI interrupt on source configuration The section 4.5.2 of the RISC-V AIA specification says that "any write to a sourcecfg register of an APLIC might (or might not) cause the corresponding interrupt-pending bit to be set to one if the rectified input value is high (= 1) under the new source mode." When the interrupt type is changed in the sourcecfg register, the APLIC device might not set the corresponding pending bit, so the interrupt might never become pending. To handle sourcecfg register changes for level-triggered interrupts in MSI mode, manually set the pending bit for retriggering interrupt so it gets retriggered if it was already asserted. Fixes: ca8df97fe679 ("irqchip/riscv-aplic: Add support for MSI-mode") Signed-off-by: Yong-Xuan Wang Signed-off-by: Thomas Gleixner Reviewed-by: Vincent Chen Reviewed-by: Anup Patel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240809071049.2454-1-yongxuan.wang@sifive.com --- drivers/irqchip/irq-riscv-aplic-msi.c | 32 +++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-riscv-aplic-msi.c b/drivers/irqchip/irq-riscv-aplic-msi.c index 028444af48bd..d7773f76e5d0 100644 --- a/drivers/irqchip/irq-riscv-aplic-msi.c +++ b/drivers/irqchip/irq-riscv-aplic-msi.c @@ -32,15 +32,10 @@ static void aplic_msi_irq_unmask(struct irq_data *d) aplic_irq_unmask(d); } -static void aplic_msi_irq_eoi(struct irq_data *d) +static void aplic_msi_irq_retrigger_level(struct irq_data *d) { struct aplic_priv *priv = irq_data_get_irq_chip_data(d); - /* - * EOI handling is required only for level-triggered interrupts - * when APLIC is in MSI mode. - */ - switch (irqd_get_trigger_type(d)) { case IRQ_TYPE_LEVEL_LOW: case IRQ_TYPE_LEVEL_HIGH: @@ -59,6 +54,29 @@ static void aplic_msi_irq_eoi(struct irq_data *d) } } +static void aplic_msi_irq_eoi(struct irq_data *d) +{ + /* + * EOI handling is required only for level-triggered interrupts + * when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); +} + +static int aplic_msi_irq_set_type(struct irq_data *d, unsigned int type) +{ + int rc = aplic_irq_set_type(d, type); + + if (rc) + return rc; + /* + * Updating sourcecfg register for level-triggered interrupts + * requires interrupt retriggering when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); + return 0; +} + static void aplic_msi_write_msg(struct irq_data *d, struct msi_msg *msg) { unsigned int group_index, hart_index, guest_index, val; @@ -130,7 +148,7 @@ static const struct msi_domain_template aplic_msi_template = { .name = "APLIC-MSI", .irq_mask = aplic_msi_irq_mask, .irq_unmask = aplic_msi_irq_unmask, - .irq_set_type = aplic_irq_set_type, + .irq_set_type = aplic_msi_irq_set_type, .irq_eoi = aplic_msi_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, From ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 10 Aug 2024 10:48:32 +0200 Subject: [PATCH 0749/1523] ALSA: timer: Relax start tick time check for slave timer elements The recent addition of a sanity check for a too low start tick time seems breaking some applications that uses aloop with a certain slave timer setup. They may have the initial resolution 0, hence it's treated as if it were a too low value. Relax and skip the check for the slave timer instance for addressing the regression. Fixes: 4a63bd179fa8 ("ALSA: timer: Set lower bound of start tick time") Cc: Link: https://github.com/raspberrypi/linux/issues/6294 Link: https://patch.msgid.link/20240810084833.10939-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index d104adc75a8b..71a07c1662f5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ - if (start) { + if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } From 2ad4e1ada8eebafa2d75a4b75eeeca882de6ada1 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sat, 3 Aug 2024 21:52:55 +0200 Subject: [PATCH 0750/1523] wifi: brcmfmac: cfg80211: Handle SSID based pmksa deletion wpa_supplicant 2.11 sends since 1efdba5fdc2c ("Handle PMKSA flush in the driver for SAE/OWE offload cases") SSID based PMKSA del commands. brcmfmac is not prepared and tries to dereference the NULL bssid and pmkid pointers in cfg80211_pmksa. PMKID_V3 operations support SSID based updates so copy the SSID. Fixes: a96202acaea4 ("wifi: brcmfmac: cfg80211: Add support for PMKID_V3 operations") Cc: stable@vger.kernel.org # 6.4.x Signed-off-by: Janne Grunau Reviewed-by: Neal Gompa Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240803-brcmfmac_pmksa_del_ssid-v1-1-4e85f19135e1@jannau.net --- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1585a5653ee4..d4cc5fa92341 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, /* Single PMK operation */ pmk_op->count = cpu_to_le16(1); length += sizeof(struct brcmf_pmksa_v3); - memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); - memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); - pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + if (pmksa->bssid) + memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); + if (pmksa->pmkid) { + memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); + pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + } + if (pmksa->ssid && pmksa->ssid_len) { + memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len); + pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len; + } pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0); } From aad41832326723627ad8ac9ee8a543b6dca4454d Mon Sep 17 00:00:00 2001 From: Asmaa Mnebhi Date: Tue, 11 Jun 2024 13:15:09 -0400 Subject: [PATCH 0751/1523] gpio: mlxbf3: Support shutdown() function During Linux graceful reboot, the GPIO interrupts are not disabled. Since the drivers are not removed during graceful reboot, the logic to call mlxbf3_gpio_irq_disable() is not triggered. Interrupts that remain enabled can cause issues on subsequent boots. For example, the mlxbf-gige driver contains PHY logic to bring up the link. If the gpio-mlxbf3 driver loads first, the mlxbf-gige driver will use a GPIO interrupt to bring up the link. Otherwise, it will use polling. The next time Linux boots and loads the drivers in this order, we encounter the issue: - mlxbf-gige loads first and uses polling while the GPIO10 interrupt is still enabled from the previous boot. So if the interrupt triggers, there is nothing to clear it. - gpio-mlxbf3 loads. - i2c-mlxbf loads. The interrupt doesn't trigger for I2C because it is shared with the GPIO interrupt line which was not cleared. The solution is to add a shutdown function to the GPIO driver to clear and disable all interrupts. Also clear the interrupt after disabling it in mlxbf3_gpio_irq_disable(). Fixes: 38a700efc510 ("gpio: mlxbf3: Add gpio driver support") Signed-off-by: Asmaa Mnebhi Reviewed-by: David Thompson Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240611171509.22151-1-asmaa@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index d5906d419b0a..10ea71273c89 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver); From 9a039eeb71a42c8b13408a1976e300f3898e1be0 Mon Sep 17 00:00:00 2001 From: Moon Yeounsu Date: Wed, 7 Aug 2024 19:07:21 +0900 Subject: [PATCH 0752/1523] net: ethernet: use ip_hdrlen() instead of bit shift `ip_hdr(skb)->ihl << 2` is the same as `ip_hdrlen(skb)` Therefore, we should use a well-defined function not a bit shift to find the header length. It also compresses two lines to a single line. Signed-off-by: Moon Yeounsu Reviewed-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b06e24562973..d8be0e4dcb07 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -946,15 +946,13 @@ jme_udpsum(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IP)) return csum; skb_set_network_header(skb, ETH_HLEN); - if ((ip_hdr(skb)->protocol != IPPROTO_UDP) || - (skb->len < (ETH_HLEN + - (ip_hdr(skb)->ihl << 2) + - sizeof(struct udphdr)))) { + + if (ip_hdr(skb)->protocol != IPPROTO_UDP || + skb->len < (ETH_HLEN + ip_hdrlen(skb) + sizeof(struct udphdr))) { skb_reset_network_header(skb); return csum; } - skb_set_transport_header(skb, - ETH_HLEN + (ip_hdr(skb)->ihl << 2)); + skb_set_transport_header(skb, ETH_HLEN + ip_hdrlen(skb)); csum = udp_hdr(skb)->check; skb_reset_transport_header(skb); skb_reset_network_header(skb); From ef9718b3d54e822de294351251f3a574f8a082ce Mon Sep 17 00:00:00 2001 From: Parsa Poorshikhian Date: Sat, 10 Aug 2024 18:39:06 +0330 Subject: [PATCH 0753/1523] ALSA: hda/realtek: Fix noise from speakers on Lenovo IdeaPad 3 15IAU7 Fix noise from speakers connected to AUX port when no sound is playing. The problem occurs because the `alc_shutup_pins` function includes a 0x10ec0257 vendor ID, which causes noise on Lenovo IdeaPad 3 15IAU7 with Realtek ALC257 codec when no sound is playing. Removing this vendor ID from the function fixes the bug. Fixes: 70794b9563fe ("ALSA: hda/realtek: Add more codec ID to no shutup pins list") Signed-off-by: Parsa Poorshikhian Link: https://patch.msgid.link/20240810150939.330693-1-parsa.poorsh@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 480e82df7a4c..6e19598e23b7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -583,7 +583,6 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: - case 0x10ec0257: case 0x19e58326: case 0x10ec0283: case 0x10ec0285: From 7c626ce4bae1ac14f60076d00eafe71af30450ba Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Aug 2024 14:27:14 -0700 Subject: [PATCH 0754/1523] Linux 6.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 44c02a6f60a1..0a364e34f50b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From 7d113cce5fe92c8bf0a2e2f19f71762a845e9d9e Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Wed, 7 Aug 2024 19:51:05 +0530 Subject: [PATCH 0755/1523] drm/i915/bmg: Read display register timeout Log the address of the register that caused the timeout interrupt by reading RMTIMEOUTREG_CAPTURE --v2: - Update RMTIMEOUTREG_CAPTURE naming (Suraj) --v3: - XeLpdp naming convention. - Use if condition instead of else if Signed-off-by: Mitul Golani Reviewed-by: Suraj Kandpal Signed-off-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20240807142106.1270213-1-mitulkumar.ajitkumar.golani@intel.com --- drivers/gpu/drm/i915/display/intel_display_irq.c | 9 ++++++++- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 5219ba295c74..9f452d92f6d4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -906,6 +906,13 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_pmdemand_irq_handler(dev_priv); found = true; } + + if (iir & XELPDP_RM_TIMEOUT) { + u32 val = intel_uncore_read(&dev_priv->uncore, + RM_TIMEOUT_REG_CAPTURE); + drm_warn(&dev_priv->drm, "Register Access Timeout = 0x%x\n", val); + found = true; + } } else if (iir & GEN8_DE_MISC_GSE) { intel_opregion_asle_intr(dev_priv); found = true; @@ -1710,7 +1717,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 14) { de_misc_masked |= XELPDP_PMDEMAND_RSPTOUT_ERR | - XELPDP_PMDEMAND_RSP; + XELPDP_PMDEMAND_RSP | XELPDP_RM_TIMEOUT; } else if (DISPLAY_VER(dev_priv) >= 11) { enum port port; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0e3d79227e3c..569b461022c5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2396,6 +2396,7 @@ /* Display Internal Timeout Register */ #define RM_TIMEOUT _MMIO(0x42060) +#define RM_TIMEOUT_REG_CAPTURE _MMIO(0x420E0) #define MMIO_TIMEOUT_US(us) ((us) << 0) /* interrupts */ @@ -2574,6 +2575,7 @@ #define GEN8_DE_MISC_IMR _MMIO(0x44464) #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) +#define XELPDP_RM_TIMEOUT REG_BIT(29) #define XELPDP_PMDEMAND_RSPTOUT_ERR REG_BIT(27) #define GEN8_DE_MISC_GSE REG_BIT(27) #define GEN8_DE_EDP_PSR REG_BIT(19) From 03c5c350e38d9346b69357d0e52c3c40495c14a0 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 2 Aug 2024 16:22:15 +0100 Subject: [PATCH 0756/1523] ALSA: hda/realtek: Add support for new HP G12 laptops Some of these laptop models have quirk IDs that are identical but have different amplifier parts fitted, this difference is described in the ACPI information. The solution introduced for this product family can derive the required component binding information from ACPI instead of hardcoding it, supports the new variants of the CS35L56 being used and has generalized naming that makes it applicable to other ALC+amp combinations. Signed-off-by: Simon Trimmer Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240802152215.20831-4-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 99 +++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 480e82df7a4c..24eb71efac6c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11,15 +11,18 @@ */ #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -6856,6 +6859,86 @@ static void comp_generic_fixup(struct hda_codec *cdc, int action, const char *bu } } +static void cs35lxx_autodet_fixup(struct hda_codec *cdc, + const struct hda_fixup *fix, + int action) +{ + struct device *dev = hda_codec_dev(cdc); + struct acpi_device *adev; + struct fwnode_handle *fwnode __free(fwnode_handle) = NULL; + const char *bus = NULL; + static const struct { + const char *hid; + const char *name; + } acpi_ids[] = {{ "CSC3554", "cs35l54-hda" }, + { "CSC3556", "cs35l56-hda" }, + { "CSC3557", "cs35l57-hda" }}; + char *match; + int i, count = 0, count_devindex = 0; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + for (i = 0; i < ARRAY_SIZE(acpi_ids); ++i) { + adev = acpi_dev_get_first_match_dev(acpi_ids[i].hid, NULL, -1); + if (adev) + break; + } + if (!adev) { + dev_err(dev, "Failed to find ACPI entry for a Cirrus Amp\n"); + return; + } + + count = i2c_acpi_client_count(adev); + if (count > 0) { + bus = "i2c"; + } else { + count = acpi_spi_count_resources(adev); + if (count > 0) + bus = "spi"; + } + + fwnode = fwnode_handle_get(acpi_fwnode_handle(adev)); + acpi_dev_put(adev); + + if (!bus) { + dev_err(dev, "Did not find any buses for %s\n", acpi_ids[i].hid); + return; + } + + if (!fwnode) { + dev_err(dev, "Could not get fwnode for %s\n", acpi_ids[i].hid); + return; + } + + /* + * When available the cirrus,dev-index property is an accurate + * count of the amps in a system and is used in preference to + * the count of bus devices that can contain additional address + * alias entries. + */ + count_devindex = fwnode_property_count_u32(fwnode, "cirrus,dev-index"); + if (count_devindex > 0) + count = count_devindex; + + match = devm_kasprintf(dev, GFP_KERNEL, "-%%s:00-%s.%%d", acpi_ids[i].name); + if (!match) + return; + dev_info(dev, "Found %d %s on %s (%s)\n", count, acpi_ids[i].hid, bus, match); + comp_generic_fixup(cdc, action, bus, acpi_ids[i].hid, match, count); + + break; + case HDA_FIXUP_ACT_FREE: + /* + * Pass the action on to comp_generic_fixup() so that + * hda_component_manager functions can be called in just once + * place. In this context the bus, hid, match_str or count + * values do not need to be calculated. + */ + comp_generic_fixup(cdc, action, NULL, NULL, NULL, 0); + break; + } +} + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "i2c", "CSC3551", "-%s:00-cs35l41-hda.%d", 2); @@ -7528,6 +7611,7 @@ enum { ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, ALC287_FIXUP_LENOVO_SSID_17AA3820, + ALCXXX_FIXUP_CS35LXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9857,6 +9941,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_ssid_17aa3820, }, + [ALCXXX_FIXUP_CS35LXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35lxx_autodet_fixup, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10271,6 +10359,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d08, "HP EliteBook 1045 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite x360 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8c, "HP EliteBook 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8d, "HP Elite x360 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8e, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8f, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 860 16 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALCXXX_FIXUP_CS35LXX), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 004eb8ba776ccd3e296ea6f78f7ae7985b12824e Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Sun, 11 Aug 2024 08:30:11 +0000 Subject: [PATCH 0757/1523] ALSA: usb-audio: Add delay quirk for VIVO USB-C-XE710 HEADSET Audio control requests that sets sampling frequency sometimes fail on this card. Adding delay between control messages eliminates that problem. Signed-off-by: Lianqin Hu Cc: Link: https://patch.msgid.link/TYUPR06MB6217FF67076AF3E49E12C877D2842@TYUPR06MB6217.apcprd06.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ea063a14cdd8..e7b68c67852e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2221,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ From b86aa4140f6a8f01f35bfb05af60e01a55b48803 Mon Sep 17 00:00:00 2001 From: Bouke Sybren Haarsma Date: Sun, 28 Jul 2024 14:47:30 +0200 Subject: [PATCH 0758/1523] drm: panel-orientation-quirks: Add quirk for Ayn Loki Zero Add quirk orientation for the Ayn Loki Zero. This also has been tested/used by the JELOS team. Signed-off-by: Bouke Sybren Haarsma Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20240728124731.168452-2-boukehaarsma23@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index c16c7678237e..a1dfeaae644d 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), From 2c71c8459c8ca66bd8f597effaac892ee8448a9f Mon Sep 17 00:00:00 2001 From: Bouke Sybren Haarsma Date: Sun, 28 Jul 2024 14:47:31 +0200 Subject: [PATCH 0759/1523] drm: panel-orientation-quirks: Add quirk for Ayn Loki Max Add quirk orientation for Ayn Loki Max model. This has been tested by JELOS team that uses their own patched kernel for a while now and confirmed by users in the ChimeraOS discord servers. Signed-off-by: Bouke Sybren Haarsma Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20240728124731.168452-3-boukehaarsma23@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index a1dfeaae644d..0830cae9a4d0 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* AYN Loki Zero */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), From f7303ab29d08a551975aecb4bc6851ac3445abf4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 9 Aug 2024 17:27:03 +0300 Subject: [PATCH 0760/1523] drm/i915/acpi: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_acpi.[ch] to struct intel_display. Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/465436a3442807b49609fc55c9f652a29f96fd02.1723213547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_acpi.c | 17 ++++++++--------- drivers/gpu/drm/i915/display/intel_acpi.h | 18 +++++++++--------- .../drm/i915/display/intel_display_driver.c | 8 +++++--- drivers/gpu/drm/i915/display/intel_opregion.c | 10 ++++++---- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index 0aa3999374e2..c3b29a331d72 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -183,9 +183,9 @@ void intel_unregister_dsm_handler(void) { } -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); acpi_handle dhandle; union acpi_object *obj; @@ -263,15 +263,14 @@ static u32 acpi_display_type(struct intel_connector *connector) return display_type; } -void intel_acpi_device_id_update(struct drm_i915_private *dev_priv) +void intel_acpi_device_id_update(struct intel_display *display) { - struct drm_device *drm_dev = &dev_priv->drm; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; u8 display_index[16] = {}; /* Populate the ACPI IDs for all connectors for a given drm_device */ - drm_connector_list_iter_begin(drm_dev, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { u32 device_id, type; @@ -288,10 +287,10 @@ void intel_acpi_device_id_update(struct drm_i915_private *dev_priv) } /* NOTE: The connector order must be final before this is called. */ -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) +void intel_acpi_assign_connector_fwnodes(struct intel_display *display) { + struct drm_device *drm_dev = display->drm; struct drm_connector_list_iter conn_iter; - struct drm_device *drm_dev = &i915->drm; struct fwnode_handle *fwnode = NULL; struct drm_connector *connector; struct acpi_device *adev; @@ -333,7 +332,7 @@ void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) fwnode_handle_put(fwnode); } -void intel_acpi_video_register(struct drm_i915_private *i915) +void intel_acpi_video_register(struct intel_display *display) { struct drm_connector_list_iter conn_iter; struct drm_connector *connector; @@ -347,7 +346,7 @@ void intel_acpi_video_register(struct drm_i915_private *i915) * a native backlight later and acpi_video_register_backlight() should * only be called after any native backlights have been registered. */ - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { struct intel_panel *panel = &to_intel_connector(connector)->panel; diff --git a/drivers/gpu/drm/i915/display/intel_acpi.h b/drivers/gpu/drm/i915/display/intel_acpi.h index 6a0007452f95..788a63071661 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.h +++ b/drivers/gpu/drm/i915/display/intel_acpi.h @@ -6,26 +6,26 @@ #ifndef __INTEL_ACPI_H__ #define __INTEL_ACPI_H__ -struct drm_i915_private; +struct intel_display; #ifdef CONFIG_ACPI void intel_register_dsm_handler(void); void intel_unregister_dsm_handler(void); -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915); -void intel_acpi_device_id_update(struct drm_i915_private *i915); -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915); -void intel_acpi_video_register(struct drm_i915_private *i915); +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display); +void intel_acpi_device_id_update(struct intel_display *display); +void intel_acpi_assign_connector_fwnodes(struct intel_display *display); +void intel_acpi_video_register(struct intel_display *display); #else static inline void intel_register_dsm_handler(void) { return; } static inline void intel_unregister_dsm_handler(void) { return; } static inline -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) { return; } +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display) { return; } static inline -void intel_acpi_device_id_update(struct drm_i915_private *i915) { return; } +void intel_acpi_device_id_update(struct intel_display *display) { return; } static inline -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) { return; } +void intel_acpi_assign_connector_fwnodes(struct intel_display *display) { return; } static inline -void intel_acpi_video_register(struct drm_i915_private *i915) { return; } +void intel_acpi_video_register(struct intel_display *display) { return; } #endif /* CONFIG_ACPI */ #endif /* __INTEL_ACPI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 13e206ec450f..392c10359a52 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -416,7 +416,8 @@ bool intel_display_driver_check_access(struct drm_i915_private *i915) /* part #2: call after irq install, but before gem init */ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) { - struct drm_device *dev = &i915->drm; + struct intel_display *display = &i915->display; + struct drm_device *dev = display->drm; enum pipe pipe; int ret; @@ -466,7 +467,7 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(i915, dev->mode_config.acquire_ctx); - intel_acpi_assign_connector_fwnodes(i915); + intel_acpi_assign_connector_fwnodes(display); drm_modeset_unlock_all(dev); intel_initial_plane_config(i915); @@ -526,6 +527,7 @@ int intel_display_driver_probe(struct drm_i915_private *i915) void intel_display_driver_register(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct drm_printer p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "i915 display info:"); @@ -534,7 +536,7 @@ void intel_display_driver_register(struct drm_i915_private *i915) /* Must be done after probing outputs */ intel_opregion_register(i915); - intel_acpi_video_register(i915); + intel_acpi_video_register(display); intel_audio_init(i915); diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 68bd5101ec89..4ff646a7f05d 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -722,7 +722,8 @@ static void set_did(struct intel_opregion *opregion, int i, u32 val) static void intel_didl_outputs(struct drm_i915_private *dev_priv) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_display *display = &dev_priv->display; + struct intel_opregion *opregion = display->opregion; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; int i = 0, max_outputs; @@ -737,7 +738,7 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) max_outputs = ARRAY_SIZE(opregion->acpi->didl) + ARRAY_SIZE(opregion->acpi->did2); - intel_acpi_device_id_update(dev_priv); + intel_acpi_device_id_update(display); drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { @@ -1189,7 +1190,8 @@ void intel_opregion_register(struct drm_i915_private *i915) static void intel_opregion_resume_display(struct drm_i915_private *i915) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_display *display = &i915->display; + struct intel_opregion *opregion = display->opregion; if (opregion->acpi) { intel_didl_outputs(i915); @@ -1210,7 +1212,7 @@ static void intel_opregion_resume_display(struct drm_i915_private *i915) } /* Some platforms abuse the _DSM to enable MUX */ - intel_dsm_get_bios_data_funcs_supported(i915); + intel_dsm_get_bios_data_funcs_supported(display); } void intel_opregion_resume(struct drm_i915_private *i915) From b7f317e62968979343a4677ffd5bd91dd3edc204 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 9 Aug 2024 17:27:04 +0300 Subject: [PATCH 0761/1523] drm/i915/opregion: unify intel_encoder/intel_connector naming Prefer the short encoder/connector names for struct intel_encoder/intel_connector variables and parameters. Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/60c67da6b7282ab521366524109ade0470408cf8.1723213547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_opregion.c | 23 +++++++++---------- drivers/gpu/drm/i915/display/intel_opregion.h | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 4ff646a7f05d..4c9b9e7ebc91 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -381,10 +381,10 @@ static int swsci(struct drm_i915_private *dev_priv, #define DISPLAY_TYPE_EXTERNAL_FLAT_PANEL 2 #define DISPLAY_TYPE_INTERNAL_FLAT_PANEL 3 -int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, +int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) { - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 parm = 0; u32 type = 0; u32 port; @@ -399,10 +399,10 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, if (ret) return ret; - if (intel_encoder->type == INTEL_OUTPUT_DSI) + if (encoder->type == INTEL_OUTPUT_DSI) port = 0; else - port = intel_encoder->port; + port = encoder->port; if (port == PORT_E) { port = 0; @@ -421,15 +421,15 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, if (port > 4) { drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", - intel_encoder->base.base.id, intel_encoder->base.name, - port_name(intel_encoder->port), port); + encoder->base.base.id, encoder->base.name, + port_name(encoder->port), port); return -EINVAL; } if (!enable) parm |= 4 << 8; - switch (intel_encoder->type) { + switch (encoder->type) { case INTEL_OUTPUT_ANALOG: type = DISPLAY_TYPE_CRT; break; @@ -446,7 +446,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, default: drm_WARN_ONCE(&dev_priv->drm, 1, "unsupported intel_encoder type %d\n", - intel_encoder->type); + encoder->type); return -EINVAL; } @@ -1093,7 +1093,7 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) /** * intel_opregion_get_edid - Fetch EDID from ACPI OpRegion mailbox #5 - * @intel_connector: eDP connector + * @connector: eDP connector * * This reads the ACPI Opregion mailbox #5 to extract the EDID that is passed * to it. @@ -1102,10 +1102,9 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) * The EDID in the OpRegion, or NULL if there is none or it's invalid. * */ -const struct drm_edid *intel_opregion_get_edid(struct intel_connector *intel_connector) +const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector) { - struct drm_connector *connector = &intel_connector->base; - struct drm_i915_private *i915 = to_i915(connector->dev); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_opregion *opregion = i915->display.opregion; const struct drm_edid *drm_edid; const void *edid; diff --git a/drivers/gpu/drm/i915/display/intel_opregion.h b/drivers/gpu/drm/i915/display/intel_opregion.h index 4b2b8e752632..4dcd13c672e4 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.h +++ b/drivers/gpu/drm/i915/display/intel_opregion.h @@ -46,7 +46,7 @@ void intel_opregion_suspend(struct drm_i915_private *dev_priv, bool intel_opregion_asle_present(struct drm_i915_private *i915); void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); -int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, +int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable); int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, pci_power_t state); @@ -98,7 +98,7 @@ static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) } static inline int -intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) +intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) { return 0; } From 769b081c18b916e362a2b623d7c28ce761c3165c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 9 Aug 2024 17:27:05 +0300 Subject: [PATCH 0762/1523] drm/i915/opregion: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_opregion.[ch] to struct intel_display. v2: - Fix declarations for !CONFIG_ACPI (Imre, kernel test robot) - Pass encoder/connector directly to intel_display() (Imre) Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/aef94503909bbbf95f0244dc382a4d4cd050b903.1723213547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 10 +- .../drm/i915/display/intel_display_debugfs.c | 5 +- .../drm/i915/display/intel_display_device.c | 10 +- .../drm/i915/display/intel_display_driver.c | 6 +- .../gpu/drm/i915/display/intel_display_irq.c | 24 +- drivers/gpu/drm/i915/display/intel_opregion.c | 296 +++++++++--------- drivers/gpu/drm/i915/display/intel_opregion.h | 58 ++-- drivers/gpu/drm/i915/i915_driver.c | 22 +- drivers/gpu/drm/xe/display/xe_display.c | 19 +- 9 files changed, 241 insertions(+), 209 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 83fa093ccf8b..0b13b37f24a4 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -622,7 +622,9 @@ static int opregion_get_panel_type(struct drm_i915_private *i915, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { - return intel_opregion_get_panel_type(i915); + struct intel_display *display = &i915->display; + + return intel_opregion_get_panel_type(display); } static int vbt_get_panel_type(struct drm_i915_private *i915, @@ -3158,13 +3160,14 @@ err_unmap_oprom: static const struct vbt_header *intel_bios_get_vbt(struct drm_i915_private *i915, size_t *sizep) { + struct intel_display *display = &i915->display; const struct vbt_header *vbt = NULL; intel_wakeref_t wakeref; vbt = firmware_get_vbt(i915, sizep); if (!vbt) - vbt = intel_opregion_get_vbt(i915, sizep); + vbt = intel_opregion_get_vbt(display, sizep); /* * If the OpRegion does not have VBT, look in SPI flash @@ -3378,6 +3381,7 @@ bool intel_bios_is_tv_present(struct drm_i915_private *i915) */ bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) { + struct intel_display *display = &i915->display; const struct intel_bios_encoder_data *devdata; if (list_empty(&i915->display.vbt.display_devices)) @@ -3410,7 +3414,7 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) * additional data. Trust that if the VBT was written into * the OpRegion then they have validated the LVDS's existence. */ - return intel_opregion_vbt_present(i915); + return intel_opregion_vbt_present(display); } return false; diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index a0a3b3e180f4..7d3ae826a353 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1049,6 +1049,7 @@ static const struct { void intel_display_debugfs_register(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct drm_minor *minor = i915->drm.primary; int i; @@ -1067,9 +1068,9 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) intel_bios_debugfs_register(i915); intel_cdclk_debugfs_register(i915); intel_dmc_debugfs_register(i915); - intel_fbc_debugfs_register(&i915->display); + intel_fbc_debugfs_register(display); intel_hpd_debugfs_register(i915); - intel_opregion_debugfs_register(i915); + intel_opregion_debugfs_register(display); intel_psr_debugfs_register(i915); intel_wm_debugfs_register(i915); intel_display_debugfs_params(i915); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index dd7dce4b0e7a..a31f89df2c0a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1529,9 +1529,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf */ bool intel_display_device_enabled(struct drm_i915_private *i915) { - /* Only valid when HAS_DISPLAY() is true */ - drm_WARN_ON(&i915->drm, !HAS_DISPLAY(i915)); + struct intel_display *display = &i915->display; - return !i915->display.params.disable_display && - !intel_opregion_headless_sku(i915); + /* Only valid when HAS_DISPLAY() is true */ + drm_WARN_ON(display->drm, !HAS_DISPLAY(display)); + + return !display->params.disable_display && + !intel_opregion_headless_sku(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 392c10359a52..a2666b69834e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -535,7 +535,7 @@ void intel_display_driver_register(struct drm_i915_private *i915) return; /* Must be done after probing outputs */ - intel_opregion_register(i915); + intel_opregion_register(display); intel_acpi_video_register(display); intel_audio_init(i915); @@ -626,6 +626,8 @@ void intel_display_driver_remove_nogem(struct drm_i915_private *i915) void intel_display_driver_unregister(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + if (!HAS_DISPLAY(i915)) return; @@ -645,7 +647,7 @@ void intel_display_driver_unregister(struct drm_i915_private *i915) drm_atomic_helper_shutdown(&i915->drm); acpi_video_unregister(); - intel_opregion_unregister(i915); + intel_opregion_unregister(display); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 9f452d92f6d4..853c712686d8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -270,10 +270,12 @@ void i915_disable_pipestat(struct drm_i915_private *dev_priv, static bool i915_has_asle(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + if (!IS_PINEVIEW(i915) && !IS_MOBILE(i915)) return false; - return intel_opregion_asle_present(i915); + return intel_opregion_asle_present(display); } /** @@ -497,6 +499,8 @@ void i8xx_pipestat_irq_handler(struct drm_i915_private *dev_priv, void i915_pipestat_irq_handler(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { + struct intel_display *display = &dev_priv->display; + bool blc_event = false; enum pipe pipe; @@ -515,12 +519,13 @@ void i915_pipestat_irq_handler(struct drm_i915_private *dev_priv, } if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); } void i965_pipestat_irq_handler(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { + struct intel_display *display = &dev_priv->display; bool blc_event = false; enum pipe pipe; @@ -539,7 +544,7 @@ void i965_pipestat_irq_handler(struct drm_i915_private *dev_priv, } if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) intel_gmbus_irq_handler(dev_priv); @@ -695,6 +700,7 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) { + struct intel_display *display = &dev_priv->display; enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; @@ -705,7 +711,7 @@ void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) intel_dp_aux_irq_handler(dev_priv); if (de_iir & DE_GSE) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); if (de_iir & DE_POISON) drm_err(&dev_priv->drm, "Poison interrupt\n"); @@ -743,6 +749,7 @@ void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) { + struct intel_display *display = &dev_priv->display; enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; @@ -770,7 +777,7 @@ void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) intel_dp_aux_irq_handler(dev_priv); if (de_iir & DE_GSE_IVB) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); for_each_pipe(dev_priv, pipe) { if (de_iir & DE_PIPE_VBLANK_IVB(pipe)) @@ -894,6 +901,7 @@ static void intel_pmdemand_irq_handler(struct drm_i915_private *dev_priv) static void gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) { + struct intel_display *display = &dev_priv->display; bool found = false; if (DISPLAY_VER(dev_priv) >= 14) { @@ -914,7 +922,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) found = true; } } else if (iir & GEN8_DE_MISC_GSE) { - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); found = true; } @@ -1218,8 +1226,10 @@ u32 gen11_gu_misc_irq_ack(struct drm_i915_private *i915, const u32 master_ctl) void gen11_gu_misc_irq_handler(struct drm_i915_private *i915, const u32 iir) { + struct intel_display *display = &i915->display; + if (iir & GEN11_GU_MISC_GSE) - intel_opregion_asle_intr(i915); + intel_opregion_asle_intr(display); } void gen11_display_irq_handler(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 4c9b9e7ebc91..7a9d6b2f4100 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -252,7 +252,7 @@ struct opregion_asle_ext { #define OPREGION_SIZE (8 * 1024) struct intel_opregion { - struct drm_i915_private *i915; + struct intel_display *display; struct opregion_header *header; struct opregion_acpi *acpi; @@ -268,9 +268,9 @@ struct intel_opregion { struct notifier_block acpi_notifier; }; -static int check_swsci_function(struct drm_i915_private *i915, u32 function) +static int check_swsci_function(struct intel_display *display, u32 function) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; struct opregion_swsci *swsci; u32 main_function, sub_function; @@ -300,20 +300,20 @@ static int check_swsci_function(struct drm_i915_private *i915, u32 function) return 0; } -static int swsci(struct drm_i915_private *dev_priv, +static int swsci(struct intel_display *display, u32 function, u32 parm, u32 *parm_out) { struct opregion_swsci *swsci; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); u32 scic, dslp; u16 swsci_val; int ret; - ret = check_swsci_function(dev_priv, function); + ret = check_swsci_function(display, function); if (ret) return ret; - swsci = dev_priv->display.opregion->swsci; + swsci = display->opregion->swsci; /* Driver sleep timeout in ms. */ dslp = swsci->dslp; @@ -331,7 +331,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* The spec tells us to do this, but we are the only user... */ scic = swsci->scic; if (scic & SWSCI_SCIC_INDICATOR) { - drm_dbg(&dev_priv->drm, "SWSCI request already in progress\n"); + drm_dbg(display->drm, "SWSCI request already in progress\n"); return -EBUSY; } @@ -355,7 +355,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* Poll for the result. */ #define C (((scic = swsci->scic) & SWSCI_SCIC_INDICATOR) == 0) if (wait_for(C, dslp)) { - drm_dbg(&dev_priv->drm, "SWSCI request timed out\n"); + drm_dbg(display->drm, "SWSCI request timed out\n"); return -ETIMEDOUT; } @@ -364,7 +364,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* Note: scic == 0 is an error! */ if (scic != SWSCI_SCIC_EXIT_STATUS_SUCCESS) { - drm_dbg(&dev_priv->drm, "SWSCI request error %u\n", scic); + drm_dbg(display->drm, "SWSCI request error %u\n", scic); return -EIO; } @@ -384,18 +384,18 @@ static int swsci(struct drm_i915_private *dev_priv, int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); u32 parm = 0; u32 type = 0; u32 port; int ret; /* don't care about old stuff for now */ - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(display)) return 0; /* Avoid port out of bounds checks if SWSCI isn't there. */ - ret = check_swsci_function(dev_priv, SWSCI_SBCB_DISPLAY_POWER_STATE); + ret = check_swsci_function(display, SWSCI_SBCB_DISPLAY_POWER_STATE); if (ret) return ret; @@ -419,7 +419,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *encoder, * number is out of bounds after mapping. */ if (port > 4) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", encoder->base.base.id, encoder->base.name, port_name(encoder->port), port); @@ -444,7 +444,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *encoder, type = DISPLAY_TYPE_INTERNAL_FLAT_PANEL; break; default: - drm_WARN_ONCE(&dev_priv->drm, 1, + drm_WARN_ONCE(display->drm, 1, "unsupported intel_encoder type %d\n", encoder->type); return -EINVAL; @@ -452,7 +452,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *encoder, parm |= type << (16 + port * 3); - return swsci(dev_priv, SWSCI_SBCB_DISPLAY_POWER_STATE, parm, NULL); + return swsci(display, SWSCI_SBCB_DISPLAY_POWER_STATE, parm, NULL); } static const struct { @@ -466,33 +466,33 @@ static const struct { { PCI_D3cold, 0x04 }, }; -int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, +int intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state) { int i; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(display)) return 0; for (i = 0; i < ARRAY_SIZE(power_state_map); i++) { if (state == power_state_map[i].pci_power_state) - return swsci(dev_priv, SWSCI_SBCB_ADAPTER_POWER_STATE, + return swsci(display, SWSCI_SBCB_ADAPTER_POWER_STATE, power_state_map[i].parm, NULL); } return -EINVAL; } -static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) +static u32 asle_set_backlight(struct intel_display *display, u32 bclp) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - struct opregion_asle *asle = dev_priv->display.opregion->asle; + struct opregion_asle *asle = display->opregion->asle; - drm_dbg(&dev_priv->drm, "bclp = 0x%08x\n", bclp); + drm_dbg(display->drm, "bclp = 0x%08x\n", bclp); if (acpi_video_get_backlight_type() == acpi_backlight_native) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "opregion backlight request ignored\n"); return 0; } @@ -504,104 +504,104 @@ static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) if (bclp > 255) return ASLC_BACKLIGHT_FAILED; - drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, NULL); + drm_modeset_lock(&display->drm->mode_config.connection_mutex, NULL); /* * Update backlight on all connectors that support backlight (usually * only one). */ - drm_dbg_kms(&dev_priv->drm, "updating opregion backlight %d/255\n", + drm_dbg_kms(display->drm, "updating opregion backlight %d/255\n", bclp); - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) intel_backlight_set_acpi(connector->base.state, bclp, 255); drm_connector_list_iter_end(&conn_iter); asle->cblv = DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID; - drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + drm_modeset_unlock(&display->drm->mode_config.connection_mutex); return 0; } -static u32 asle_set_als_illum(struct drm_i915_private *dev_priv, u32 alsi) +static u32 asle_set_als_illum(struct intel_display *display, u32 alsi) { /* alsi is the current ALS reading in lux. 0 indicates below sensor range, 0xffff indicates above sensor range. 1-0xfffe are valid */ - drm_dbg(&dev_priv->drm, "Illum is not supported\n"); + drm_dbg(display->drm, "Illum is not supported\n"); return ASLC_ALS_ILLUM_FAILED; } -static u32 asle_set_pwm_freq(struct drm_i915_private *dev_priv, u32 pfmb) +static u32 asle_set_pwm_freq(struct intel_display *display, u32 pfmb) { - drm_dbg(&dev_priv->drm, "PWM freq is not supported\n"); + drm_dbg(display->drm, "PWM freq is not supported\n"); return ASLC_PWM_FREQ_FAILED; } -static u32 asle_set_pfit(struct drm_i915_private *dev_priv, u32 pfit) +static u32 asle_set_pfit(struct intel_display *display, u32 pfit) { /* Panel fitting is currently controlled by the X code, so this is a noop until modesetting support works fully */ - drm_dbg(&dev_priv->drm, "Pfit is not supported\n"); + drm_dbg(display->drm, "Pfit is not supported\n"); return ASLC_PFIT_FAILED; } -static u32 asle_set_supported_rotation_angles(struct drm_i915_private *dev_priv, u32 srot) +static u32 asle_set_supported_rotation_angles(struct intel_display *display, u32 srot) { - drm_dbg(&dev_priv->drm, "SROT is not supported\n"); + drm_dbg(display->drm, "SROT is not supported\n"); return ASLC_ROTATION_ANGLES_FAILED; } -static u32 asle_set_button_array(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_button_array(struct intel_display *display, u32 iuer) { if (!iuer) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (nothing)\n"); if (iuer & ASLE_IUER_ROTATION_LOCK_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (rotation lock)\n"); if (iuer & ASLE_IUER_VOLUME_DOWN_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (volume down)\n"); if (iuer & ASLE_IUER_VOLUME_UP_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (volume up)\n"); if (iuer & ASLE_IUER_WINDOWS_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (windows)\n"); if (iuer & ASLE_IUER_POWER_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (power)\n"); return ASLC_BUTTON_ARRAY_FAILED; } -static u32 asle_set_convertible(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_convertible(struct intel_display *display, u32 iuer) { if (iuer & ASLE_IUER_CONVERTIBLE) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Convertible is not supported (clamshell)\n"); else - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Convertible is not supported (slate)\n"); return ASLC_CONVERTIBLE_FAILED; } -static u32 asle_set_docking(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_docking(struct intel_display *display, u32 iuer) { if (iuer & ASLE_IUER_DOCKING) - drm_dbg(&dev_priv->drm, "Docking is not supported (docked)\n"); + drm_dbg(display->drm, "Docking is not supported (docked)\n"); else - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Docking is not supported (undocked)\n"); return ASLC_DOCKING_FAILED; } -static u32 asle_isct_state(struct drm_i915_private *dev_priv) +static u32 asle_isct_state(struct intel_display *display) { - drm_dbg(&dev_priv->drm, "ISCT is not supported\n"); + drm_dbg(display->drm, "ISCT is not supported\n"); return ASLC_ISCT_STATE_FAILED; } @@ -609,7 +609,7 @@ static void asle_work(struct work_struct *work) { struct intel_opregion *opregion = container_of(work, struct intel_opregion, asle_work); - struct drm_i915_private *dev_priv = opregion->i915; + struct intel_display *display = opregion->display; struct opregion_asle *asle = opregion->asle; u32 aslc_stat = 0; u32 aslc_req; @@ -620,50 +620,51 @@ static void asle_work(struct work_struct *work) aslc_req = asle->aslc; if (!(aslc_req & ASLC_REQ_MSK)) { - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "No request on ASLC interrupt 0x%08x\n", aslc_req); return; } if (aslc_req & ASLC_SET_ALS_ILLUM) - aslc_stat |= asle_set_als_illum(dev_priv, asle->alsi); + aslc_stat |= asle_set_als_illum(display, asle->alsi); if (aslc_req & ASLC_SET_BACKLIGHT) - aslc_stat |= asle_set_backlight(dev_priv, asle->bclp); + aslc_stat |= asle_set_backlight(display, asle->bclp); if (aslc_req & ASLC_SET_PFIT) - aslc_stat |= asle_set_pfit(dev_priv, asle->pfit); + aslc_stat |= asle_set_pfit(display, asle->pfit); if (aslc_req & ASLC_SET_PWM_FREQ) - aslc_stat |= asle_set_pwm_freq(dev_priv, asle->pfmb); + aslc_stat |= asle_set_pwm_freq(display, asle->pfmb); if (aslc_req & ASLC_SUPPORTED_ROTATION_ANGLES) - aslc_stat |= asle_set_supported_rotation_angles(dev_priv, + aslc_stat |= asle_set_supported_rotation_angles(display, asle->srot); if (aslc_req & ASLC_BUTTON_ARRAY) - aslc_stat |= asle_set_button_array(dev_priv, asle->iuer); + aslc_stat |= asle_set_button_array(display, asle->iuer); if (aslc_req & ASLC_CONVERTIBLE_INDICATOR) - aslc_stat |= asle_set_convertible(dev_priv, asle->iuer); + aslc_stat |= asle_set_convertible(display, asle->iuer); if (aslc_req & ASLC_DOCKING_INDICATOR) - aslc_stat |= asle_set_docking(dev_priv, asle->iuer); + aslc_stat |= asle_set_docking(display, asle->iuer); if (aslc_req & ASLC_ISCT_STATE_CHANGE) - aslc_stat |= asle_isct_state(dev_priv); + aslc_stat |= asle_isct_state(display); asle->aslc = aslc_stat; } -bool intel_opregion_asle_present(struct drm_i915_private *i915) +bool intel_opregion_asle_present(struct intel_display *display) { - return i915->display.opregion && i915->display.opregion->asle; + return display->opregion && display->opregion->asle; } -void intel_opregion_asle_intr(struct drm_i915_private *i915) +void intel_opregion_asle_intr(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct drm_i915_private *i915 = to_i915(display->drm); + struct intel_opregion *opregion = display->opregion; if (opregion && opregion->asle) queue_work(i915->unordered_wq, &opregion->asle_work); @@ -720,9 +721,8 @@ static void set_did(struct intel_opregion *opregion, int i, u32 val) } } -static void intel_didl_outputs(struct drm_i915_private *dev_priv) +static void intel_didl_outputs(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct intel_opregion *opregion = display->opregion; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; @@ -740,7 +740,7 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) intel_acpi_device_id_update(display); - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (i < max_outputs) set_did(opregion, i, connector->acpi_device_id); @@ -748,10 +748,10 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) } drm_connector_list_iter_end(&conn_iter); - drm_dbg_kms(&dev_priv->drm, "%d outputs detected\n", i); + drm_dbg_kms(display->drm, "%d outputs detected\n", i); if (i > max_outputs) - drm_err(&dev_priv->drm, + drm_err(display->drm, "More than %d outputs in connector list\n", max_outputs); @@ -760,9 +760,9 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) set_did(opregion, i, 0); } -static void intel_setup_cadls(struct drm_i915_private *dev_priv) +static void intel_setup_cadls(struct intel_display *display) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_opregion *opregion = display->opregion; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; int i = 0; @@ -777,7 +777,7 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) * Note that internal panels should be at the front of the connector * list already, ensuring they're not left out. */ - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (i >= ARRAY_SIZE(opregion->acpi->cadl)) break; @@ -790,9 +790,9 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) opregion->acpi->cadl[i] = 0; } -static void swsci_setup(struct drm_i915_private *dev_priv) +static void swsci_setup(struct intel_display *display) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_opregion *opregion = display->opregion; bool requested_callbacks = false; u32 tmp; @@ -801,7 +801,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) opregion->swsci_sbcb_sub_functions = 1; /* We use GBDA to ask for supported GBDA calls. */ - if (swsci(dev_priv, SWSCI_GBDA_SUPPORTED_CALLS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_GBDA_SUPPORTED_CALLS, 0, &tmp) == 0) { /* make the bits match the sub-function codes */ tmp <<= 1; opregion->swsci_gbda_sub_functions |= tmp; @@ -812,7 +812,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) * must not call interfaces that are not specifically requested by the * bios. */ - if (swsci(dev_priv, SWSCI_GBDA_REQUESTED_CALLBACKS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_GBDA_REQUESTED_CALLBACKS, 0, &tmp) == 0) { /* here, the bits already match sub-function codes */ opregion->swsci_sbcb_sub_functions |= tmp; requested_callbacks = true; @@ -823,7 +823,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) * the callback is _requested_. But we still can't call interfaces that * are not requested. */ - if (swsci(dev_priv, SWSCI_SBCB_SUPPORTED_CALLBACKS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_SBCB_SUPPORTED_CALLBACKS, 0, &tmp) == 0) { /* make the bits match the sub-function codes */ u32 low = tmp & 0x7ff; u32 high = tmp & ~0xfff; /* bit 11 is reserved */ @@ -833,7 +833,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) if (requested_callbacks) { u32 req = opregion->swsci_sbcb_sub_functions; if ((req & tmp) != req) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "SWSCI BIOS requested (%08x) SBCB callbacks that are not supported (%08x)\n", req, tmp); /* XXX: for now, trust the requested callbacks */ @@ -843,7 +843,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) } } - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "SWSCI GBDA callbacks %08x, SBCB callbacks %08x\n", opregion->swsci_gbda_sub_functions, opregion->swsci_sbcb_sub_functions); @@ -868,10 +868,11 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = { { } }; -int intel_opregion_setup(struct drm_i915_private *dev_priv) +int intel_opregion_setup(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_opregion *opregion; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); u32 asls, mboxes; char buf[sizeof(OPREGION_SIGNATURE)]; int err = 0; @@ -886,10 +887,10 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct opregion_asle_ext) != 0x400); pci_read_config_dword(pdev, ASLS, &asls); - drm_dbg(&dev_priv->drm, "graphic opregion physical addr: 0x%x\n", + drm_dbg(display->drm, "graphic opregion physical addr: 0x%x\n", asls); if (asls == 0) { - drm_dbg(&dev_priv->drm, "ACPI OpRegion not supported!\n"); + drm_dbg(display->drm, "ACPI OpRegion not supported!\n"); return -ENOTSUPP; } @@ -897,8 +898,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (!opregion) return -ENOMEM; - opregion->i915 = dev_priv; - dev_priv->display.opregion = opregion; + opregion->display = display; + display->opregion = opregion; INIT_WORK(&opregion->asle_work, asle_work); @@ -911,20 +912,20 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) memcpy(buf, base, sizeof(buf)); if (memcmp(buf, OPREGION_SIGNATURE, 16)) { - drm_dbg(&dev_priv->drm, "opregion signature mismatch\n"); + drm_dbg(display->drm, "opregion signature mismatch\n"); err = -EINVAL; goto err_out; } opregion->header = base; - drm_dbg(&dev_priv->drm, "ACPI OpRegion version %u.%u.%u\n", + drm_dbg(display->drm, "ACPI OpRegion version %u.%u.%u\n", opregion->header->over.major, opregion->header->over.minor, opregion->header->over.revision); mboxes = opregion->header->mboxes; if (mboxes & MBOX_ACPI) { - drm_dbg(&dev_priv->drm, "Public ACPI methods supported\n"); + drm_dbg(display->drm, "Public ACPI methods supported\n"); opregion->acpi = base + OPREGION_ACPI_OFFSET; /* * Indicate we handle monitor hotplug events ourselves so we do @@ -939,30 +940,30 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) u8 major = opregion->header->over.major; if (major >= 3) { - drm_err(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v3.x, ignoring\n"); + drm_err(display->drm, "SWSCI Mailbox #2 present for opregion v3.x, ignoring\n"); } else { if (major >= 2) - drm_dbg(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v2.x\n"); - drm_dbg(&dev_priv->drm, "SWSCI supported\n"); + drm_dbg(display->drm, "SWSCI Mailbox #2 present for opregion v2.x\n"); + drm_dbg(display->drm, "SWSCI supported\n"); opregion->swsci = base + OPREGION_SWSCI_OFFSET; - swsci_setup(dev_priv); + swsci_setup(display); } } if (mboxes & MBOX_ASLE) { - drm_dbg(&dev_priv->drm, "ASLE supported\n"); + drm_dbg(display->drm, "ASLE supported\n"); opregion->asle = base + OPREGION_ASLE_OFFSET; opregion->asle->ardy = ASLE_ARDY_NOT_READY; } if (mboxes & MBOX_ASLE_EXT) { - drm_dbg(&dev_priv->drm, "ASLE extension supported\n"); + drm_dbg(display->drm, "ASLE extension supported\n"); opregion->asle_ext = base + OPREGION_ASLE_EXT_OFFSET; } if (mboxes & MBOX_BACKLIGHT) { - drm_dbg(&dev_priv->drm, "Mailbox #2 for backlight present\n"); + drm_dbg(display->drm, "Mailbox #2 for backlight present\n"); } if (dmi_check_system(intel_no_opregion_vbt)) @@ -980,7 +981,7 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) */ if (opregion->header->over.major > 2 || opregion->header->over.minor >= 1) { - drm_WARN_ON(&dev_priv->drm, rvda < OPREGION_SIZE); + drm_WARN_ON(display->drm, rvda < OPREGION_SIZE); rvda += asls; } @@ -990,14 +991,14 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) vbt = opregion->rvda; vbt_size = opregion->asle->rvds; - if (intel_bios_is_valid_vbt(dev_priv, vbt, vbt_size)) { - drm_dbg_kms(&dev_priv->drm, + if (intel_bios_is_valid_vbt(i915, vbt, vbt_size)) { + drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (RVDA)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; goto out; } else { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid VBT in ACPI OpRegion (RVDA)\n"); memunmap(opregion->rvda); opregion->rvda = NULL; @@ -1015,13 +1016,13 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) vbt_size = (mboxes & MBOX_ASLE_EXT) ? OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(dev_priv, vbt, vbt_size)) { - drm_dbg_kms(&dev_priv->drm, + if (intel_bios_is_valid_vbt(i915, vbt, vbt_size)) { + drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; } else { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid VBT in ACPI OpRegion (Mailbox #4)\n"); } @@ -1032,7 +1033,7 @@ err_out: memunmap(base); err_memremap: kfree(opregion); - dev_priv->display.opregion = NULL; + display->opregion = NULL; return err; } @@ -1055,25 +1056,25 @@ static const struct dmi_system_id intel_use_opregion_panel_type[] = { }; int -intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) +intel_opregion_get_panel_type(struct intel_display *display) { u32 panel_details; int ret; - ret = swsci(dev_priv, SWSCI_GBDA_PANEL_DETAILS, 0x0, &panel_details); + ret = swsci(display, SWSCI_GBDA_PANEL_DETAILS, 0x0, &panel_details); if (ret) return ret; ret = (panel_details >> 8) & 0xff; if (ret > 0x10) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid OpRegion panel type 0x%x\n", ret); return -EINVAL; } /* fall back to VBT panel type? */ if (ret == 0x0) { - drm_dbg_kms(&dev_priv->drm, "No panel type in OpRegion\n"); + drm_dbg_kms(display->drm, "No panel type in OpRegion\n"); return -ENODEV; } @@ -1083,7 +1084,7 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) * via a quirk list :( */ if (!dmi_check_system(intel_use_opregion_panel_type)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Ignoring OpRegion panel type (%d)\n", ret - 1); return -ENODEV; } @@ -1104,8 +1105,8 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) */ const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct intel_opregion *opregion = i915->display.opregion; + struct intel_display *display = to_intel_display(connector); + struct intel_opregion *opregion = display->opregion; const struct drm_edid *drm_edid; const void *edid; int len; @@ -1123,7 +1124,7 @@ const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector drm_edid = drm_edid_alloc(edid, len); if (!drm_edid_valid(drm_edid)) { - drm_dbg_kms(&i915->drm, "Invalid EDID in ACPI OpRegion (Mailbox #5)\n"); + drm_dbg_kms(display->drm, "Invalid EDID in ACPI OpRegion (Mailbox #5)\n"); drm_edid_free(drm_edid); drm_edid = NULL; } @@ -1131,9 +1132,9 @@ const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector return drm_edid; } -bool intel_opregion_vbt_present(struct drm_i915_private *i915) +bool intel_opregion_vbt_present(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion || !opregion->vbt) return false; @@ -1141,9 +1142,9 @@ bool intel_opregion_vbt_present(struct drm_i915_private *i915) return true; } -const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) +const void *intel_opregion_get_vbt(struct intel_display *display, size_t *size) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion || !opregion->vbt) return NULL; @@ -1154,9 +1155,9 @@ const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) return kmemdup(opregion->vbt, opregion->vbt_size, GFP_KERNEL); } -bool intel_opregion_headless_sku(struct drm_i915_private *i915) +bool intel_opregion_headless_sku(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; struct opregion_header *header; if (!opregion) @@ -1171,9 +1172,9 @@ bool intel_opregion_headless_sku(struct drm_i915_private *i915) return opregion->header->pcon & PCON_HEADLESS_SKU; } -void intel_opregion_register(struct drm_i915_private *i915) +void intel_opregion_register(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; @@ -1184,17 +1185,16 @@ void intel_opregion_register(struct drm_i915_private *i915) register_acpi_notifier(&opregion->acpi_notifier); } - intel_opregion_resume(i915); + intel_opregion_resume(display); } -static void intel_opregion_resume_display(struct drm_i915_private *i915) +static void intel_opregion_resume_display(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_opregion *opregion = display->opregion; if (opregion->acpi) { - intel_didl_outputs(i915); - intel_setup_cadls(i915); + intel_didl_outputs(display); + intel_setup_cadls(display); /* * Notify BIOS we are ready to handle ACPI video ext notifs. @@ -1214,22 +1214,22 @@ static void intel_opregion_resume_display(struct drm_i915_private *i915) intel_dsm_get_bios_data_funcs_supported(display); } -void intel_opregion_resume(struct drm_i915_private *i915) +void intel_opregion_resume(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; - if (HAS_DISPLAY(i915)) - intel_opregion_resume_display(i915); + if (HAS_DISPLAY(display)) + intel_opregion_resume_display(display); - intel_opregion_notify_adapter(i915, PCI_D0); + intel_opregion_notify_adapter(display, PCI_D0); } -static void intel_opregion_suspend_display(struct drm_i915_private *i915) +static void intel_opregion_suspend_display(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (opregion->asle) opregion->asle->ardy = ASLE_ARDY_NOT_READY; @@ -1240,24 +1240,24 @@ static void intel_opregion_suspend_display(struct drm_i915_private *i915) opregion->acpi->drdy = 0; } -void intel_opregion_suspend(struct drm_i915_private *i915, pci_power_t state) +void intel_opregion_suspend(struct intel_display *display, pci_power_t state) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; - intel_opregion_notify_adapter(i915, state); + intel_opregion_notify_adapter(display, state); - if (HAS_DISPLAY(i915)) - intel_opregion_suspend_display(i915); + if (HAS_DISPLAY(display)) + intel_opregion_suspend_display(display); } -void intel_opregion_unregister(struct drm_i915_private *i915) +void intel_opregion_unregister(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; - intel_opregion_suspend(i915, PCI_D1); + intel_opregion_suspend(display, PCI_D1); if (!opregion) return; @@ -1268,9 +1268,9 @@ void intel_opregion_unregister(struct drm_i915_private *i915) } } -void intel_opregion_cleanup(struct drm_i915_private *i915) +void intel_opregion_cleanup(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; @@ -1279,13 +1279,13 @@ void intel_opregion_cleanup(struct drm_i915_private *i915) if (opregion->rvda) memunmap(opregion->rvda); kfree(opregion); - i915->display.opregion = NULL; + display->opregion = NULL; } static int intel_opregion_show(struct seq_file *m, void *unused) { - struct drm_i915_private *i915 = m->private; - struct intel_opregion *opregion = i915->display.opregion; + struct intel_display *display = m->private; + struct intel_opregion *opregion = display->opregion; if (opregion) seq_write(m, opregion->header, OPREGION_SIZE); @@ -1295,10 +1295,10 @@ static int intel_opregion_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(intel_opregion); -void intel_opregion_debugfs_register(struct drm_i915_private *i915) +void intel_opregion_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; debugfs_create_file("i915_opregion", 0444, minor->debugfs_root, - i915, &intel_opregion_fops); + display, &intel_opregion_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.h b/drivers/gpu/drm/i915/display/intel_opregion.h index 4dcd13c672e4..8101eeebfd8b 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.h +++ b/drivers/gpu/drm/i915/display/intel_opregion.h @@ -28,72 +28,72 @@ #include #include -struct drm_i915_private; struct intel_connector; +struct intel_display; struct intel_encoder; #ifdef CONFIG_ACPI -int intel_opregion_setup(struct drm_i915_private *dev_priv); -void intel_opregion_cleanup(struct drm_i915_private *i915); +int intel_opregion_setup(struct intel_display *display); +void intel_opregion_cleanup(struct intel_display *display); -void intel_opregion_register(struct drm_i915_private *dev_priv); -void intel_opregion_unregister(struct drm_i915_private *dev_priv); +void intel_opregion_register(struct intel_display *display); +void intel_opregion_unregister(struct intel_display *display); -void intel_opregion_resume(struct drm_i915_private *dev_priv); -void intel_opregion_suspend(struct drm_i915_private *dev_priv, +void intel_opregion_resume(struct intel_display *display); +void intel_opregion_suspend(struct intel_display *display, pci_power_t state); -bool intel_opregion_asle_present(struct drm_i915_private *i915); -void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); +bool intel_opregion_asle_present(struct intel_display *display); +void intel_opregion_asle_intr(struct intel_display *display); int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable); -int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, +int intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state); -int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); +int intel_opregion_get_panel_type(struct intel_display *display); const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector); -bool intel_opregion_vbt_present(struct drm_i915_private *i915); -const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size); +bool intel_opregion_vbt_present(struct intel_display *display); +const void *intel_opregion_get_vbt(struct intel_display *display, size_t *size); -bool intel_opregion_headless_sku(struct drm_i915_private *i915); +bool intel_opregion_headless_sku(struct intel_display *display); -void intel_opregion_debugfs_register(struct drm_i915_private *i915); +void intel_opregion_debugfs_register(struct intel_display *display); #else /* CONFIG_ACPI*/ -static inline int intel_opregion_setup(struct drm_i915_private *dev_priv) +static inline int intel_opregion_setup(struct intel_display *display) { return 0; } -static inline void intel_opregion_cleanup(struct drm_i915_private *i915) +static inline void intel_opregion_cleanup(struct intel_display *display) { } -static inline void intel_opregion_register(struct drm_i915_private *dev_priv) +static inline void intel_opregion_register(struct intel_display *display) { } -static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) +static inline void intel_opregion_unregister(struct intel_display *display) { } -static inline void intel_opregion_resume(struct drm_i915_private *dev_priv) +static inline void intel_opregion_resume(struct intel_display *display) { } -static inline void intel_opregion_suspend(struct drm_i915_private *dev_priv, +static inline void intel_opregion_suspend(struct intel_display *display, pci_power_t state) { } -static inline bool intel_opregion_asle_present(struct drm_i915_private *i915) +static inline bool intel_opregion_asle_present(struct intel_display *display) { return false; } -static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) +static inline void intel_opregion_asle_intr(struct intel_display *display) { } @@ -104,12 +104,12 @@ intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) } static inline int -intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) +intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state) { return 0; } -static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) +static inline int intel_opregion_get_panel_type(struct intel_display *display) { return -ENODEV; } @@ -120,23 +120,23 @@ intel_opregion_get_edid(struct intel_connector *connector) return NULL; } -static inline bool intel_opregion_vbt_present(struct drm_i915_private *i915) +static inline bool intel_opregion_vbt_present(struct intel_display *display) { return false; } static inline const void * -intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) +intel_opregion_get_vbt(struct intel_display *display, size_t *size) { return NULL; } -static inline bool intel_opregion_headless_sku(struct drm_i915_private *i915) +static inline bool intel_opregion_headless_sku(struct intel_display *display) { return false; } -static inline void intel_opregion_debugfs_register(struct drm_i915_private *i915) +static inline void intel_opregion_debugfs_register(struct intel_display *display) { } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index b518d29596e9..e32766286369 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -442,6 +442,7 @@ static int i915_pcode_init(struct drm_i915_private *i915) */ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; @@ -542,7 +543,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_msi; - intel_opregion_setup(dev_priv); + intel_opregion_setup(display); ret = i915_pcode_init(dev_priv); if (ret) @@ -559,7 +560,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) return 0; err_opregion: - intel_opregion_cleanup(dev_priv); + intel_opregion_cleanup(display); err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -580,11 +581,12 @@ err_perf: */ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); i915_perf_fini(dev_priv); - intel_opregion_cleanup(dev_priv); + intel_opregion_cleanup(display); if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -1014,6 +1016,7 @@ static int i915_drm_prepare(struct drm_device *dev) static int i915_drm_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); pci_power_t opregion_target_state; @@ -1049,7 +1052,7 @@ static int i915_drm_suspend(struct drm_device *dev) i915_save_display(dev_priv); opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold; - intel_opregion_suspend(dev_priv, opregion_target_state); + intel_opregion_suspend(display, opregion_target_state); dev_priv->suspend_count++; @@ -1138,6 +1141,7 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, static int i915_drm_resume(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct intel_gt *gt; int ret, i; @@ -1205,7 +1209,7 @@ static int i915_drm_resume(struct drm_device *dev) } intel_hpd_poll_disable(dev_priv); - intel_opregion_resume(dev_priv); + intel_opregion_resume(display); intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); @@ -1454,6 +1458,7 @@ static int i915_pm_restore(struct device *kdev) static int intel_runtime_suspend(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_display *display = &dev_priv->display; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct pci_dev *root_pdev; @@ -1528,7 +1533,7 @@ static int intel_runtime_suspend(struct device *kdev) * won't be able to restore them. Since PCI_D3hot matches the * actual specification and appears to be working, use it. */ - intel_opregion_notify_adapter(dev_priv, PCI_D3hot); + intel_opregion_notify_adapter(display, PCI_D3hot); } else { /* * current versions of firmware which depend on this opregion @@ -1537,7 +1542,7 @@ static int intel_runtime_suspend(struct device *kdev) * to distinguish it from notifications that might be sent via * the suspend path. */ - intel_opregion_notify_adapter(dev_priv, PCI_D1); + intel_opregion_notify_adapter(display, PCI_D1); } assert_forcewakes_inactive(&dev_priv->uncore); @@ -1552,6 +1557,7 @@ static int intel_runtime_suspend(struct device *kdev) static int intel_runtime_resume(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_display *display = &dev_priv->display; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct pci_dev *root_pdev; @@ -1566,7 +1572,7 @@ static int intel_runtime_resume(struct device *kdev) drm_WARN_ON_ONCE(&dev_priv->drm, atomic_read(&rpm->wakeref_count)); disable_rpm_wakeref_asserts(rpm); - intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_opregion_notify_adapter(display, PCI_D0); root_pdev = pcie_find_root_port(pdev); if (root_pdev) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ca4468c82078..0e4adde84cb2 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -127,16 +127,18 @@ int xe_display_init_nommio(struct xe_device *xe) static void xe_display_fini_noirq(void *arg) { struct xe_device *xe = arg; + struct intel_display *display = &xe->display; if (!xe->info.enable_display) return; intel_display_driver_remove_noirq(xe); - intel_opregion_cleanup(xe); + intel_opregion_cleanup(display); } int xe_display_init_noirq(struct xe_device *xe) { + struct intel_display *display = &xe->display; int err; if (!xe->info.enable_display) @@ -145,7 +147,7 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_driver_early_probe(xe); /* Early display init.. */ - intel_opregion_setup(xe); + intel_opregion_setup(display); /* * Fill the dram structure to get the system dram info. This will be @@ -159,7 +161,7 @@ int xe_display_init_noirq(struct xe_device *xe) err = intel_display_driver_probe_noirq(xe); if (err) { - intel_opregion_cleanup(xe); + intel_opregion_cleanup(display); return err; } @@ -250,11 +252,13 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { + struct intel_display *display = &xe->display; + if (!xe->info.enable_display) return; if (gu_misc_iir & GU_MISC_GSE) - intel_opregion_asle_intr(xe); + intel_opregion_asle_intr(display); } void xe_display_irq_reset(struct xe_device *xe) @@ -285,6 +289,7 @@ static bool suspend_to_idle(void) void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { + struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.enable_display) return; @@ -306,7 +311,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_encoder_suspend_all(&xe->display); - intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold); + intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); @@ -336,6 +341,8 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe, bool runtime) { + struct intel_display *display = &xe->display; + if (!xe->info.enable_display) return; @@ -356,7 +363,7 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) if (has_display(xe)) drm_kms_helper_poll_enable(&xe->drm); - intel_opregion_resume(xe); + intel_opregion_resume(display); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); From 9aec6f76a28cd669aa98403883edda3a7981fef0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 9 Aug 2024 17:27:06 +0300 Subject: [PATCH 0763/1523] drm/i915/bios: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_bios.[ch] to struct intel_display. Do one drive-by conversion of unnecessary hex usage to decimal. Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/0d0261a53aff5f141b16b482222a5ffce78e176e.1723213547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/g4x_dp.c | 3 +- drivers/gpu/drm/i915/display/g4x_hdmi.c | 3 +- drivers/gpu/drm/i915/display/icl_dsi.c | 3 +- drivers/gpu/drm/i915/display/intel_bios.c | 844 +++++++++--------- drivers/gpu/drm/i915/display/intel_bios.h | 28 +- .../gpu/drm/i915/display/intel_combo_phy.c | 8 +- drivers/gpu/drm/i915/display/intel_ddi.c | 3 +- drivers/gpu/drm/i915/display/intel_ddi.h | 3 +- drivers/gpu/drm/i915/display/intel_display.c | 9 +- .../drm/i915/display/intel_display_debugfs.c | 2 +- .../drm/i915/display/intel_display_driver.c | 8 +- .../gpu/drm/i915/display/intel_display_irq.c | 3 +- drivers/gpu/drm/i915/display/intel_dp.c | 8 +- drivers/gpu/drm/i915/display/intel_lvds.c | 5 +- drivers/gpu/drm/i915/display/intel_opregion.c | 5 +- drivers/gpu/drm/i915/display/intel_sdvo.c | 3 +- drivers/gpu/drm/i915/display/intel_tv.c | 3 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 5 +- 18 files changed, 495 insertions(+), 451 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index a8e746a0f670..c2128b46bdbd 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -1279,6 +1279,7 @@ static const struct drm_encoder_funcs intel_dp_enc_funcs = { bool g4x_dp_init(struct drm_i915_private *dev_priv, i915_reg_t output_reg, enum port port) { + struct intel_display *display = &dev_priv->display; const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -1288,7 +1289,7 @@ bool g4x_dp_init(struct drm_i915_private *dev_priv, if (!assert_port_valid(dev_priv, port)) return false; - devdata = intel_bios_encoder_data_lookup(dev_priv, port); + devdata = intel_bios_encoder_data_lookup(display, port); /* FIXME bail? */ if (!devdata) diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 8096492b3fad..46f23bdb4c17 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -686,6 +686,7 @@ static bool assert_hdmi_port_valid(struct drm_i915_private *i915, enum port port void g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port) { + struct intel_display *display = &dev_priv->display; const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -697,7 +698,7 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, if (!assert_hdmi_port_valid(dev_priv, port)) return; - devdata = intel_bios_encoder_data_lookup(dev_priv, port); + devdata = intel_bios_encoder_data_lookup(display, port); /* FIXME bail? */ if (!devdata) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 55dd57d1bf94..293efc1f841d 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1945,6 +1945,7 @@ static void icl_dsi_add_properties(struct intel_connector *connector) void icl_dsi_init(struct drm_i915_private *dev_priv, const struct intel_bios_encoder_data *devdata) { + struct intel_display *display = &dev_priv->display; struct intel_dsi *intel_dsi; struct intel_encoder *encoder; struct intel_connector *intel_connector; @@ -2008,7 +2009,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, intel_dsi->panel_power_off_time = ktime_get_boottime(); - intel_bios_init_panel_late(dev_priv, &intel_connector->panel, encoder->devdata, NULL); + intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, NULL); mutex_lock(&dev_priv->drm.mode_config.mutex); intel_panel_add_vbt_lfp_fixed_mode(intel_connector); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 0b13b37f24a4..d49435af62c7 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -66,7 +66,7 @@ /* Wrapper for VBT child device config */ struct intel_bios_encoder_data { - struct drm_i915_private *i915; + struct intel_display *display; struct child_device_config child; struct dsc_compression_parameters_entry *dsc; @@ -145,12 +145,12 @@ struct bdb_block_entry { }; static const void * -bdb_find_section(struct drm_i915_private *i915, +bdb_find_section(struct intel_display *display, enum bdb_block_id section_id) { struct bdb_block_entry *entry; - list_for_each_entry(entry, &i915->display.vbt.bdb_blocks, node) { + list_for_each_entry(entry, &display->vbt.bdb_blocks, node) { if (entry->section_id == section_id) return entry->data + 3; } @@ -200,12 +200,12 @@ static const struct { .min_size = sizeof(struct bdb_generic_dtd), }, }; -static size_t lfp_data_min_size(struct drm_i915_private *i915) +static size_t lfp_data_min_size(struct intel_display *display) { const struct bdb_lfp_data_ptrs *ptrs; size_t size; - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return 0; @@ -360,7 +360,7 @@ static void next_lfp_data_ptr(struct lfp_data_ptr_table *next, next->offset = prev->offset + size; } -static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, +static void *generate_lfp_data_ptrs(struct intel_display *display, const void *bdb) { int i, size, table_size, block_size, offset, fp_timing_size; @@ -374,7 +374,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, * include block 41 and thus we don't need to * generate one. */ - if (i915->display.vbt.version < 155) + if (display->vbt.version < 155) return NULL; fp_timing_size = 38; @@ -383,7 +383,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, if (!block) return NULL; - drm_dbg_kms(&i915->drm, "Generating LFP data table pointers\n"); + drm_dbg_kms(display->drm, "Generating LFP data table pointers\n"); block_size = get_blocksize(block); @@ -451,7 +451,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, } static void -init_bdb_block(struct drm_i915_private *i915, +init_bdb_block(struct intel_display *display, const void *bdb, enum bdb_block_id section_id, size_t min_size) { @@ -464,14 +464,14 @@ init_bdb_block(struct drm_i915_private *i915, /* Modern VBTs lack the LFP data table pointers block, make one up */ if (!block && section_id == BDB_LFP_DATA_PTRS) { - temp_block = generate_lfp_data_ptrs(i915, bdb); + temp_block = generate_lfp_data_ptrs(display, bdb); if (temp_block) block = temp_block + 3; } if (!block) return; - drm_WARN(&i915->drm, min_size == 0, + drm_WARN(display->drm, min_size == 0, "Block %d min_size is zero\n", section_id); block_size = get_blocksize(block); @@ -495,20 +495,22 @@ init_bdb_block(struct drm_i915_private *i915, kfree(temp_block); - drm_dbg_kms(&i915->drm, "Found BDB block %d (size %zu, min size %zu)\n", + drm_dbg_kms(display->drm, + "Found BDB block %d (size %zu, min size %zu)\n", section_id, block_size, min_size); if (section_id == BDB_LFP_DATA_PTRS && !fixup_lfp_data_ptrs(bdb, entry->data + 3)) { - drm_err(&i915->drm, "VBT has malformed LFP data table pointers\n"); + drm_err(display->drm, + "VBT has malformed LFP data table pointers\n"); kfree(entry); return; } - list_add_tail(&entry->node, &i915->display.vbt.bdb_blocks); + list_add_tail(&entry->node, &display->vbt.bdb_blocks); } -static void init_bdb_blocks(struct drm_i915_private *i915, +static void init_bdb_blocks(struct intel_display *display, const void *bdb) { int i; @@ -518,14 +520,14 @@ static void init_bdb_blocks(struct drm_i915_private *i915, size_t min_size = bdb_blocks[i].min_size; if (section_id == BDB_LFP_DATA) - min_size = lfp_data_min_size(i915); + min_size = lfp_data_min_size(display); - init_bdb_block(i915, bdb, section_id, min_size); + init_bdb_block(display, bdb, section_id, min_size); } } static void -fill_detail_timing_data(struct drm_i915_private *i915, +fill_detail_timing_data(struct intel_display *display, struct drm_display_mode *panel_fixed_mode, const struct bdb_edid_dtd *dvo_timing) { @@ -568,12 +570,12 @@ fill_detail_timing_data(struct drm_i915_private *i915, /* Some VBTs have bogus h/vsync_end values */ if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) { - drm_dbg_kms(&i915->drm, "reducing hsync_end %d->%d\n", + drm_dbg_kms(display->drm, "reducing hsync_end %d->%d\n", panel_fixed_mode->hsync_end, panel_fixed_mode->htotal); panel_fixed_mode->hsync_end = panel_fixed_mode->htotal; } if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) { - drm_dbg_kms(&i915->drm, "reducing vsync_end %d->%d\n", + drm_dbg_kms(display->drm, "reducing vsync_end %d->%d\n", panel_fixed_mode->vsync_end, panel_fixed_mode->vtotal); panel_fixed_mode->vsync_end = panel_fixed_mode->vtotal; } @@ -618,28 +620,26 @@ get_lfp_data_tail(const struct bdb_lfp_data *data, return NULL; } -static int opregion_get_panel_type(struct drm_i915_private *i915, +static int opregion_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { - struct intel_display *display = &i915->display; - return intel_opregion_get_panel_type(display); } -static int vbt_get_panel_type(struct drm_i915_private *i915, +static int vbt_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { const struct bdb_lfp_options *lfp_options; - lfp_options = bdb_find_section(i915, BDB_LFP_OPTIONS); + lfp_options = bdb_find_section(display, BDB_LFP_OPTIONS); if (!lfp_options) return -1; if (lfp_options->panel_type > 0xf && lfp_options->panel_type != 0xff) { - drm_dbg_kms(&i915->drm, "Invalid VBT panel type 0x%x\n", + drm_dbg_kms(display->drm, "Invalid VBT panel type 0x%x\n", lfp_options->panel_type); return -1; } @@ -647,12 +647,13 @@ static int vbt_get_panel_type(struct drm_i915_private *i915, if (devdata && devdata->child.handle == DEVICE_HANDLE_LFP2) return lfp_options->panel_type2; - drm_WARN_ON(&i915->drm, devdata && devdata->child.handle != DEVICE_HANDLE_LFP1); + drm_WARN_ON(display->drm, + devdata && devdata->child.handle != DEVICE_HANDLE_LFP1); return lfp_options->panel_type; } -static int pnpid_get_panel_type(struct drm_i915_private *i915, +static int pnpid_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { @@ -671,14 +672,14 @@ static int pnpid_get_panel_type(struct drm_i915_private *i915, product_id_nodate.week_of_manufacture = 0; product_id_nodate.year_of_manufacture = 0; - p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "EDID"); + p = drm_dbg_printer(display->drm, DRM_UT_KMS, "EDID"); drm_edid_print_product_id(&p, &product_id, true); - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return -1; - data = bdb_find_section(i915, BDB_LFP_DATA); + data = bdb_find_section(display, BDB_LFP_DATA); if (!data) return -1; @@ -702,7 +703,7 @@ static int pnpid_get_panel_type(struct drm_i915_private *i915, return best; } -static int fallback_get_panel_type(struct drm_i915_private *i915, +static int fallback_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { @@ -716,13 +717,13 @@ enum panel_type { PANEL_TYPE_FALLBACK, }; -static int get_panel_type(struct drm_i915_private *i915, +static int get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { struct { const char *name; - int (*get_panel_type)(struct drm_i915_private *i915, + int (*get_panel_type)(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback); int panel_type; @@ -747,14 +748,14 @@ static int get_panel_type(struct drm_i915_private *i915, int i; for (i = 0; i < ARRAY_SIZE(panel_types); i++) { - panel_types[i].panel_type = panel_types[i].get_panel_type(i915, devdata, + panel_types[i].panel_type = panel_types[i].get_panel_type(display, devdata, drm_edid, use_fallback); - drm_WARN_ON(&i915->drm, panel_types[i].panel_type > 0xf && + drm_WARN_ON(display->drm, panel_types[i].panel_type > 0xf && panel_types[i].panel_type != 0xff); if (panel_types[i].panel_type >= 0) - drm_dbg_kms(&i915->drm, "Panel type (%s): %d\n", + drm_dbg_kms(display->drm, "Panel type (%s): %d\n", panel_types[i].name, panel_types[i].panel_type); } @@ -769,7 +770,7 @@ static int get_panel_type(struct drm_i915_private *i915, else i = PANEL_TYPE_FALLBACK; - drm_dbg_kms(&i915->drm, "Selected panel type (%s): %d\n", + drm_dbg_kms(display->drm, "Selected panel type (%s): %d\n", panel_types[i].name, panel_types[i].panel_type); return panel_types[i].panel_type; @@ -787,14 +788,14 @@ static bool panel_bool(unsigned int value, int panel_type) /* Parse general panel options */ static void -parse_panel_options(struct drm_i915_private *i915, +parse_panel_options(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_options *lfp_options; int panel_type = panel->vbt.panel_type; int drrs_mode; - lfp_options = bdb_find_section(i915, BDB_LFP_OPTIONS); + lfp_options = bdb_find_section(display, BDB_LFP_OPTIONS); if (!lfp_options) return; @@ -818,23 +819,23 @@ parse_panel_options(struct drm_i915_private *i915, switch (drrs_mode) { case 0: panel->vbt.drrs_type = DRRS_TYPE_STATIC; - drm_dbg_kms(&i915->drm, "DRRS supported mode is static\n"); + drm_dbg_kms(display->drm, "DRRS supported mode is static\n"); break; case 2: panel->vbt.drrs_type = DRRS_TYPE_SEAMLESS; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "DRRS supported mode is seamless\n"); break; default: panel->vbt.drrs_type = DRRS_TYPE_NONE; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "DRRS not supported (VBT input)\n"); break; } } static void -parse_lfp_panel_dtd(struct drm_i915_private *i915, +parse_lfp_panel_dtd(struct intel_display *display, struct intel_panel *panel, const struct bdb_lfp_data *lfp_data, const struct bdb_lfp_data_ptrs *lfp_data_ptrs) @@ -852,11 +853,11 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, if (!panel_fixed_mode) return; - fill_detail_timing_data(i915, panel_fixed_mode, panel_dvo_timing); + fill_detail_timing_data(display, panel_fixed_mode, panel_dvo_timing); panel->vbt.lfp_vbt_mode = panel_fixed_mode; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found panel mode in BIOS VBT legacy lfp table: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); @@ -868,14 +869,14 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, if (fp_timing->x_res == panel_fixed_mode->hdisplay && fp_timing->y_res == panel_fixed_mode->vdisplay) { panel->vbt.bios_lvds_val = fp_timing->lvds_reg_val; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT initial LVDS value %x\n", panel->vbt.bios_lvds_val); } } static void -parse_lfp_data(struct drm_i915_private *i915, +parse_lfp_data(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_data *data; @@ -885,41 +886,41 @@ parse_lfp_data(struct drm_i915_private *i915, struct drm_printer p; int panel_type = panel->vbt.panel_type; - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return; - data = bdb_find_section(i915, BDB_LFP_DATA); + data = bdb_find_section(display, BDB_LFP_DATA); if (!data) return; if (!panel->vbt.lfp_vbt_mode) - parse_lfp_panel_dtd(i915, panel, data, ptrs); + parse_lfp_panel_dtd(display, panel, data, ptrs); pnp_id = get_lfp_pnp_id(data, ptrs, panel_type); - p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "Panel"); + p = drm_dbg_printer(display->drm, DRM_UT_KMS, "Panel"); drm_edid_print_product_id(&p, pnp_id, false); tail = get_lfp_data_tail(data, ptrs); if (!tail) return; - drm_dbg_kms(&i915->drm, "Panel name: %.*s\n", + drm_dbg_kms(display->drm, "Panel name: %.*s\n", (int)sizeof(tail->panel_name[0].name), tail->panel_name[panel_type].name); - if (i915->display.vbt.version >= 188) { + if (display->vbt.version >= 188) { panel->vbt.seamless_drrs_min_refresh_rate = tail->seamless_drrs_min_refresh_rate[panel_type]; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Seamless DRRS min refresh rate: %d Hz\n", panel->vbt.seamless_drrs_min_refresh_rate); } } static void -parse_generic_dtd(struct drm_i915_private *i915, +parse_generic_dtd(struct intel_display *display, struct intel_panel *panel) { const struct bdb_generic_dtd *generic_dtd; @@ -935,20 +936,20 @@ parse_generic_dtd(struct drm_i915_private *i915, * first on VBT >= 229, but still fall back to trying the old LFP * block if that fails. */ - if (i915->display.vbt.version < 229) + if (display->vbt.version < 229) return; - generic_dtd = bdb_find_section(i915, BDB_GENERIC_DTD); + generic_dtd = bdb_find_section(display, BDB_GENERIC_DTD); if (!generic_dtd) return; if (generic_dtd->gdtd_size < sizeof(struct generic_dtd_entry)) { - drm_err(&i915->drm, "GDTD size %u is too small.\n", + drm_err(display->drm, "GDTD size %u is too small.\n", generic_dtd->gdtd_size); return; } else if (generic_dtd->gdtd_size != sizeof(struct generic_dtd_entry)) { - drm_err(&i915->drm, "Unexpected GDTD size %u\n", + drm_err(display->drm, "Unexpected GDTD size %u\n", generic_dtd->gdtd_size); /* DTD has unknown fields, but keep going */ } @@ -956,7 +957,7 @@ parse_generic_dtd(struct drm_i915_private *i915, num_dtd = (get_blocksize(generic_dtd) - sizeof(struct bdb_generic_dtd)) / generic_dtd->gdtd_size; if (panel->vbt.panel_type >= num_dtd) { - drm_err(&i915->drm, + drm_err(display->drm, "Panel type %d not found in table of %d DTD's\n", panel->vbt.panel_type, num_dtd); return; @@ -1001,7 +1002,7 @@ parse_generic_dtd(struct drm_i915_private *i915, else panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found panel mode in BIOS VBT generic dtd table: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); @@ -1009,7 +1010,7 @@ parse_generic_dtd(struct drm_i915_private *i915, } static void -parse_lfp_backlight(struct drm_i915_private *i915, +parse_lfp_backlight(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_backlight *backlight_data; @@ -1017,12 +1018,12 @@ parse_lfp_backlight(struct drm_i915_private *i915, int panel_type = panel->vbt.panel_type; u16 level; - backlight_data = bdb_find_section(i915, BDB_LFP_BACKLIGHT); + backlight_data = bdb_find_section(display, BDB_LFP_BACKLIGHT); if (!backlight_data) return; if (backlight_data->entry_size != sizeof(backlight_data->data[0])) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported backlight data entry size %u\n", backlight_data->entry_size); return; @@ -1032,7 +1033,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.present = entry->type == BDB_BACKLIGHT_TYPE_PWM; if (!panel->vbt.backlight.present) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "PWM backlight not present in VBT (type %u)\n", entry->type); return; @@ -1040,7 +1041,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; panel->vbt.backlight.controller = 0; - if (i915->display.vbt.version >= 191) { + if (display->vbt.version >= 191) { const struct lfp_backlight_control_method *method; method = &backlight_data->backlight_control[panel_type]; @@ -1051,14 +1052,14 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; panel->vbt.backlight.active_low_pwm = entry->active_low_pwm; - if (i915->display.vbt.version >= 234) { + if (display->vbt.version >= 234) { u16 min_level; bool scale; level = backlight_data->brightness_level[panel_type].level; min_level = backlight_data->brightness_min_level[panel_type].level; - if (i915->display.vbt.version >= 236) + if (display->vbt.version >= 236) scale = backlight_data->brightness_precision_bits[panel_type] == 16; else scale = level > 255; @@ -1067,7 +1068,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, min_level = min_level / 255; if (min_level > 255) { - drm_warn(&i915->drm, "Brightness min level > 255\n"); + drm_warn(display->drm, "Brightness min level > 255\n"); level = 255; } panel->vbt.backlight.min_brightness = min_level; @@ -1079,13 +1080,13 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.min_brightness = entry->min_brightness; } - if (i915->display.vbt.version >= 239) + if (display->vbt.version >= 239) panel->vbt.backlight.hdr_dpcd_refresh_timeout = DIV_ROUND_UP(backlight_data->hdr_dpcd_refresh_timeout[panel_type], 100); else panel->vbt.backlight.hdr_dpcd_refresh_timeout = 30; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT backlight PWM modulation frequency %u Hz, " "active %s, min brightness %u, level %u, controller %u\n", panel->vbt.backlight.pwm_freq_hz, @@ -1096,16 +1097,16 @@ parse_lfp_backlight(struct drm_i915_private *i915, } static void -parse_sdvo_lvds_data(struct drm_i915_private *i915, +parse_sdvo_lvds_data(struct intel_display *display, struct intel_panel *panel) { const struct bdb_sdvo_lvds_dtd *dtd; struct drm_display_mode *panel_fixed_mode; int index; - index = i915->display.params.vbt_sdvo_panel_type; + index = display->params.vbt_sdvo_panel_type; if (index == -2) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignore SDVO LVDS mode from BIOS VBT tables.\n"); return; } @@ -1113,14 +1114,14 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, if (index == -1) { const struct bdb_sdvo_lvds_options *sdvo_lvds_options; - sdvo_lvds_options = bdb_find_section(i915, BDB_SDVO_LVDS_OPTIONS); + sdvo_lvds_options = bdb_find_section(display, BDB_SDVO_LVDS_OPTIONS); if (!sdvo_lvds_options) return; index = sdvo_lvds_options->panel_type; } - dtd = bdb_find_section(i915, BDB_SDVO_LVDS_DTD); + dtd = bdb_find_section(display, BDB_SDVO_LVDS_DTD); if (!dtd) return; @@ -1131,7 +1132,8 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, * it here to be sure. */ if (index >= ARRAY_SIZE(dtd->dtd)) { - drm_err(&i915->drm, "index %d is larger than dtd->dtd[4] array\n", + drm_err(display->drm, + "index %d is larger than dtd->dtd[4] array\n", index); return; } @@ -1140,19 +1142,19 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, if (!panel_fixed_mode) return; - fill_detail_timing_data(i915, panel_fixed_mode, &dtd->dtd[index]); + fill_detail_timing_data(display, panel_fixed_mode, &dtd->dtd[index]); panel->vbt.sdvo_lvds_vbt_mode = panel_fixed_mode; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found SDVO LVDS mode in BIOS VBT tables: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); } -static int intel_bios_ssc_frequency(struct drm_i915_private *i915, +static int intel_bios_ssc_frequency(struct intel_display *display, bool alternate) { - switch (DISPLAY_VER(i915)) { + switch (DISPLAY_VER(display)) { case 2: return alternate ? 66667 : 48000; case 3: @@ -1164,45 +1166,46 @@ static int intel_bios_ssc_frequency(struct drm_i915_private *i915, } static void -parse_general_features(struct drm_i915_private *i915) +parse_general_features(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_general_features *general; - general = bdb_find_section(i915, BDB_GENERAL_FEATURES); + general = bdb_find_section(display, BDB_GENERAL_FEATURES); if (!general) return; - i915->display.vbt.int_tv_support = general->int_tv_support; + display->vbt.int_tv_support = general->int_tv_support; /* int_crt_support can't be trusted on earlier platforms */ - if (i915->display.vbt.version >= 155 && - (HAS_DDI(i915) || IS_VALLEYVIEW(i915))) - i915->display.vbt.int_crt_support = general->int_crt_support; - i915->display.vbt.lvds_use_ssc = general->enable_ssc; - i915->display.vbt.lvds_ssc_freq = - intel_bios_ssc_frequency(i915, general->ssc_freq); - i915->display.vbt.display_clock_mode = general->display_clock_mode; - i915->display.vbt.fdi_rx_polarity_inverted = general->fdi_rx_polarity_inverted; - if (i915->display.vbt.version >= 181) { - i915->display.vbt.orientation = general->rotate_180 ? + if (display->vbt.version >= 155 && + (HAS_DDI(display) || IS_VALLEYVIEW(i915))) + display->vbt.int_crt_support = general->int_crt_support; + display->vbt.lvds_use_ssc = general->enable_ssc; + display->vbt.lvds_ssc_freq = + intel_bios_ssc_frequency(display, general->ssc_freq); + display->vbt.display_clock_mode = general->display_clock_mode; + display->vbt.fdi_rx_polarity_inverted = general->fdi_rx_polarity_inverted; + if (display->vbt.version >= 181) { + display->vbt.orientation = general->rotate_180 ? DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP : DRM_MODE_PANEL_ORIENTATION_NORMAL; } else { - i915->display.vbt.orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; + display->vbt.orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; } - if (i915->display.vbt.version >= 249 && general->afc_startup_config) { - i915->display.vbt.override_afc_startup = true; - i915->display.vbt.override_afc_startup_val = general->afc_startup_config == 0x1 ? 0x0 : 0x7; + if (display->vbt.version >= 249 && general->afc_startup_config) { + display->vbt.override_afc_startup = true; + display->vbt.override_afc_startup_val = general->afc_startup_config == 1 ? 0 : 7; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d fdi_rx_polarity_inverted %d\n", - i915->display.vbt.int_tv_support, - i915->display.vbt.int_crt_support, - i915->display.vbt.lvds_use_ssc, - i915->display.vbt.lvds_ssc_freq, - i915->display.vbt.display_clock_mode, - i915->display.vbt.fdi_rx_polarity_inverted); + display->vbt.int_tv_support, + display->vbt.int_crt_support, + display->vbt.lvds_use_ssc, + display->vbt.lvds_ssc_freq, + display->vbt.display_clock_mode, + display->vbt.fdi_rx_polarity_inverted); } static const struct child_device_config * @@ -1212,7 +1215,7 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i) } static void -parse_sdvo_device_mapping(struct drm_i915_private *i915) +parse_sdvo_device_mapping(struct intel_display *display) { const struct intel_bios_encoder_data *devdata; int count = 0; @@ -1221,12 +1224,12 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) * Only parse SDVO mappings on gens that could have SDVO. This isn't * accurate and doesn't have to be, as long as it's not too strict. */ - if (!IS_DISPLAY_VER(i915, 3, 7)) { - drm_dbg_kms(&i915->drm, "Skipping SDVO device mapping\n"); + if (!IS_DISPLAY_VER(display, 3, 7)) { + drm_dbg_kms(display->drm, "Skipping SDVO device mapping\n"); return; } - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; struct sdvo_device_mapping *mapping; @@ -1241,17 +1244,17 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) if (child->dvo_port != DEVICE_PORT_DVOB && child->dvo_port != DEVICE_PORT_DVOC) { /* skip the incorrect SDVO port */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Incorrect SDVO port. Skip it\n"); continue; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "the SDVO device with target addr %2x is found on" " %s port\n", child->target_addr, (child->dvo_port == DEVICE_PORT_DVOB) ? "SDVOB" : "SDVOC"); - mapping = &i915->display.vbt.sdvo_mappings[child->dvo_port - 1]; + mapping = &display->vbt.sdvo_mappings[child->dvo_port - 1]; if (!mapping->initialized) { mapping->dvo_port = child->dvo_port; mapping->target_addr = child->target_addr; @@ -1259,20 +1262,20 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) mapping->ddc_pin = child->ddc_pin; mapping->i2c_pin = child->i2c_pin; mapping->initialized = 1; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d\n", mapping->dvo_port, mapping->target_addr, mapping->dvo_wiring, mapping->ddc_pin, mapping->i2c_pin); } else { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Maybe one SDVO port is shared by " "two SDVO device.\n"); } if (child->target2_addr) { /* Maybe this is a SDVO device with multiple inputs */ /* And the mapping info is not added */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "there exists the target2_addr. Maybe this" " is a SDVO device with multiple inputs.\n"); } @@ -1281,28 +1284,28 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) if (!count) { /* No SDVO device info is found */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No SDVO device info is found in VBT\n"); } } static void -parse_driver_features(struct drm_i915_private *i915) +parse_driver_features(struct intel_display *display) { const struct bdb_driver_features *driver; - driver = bdb_find_section(i915, BDB_DRIVER_FEATURES); + driver = bdb_find_section(display, BDB_DRIVER_FEATURES); if (!driver) return; - if (DISPLAY_VER(i915) >= 5) { + if (DISPLAY_VER(display) >= 5) { /* * Note that we consider BDB_DRIVER_FEATURE_INT_SDVO_LVDS * to mean "eDP". The VBT spec doesn't agree with that * interpretation, but real world VBTs seem to. */ if (driver->lvds_config != BDB_DRIVER_FEATURE_INT_LVDS) - i915->display.vbt.int_lvds_support = 0; + display->vbt.int_lvds_support = 0; } else { /* * FIXME it's not clear which BDB version has the LVDS config @@ -1315,25 +1318,25 @@ parse_driver_features(struct drm_i915_private *i915) * in the wild with the bits correctly populated. Version * 108 (on i85x) does not have the bits correctly populated. */ - if (i915->display.vbt.version >= 134 && + if (display->vbt.version >= 134 && driver->lvds_config != BDB_DRIVER_FEATURE_INT_LVDS && driver->lvds_config != BDB_DRIVER_FEATURE_INT_SDVO_LVDS) - i915->display.vbt.int_lvds_support = 0; + display->vbt.int_lvds_support = 0; } } static void -parse_panel_driver_features(struct drm_i915_private *i915, +parse_panel_driver_features(struct intel_display *display, struct intel_panel *panel) { const struct bdb_driver_features *driver; - driver = bdb_find_section(i915, BDB_DRIVER_FEATURES); + driver = bdb_find_section(display, BDB_DRIVER_FEATURES); if (!driver) return; - if (i915->display.vbt.version < 228) { - drm_dbg_kms(&i915->drm, "DRRS State Enabled:%d\n", + if (display->vbt.version < 228) { + drm_dbg_kms(display->drm, "DRRS State Enabled:%d\n", driver->drrs_enabled); /* * If DRRS is not supported, drrs_type has to be set to 0. @@ -1357,7 +1360,7 @@ parse_panel_driver_features(struct drm_i915_private *i915, } static void -parse_power_conservation_features(struct drm_i915_private *i915, +parse_power_conservation_features(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_power *power; @@ -1365,10 +1368,10 @@ parse_power_conservation_features(struct drm_i915_private *i915, panel->vbt.vrr = true; /* matches Windows behaviour */ - if (i915->display.vbt.version < 228) + if (display->vbt.version < 228) return; - power = bdb_find_section(i915, BDB_LFP_POWER); + power = bdb_find_section(display, BDB_LFP_POWER); if (!power) return; @@ -1391,16 +1394,16 @@ parse_power_conservation_features(struct drm_i915_private *i915, panel->vbt.drrs_type = DRRS_TYPE_NONE; } - if (i915->display.vbt.version >= 232) + if (display->vbt.version >= 232) panel->vbt.edp.hobl = panel_bool(power->hobl, panel_type); - if (i915->display.vbt.version >= 233) + if (display->vbt.version >= 233) panel->vbt.vrr = panel_bool(power->vrr_feature_enabled, panel_type); } static void -parse_edp(struct drm_i915_private *i915, +parse_edp(struct intel_display *display, struct intel_panel *panel) { const struct bdb_edp *edp; @@ -1408,7 +1411,7 @@ parse_edp(struct drm_i915_private *i915, const struct edp_fast_link_params *edp_link_params; int panel_type = panel->vbt.panel_type; - edp = bdb_find_section(i915, BDB_EDP); + edp = bdb_find_section(display, BDB_EDP); if (!edp) return; @@ -1430,7 +1433,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.pps = *edp_pps; - if (i915->display.vbt.version >= 224) { + if (display->vbt.version >= 224) { panel->vbt.edp.rate = edp->edp_fast_link_training_rate[panel_type] * 20; } else { @@ -1445,7 +1448,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.rate = 540000; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP link rate value %u\n", edp_link_params->rate); break; @@ -1463,7 +1466,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.lanes = 4; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP lane count value %u\n", edp_link_params->lanes); break; @@ -1483,7 +1486,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_3; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP pre-emphasis value %u\n", edp_link_params->preemphasis); break; @@ -1503,19 +1506,19 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_3; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP voltage swing value %u\n", edp_link_params->vswing); break; } - if (i915->display.vbt.version >= 173) { + if (display->vbt.version >= 173) { u8 vswing; /* Don't read from VBT if module parameter has valid value*/ - if (i915->display.params.edp_vswing) { + if (display->params.edp_vswing) { panel->vbt.edp.low_vswing = - i915->display.params.edp_vswing == 1; + display->params.edp_vswing == 1; } else { vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; panel->vbt.edp.low_vswing = vswing == 0; @@ -1525,26 +1528,27 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.drrs_msa_timing_delay = panel_bits(edp->sdrrs_msa_timing_delay, panel_type, 2); - if (i915->display.vbt.version >= 244) + if (display->vbt.version >= 244) panel->vbt.edp.max_link_rate = edp->edp_max_port_link_rate[panel_type] * 20; - if (i915->display.vbt.version >= 251) + if (display->vbt.version >= 251) panel->vbt.edp.dsc_disable = panel_bool(edp->edp_dsc_disable, panel_type); } static void -parse_psr(struct drm_i915_private *i915, +parse_psr(struct intel_display *display, struct intel_panel *panel) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_psr *psr; const struct psr_table *psr_table; int panel_type = panel->vbt.panel_type; - psr = bdb_find_section(i915, BDB_PSR); + psr = bdb_find_section(display, BDB_PSR); if (!psr) { - drm_dbg_kms(&i915->drm, "No PSR BDB found.\n"); + drm_dbg_kms(display->drm, "No PSR BDB found.\n"); return; } @@ -1561,8 +1565,8 @@ parse_psr(struct drm_i915_private *i915, * New psr options 0=500us, 1=100us, 2=2500us, 3=0us * Old decimal value is wake up time in multiples of 100 us. */ - if (i915->display.vbt.version >= 205 && - (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915))) { + if (display->vbt.version >= 205 && + (DISPLAY_VER(display) >= 9 && !IS_BROXTON(i915))) { switch (psr_table->tp1_wakeup_time) { case 0: panel->vbt.psr.tp1_wakeup_time_us = 500; @@ -1574,7 +1578,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp1_wakeup_time_us = 0; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT tp1 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp1_wakeup_time); fallthrough; @@ -1594,7 +1598,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp2_tp3_wakeup_time_us = 0; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT tp2_tp3 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp2_tp3_wakeup_time); fallthrough; @@ -1607,7 +1611,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp2_tp3_wakeup_time_us = psr_table->tp2_tp3_wakeup_time * 100; } - if (i915->display.vbt.version >= 226) { + if (display->vbt.version >= 226) { u32 wakeup_time = psr->psr2_tp2_tp3_wakeup_time; wakeup_time = panel_bits(wakeup_time, panel_type, 2); @@ -1633,13 +1637,13 @@ parse_psr(struct drm_i915_private *i915, } } -static void parse_dsi_backlight_ports(struct drm_i915_private *i915, +static void parse_dsi_backlight_ports(struct intel_display *display, struct intel_panel *panel, enum port port) { - enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C; + enum port port_bc = DISPLAY_VER(display) >= 11 ? PORT_B : PORT_C; - if (!panel->vbt.dsi.config->dual_link || i915->display.vbt.version < 197) { + if (!panel->vbt.dsi.config->dual_link || display->vbt.version < 197) { panel->vbt.dsi.bl_ports = BIT(port); if (panel->vbt.dsi.config->cabc_supported) panel->vbt.dsi.cabc_ports = BIT(port); @@ -1679,7 +1683,7 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, } static void -parse_mipi_config(struct drm_i915_private *i915, +parse_mipi_config(struct intel_display *display, struct intel_panel *panel) { const struct bdb_mipi_config *start; @@ -1689,19 +1693,19 @@ parse_mipi_config(struct drm_i915_private *i915, enum port port; /* parse MIPI blocks only if LFP type is MIPI */ - if (!intel_bios_is_dsi_present(i915, &port)) + if (!intel_bios_is_dsi_present(display, &port)) return; /* Initialize this to undefined indicating no generic MIPI support */ panel->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID; - start = bdb_find_section(i915, BDB_MIPI_CONFIG); + start = bdb_find_section(display, BDB_MIPI_CONFIG); if (!start) { - drm_dbg_kms(&i915->drm, "No MIPI config BDB found"); + drm_dbg_kms(display->drm, "No MIPI config BDB found"); return; } - drm_dbg(&i915->drm, "Found MIPI Config block, panel index = %d\n", + drm_dbg(display->drm, "Found MIPI Config block, panel index = %d\n", panel_type); /* @@ -1722,7 +1726,7 @@ parse_mipi_config(struct drm_i915_private *i915, return; } - parse_dsi_backlight_ports(i915, panel, port); + parse_dsi_backlight_ports(display, panel, port); /* FIXME is the 90 vs. 270 correct? */ switch (config->rotation) { @@ -1754,7 +1758,7 @@ parse_mipi_config(struct drm_i915_private *i915, /* Find the sequence block and size for the given panel. */ static const u8 * -find_panel_sequence_block(struct drm_i915_private *i915, +find_panel_sequence_block(struct intel_display *display, const struct bdb_mipi_sequence *sequence, u16 panel_id, u32 *seq_size) { @@ -1772,7 +1776,8 @@ find_panel_sequence_block(struct drm_i915_private *i915, for (i = 0; i < MAX_MIPI_CONFIGURATIONS && index < total; i++) { if (index + header_size > total) { - drm_err(&i915->drm, "Invalid sequence block (header)\n"); + drm_err(display->drm, + "Invalid sequence block (header)\n"); return NULL; } @@ -1785,7 +1790,7 @@ find_panel_sequence_block(struct drm_i915_private *i915, index += header_size; if (index + current_size > total) { - drm_err(&i915->drm, "Invalid sequence block\n"); + drm_err(display->drm, "Invalid sequence block\n"); return NULL; } @@ -1797,12 +1802,13 @@ find_panel_sequence_block(struct drm_i915_private *i915, index += current_size; } - drm_err(&i915->drm, "Sequence block detected but no valid configuration\n"); + drm_err(display->drm, + "Sequence block detected but no valid configuration\n"); return NULL; } -static int goto_next_sequence(struct drm_i915_private *i915, +static int goto_next_sequence(struct intel_display *display, const u8 *data, int index, int total) { u16 len; @@ -1833,7 +1839,7 @@ static int goto_next_sequence(struct drm_i915_private *i915, len = *(data + index + 6) + 7; break; default: - drm_err(&i915->drm, "Unknown operation byte\n"); + drm_err(display->drm, "Unknown operation byte\n"); return 0; } } @@ -1841,7 +1847,7 @@ static int goto_next_sequence(struct drm_i915_private *i915, return 0; } -static int goto_next_sequence_v3(struct drm_i915_private *i915, +static int goto_next_sequence_v3(struct intel_display *display, const u8 *data, int index, int total) { int seq_end; @@ -1853,7 +1859,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, * checking on the structure. */ if (total < 5) { - drm_err(&i915->drm, "Too small sequence size\n"); + drm_err(display->drm, "Too small sequence size\n"); return 0; } @@ -1870,7 +1876,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, seq_end = index + size_of_sequence; if (seq_end > total) { - drm_err(&i915->drm, "Invalid sequence size\n"); + drm_err(display->drm, "Invalid sequence size\n"); return 0; } @@ -1880,7 +1886,8 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, if (operation_byte == MIPI_SEQ_ELEM_END) { if (index != seq_end) { - drm_err(&i915->drm, "Invalid element structure\n"); + drm_err(display->drm, + "Invalid element structure\n"); return 0; } return index; @@ -1902,7 +1909,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, case MIPI_SEQ_ELEM_PMIC: break; default: - drm_err(&i915->drm, "Unknown operation byte %u\n", + drm_err(display->drm, "Unknown operation byte %u\n", operation_byte); break; } @@ -1915,13 +1922,13 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, * Get len of pre-fixed deassert fragment from a v1 init OTP sequence, * skip all delay + gpio operands and stop at the first DSI packet op. */ -static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, +static int get_init_otp_deassert_fragment_len(struct intel_display *display, struct intel_panel *panel) { const u8 *data = panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; int index, len; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, !data || panel->vbt.dsi.seq_version != 1)) return 0; @@ -1950,7 +1957,7 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. */ -static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, +static void vlv_fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { u8 *init_otp; @@ -1968,11 +1975,11 @@ static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, return; /* The deassert-sequence ends at the first DSI packet */ - len = get_init_otp_deassert_fragment_len(i915, panel); + len = get_init_otp_deassert_fragment_len(display, panel); if (!len) return; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Using init OTP fragment to deassert reset\n"); /* Copy the fragment, update seq byte and terminate it */ @@ -2005,29 +2012,32 @@ static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, * or examine the contents of the sequences to * avoid false positives? */ -static void icl_fixup_mipi_sequences(struct drm_i915_private *i915, +static void icl_fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] && panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) { - drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); + drm_dbg_kms(display->drm, + "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP], panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]); } } -static void fixup_mipi_sequences(struct drm_i915_private *i915, +static void fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { - if (DISPLAY_VER(i915) >= 11) - icl_fixup_mipi_sequences(i915, panel); + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 11) + icl_fixup_mipi_sequences(display, panel); else if (IS_VALLEYVIEW(i915)) - vlv_fixup_mipi_sequences(i915, panel); + vlv_fixup_mipi_sequences(display, panel); } static void -parse_mipi_sequence(struct drm_i915_private *i915, +parse_mipi_sequence(struct intel_display *display, struct intel_panel *panel) { int panel_type = panel->vbt.panel_type; @@ -2041,25 +2051,25 @@ parse_mipi_sequence(struct drm_i915_private *i915, if (panel->vbt.dsi.panel_id != MIPI_DSI_GENERIC_PANEL_ID) return; - sequence = bdb_find_section(i915, BDB_MIPI_SEQUENCE); + sequence = bdb_find_section(display, BDB_MIPI_SEQUENCE); if (!sequence) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No MIPI Sequence found, parsing complete\n"); return; } /* Fail gracefully for forward incompatible sequence block. */ if (sequence->version >= 4) { - drm_err(&i915->drm, + drm_err(display->drm, "Unable to parse MIPI Sequence Block v%u\n", sequence->version); return; } - drm_dbg(&i915->drm, "Found MIPI sequence block v%u\n", + drm_dbg(display->drm, "Found MIPI sequence block v%u\n", sequence->version); - seq_data = find_panel_sequence_block(i915, sequence, panel_type, &seq_size); + seq_data = find_panel_sequence_block(display, sequence, panel_type, &seq_size); if (!seq_data) return; @@ -2074,24 +2084,24 @@ parse_mipi_sequence(struct drm_i915_private *i915, break; if (seq_id >= MIPI_SEQ_MAX) { - drm_err(&i915->drm, "Unknown sequence %u\n", + drm_err(display->drm, "Unknown sequence %u\n", seq_id); goto err; } /* Log about presence of sequences we won't run. */ if (seq_id == MIPI_SEQ_TEAR_ON || seq_id == MIPI_SEQ_TEAR_OFF) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported sequence %u\n", seq_id); panel->vbt.dsi.sequence[seq_id] = data + index; if (sequence->version >= 3) - index = goto_next_sequence_v3(i915, data, index, seq_size); + index = goto_next_sequence_v3(display, data, index, seq_size); else - index = goto_next_sequence(i915, data, index, seq_size); + index = goto_next_sequence(display, data, index, seq_size); if (!index) { - drm_err(&i915->drm, "Invalid sequence %u\n", + drm_err(display->drm, "Invalid sequence %u\n", seq_id); goto err; } @@ -2101,9 +2111,9 @@ parse_mipi_sequence(struct drm_i915_private *i915, panel->vbt.dsi.size = seq_size; panel->vbt.dsi.seq_version = sequence->version; - fixup_mipi_sequences(i915, panel); + fixup_mipi_sequences(display, panel); - drm_dbg(&i915->drm, "MIPI related VBT parsing complete\n"); + drm_dbg(display->drm, "MIPI related VBT parsing complete\n"); return; err: @@ -2112,47 +2122,47 @@ err: } static void -parse_compression_parameters(struct drm_i915_private *i915) +parse_compression_parameters(struct intel_display *display) { const struct bdb_compression_parameters *params; struct intel_bios_encoder_data *devdata; u16 block_size; int index; - if (i915->display.vbt.version < 198) + if (display->vbt.version < 198) return; - params = bdb_find_section(i915, BDB_COMPRESSION_PARAMETERS); + params = bdb_find_section(display, BDB_COMPRESSION_PARAMETERS); if (params) { /* Sanity checks */ if (params->entry_size != sizeof(params->data[0])) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: unsupported compression param entry size\n"); return; } block_size = get_blocksize(params); if (block_size < sizeof(*params)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: expected 16 compression param entries\n"); return; } } - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; if (!child->compression_enable) continue; if (!params) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: compression params not available\n"); continue; } if (child->compression_method_cps) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: CPS compression not supported\n"); continue; } @@ -2164,12 +2174,12 @@ parse_compression_parameters(struct drm_i915_private *i915) } } -static u8 translate_iboost(struct drm_i915_private *i915, u8 val) +static u8 translate_iboost(struct intel_display *display, u8 val) { static const u8 mapping[] = { 1, 3, 7 }; /* See VBT spec */ if (val >= ARRAY_SIZE(mapping)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported I_boost value found in VBT (%d), display may not work properly\n", val); return 0; } @@ -2226,8 +2236,9 @@ static const u8 adlp_ddc_pin_map[] = { [GMBUS_PIN_12_TC4_ICP] = ADLP_DDC_BUS_PORT_TC4, }; -static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) +static u8 map_ddc_pin(struct intel_display *display, u8 vbt_pin) { + struct drm_i915_private *i915 = to_i915(display->drm); const u8 *ddc_pin_map; int i, n_entries; @@ -2242,7 +2253,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(i915) == 9) { + } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(display) == 9) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { @@ -2261,7 +2272,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) return i; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignoring alternate pin: VBT claims DDC pin %d, which is not valid for this platform\n", vbt_pin); return 0; @@ -2319,9 +2330,10 @@ static enum port __dvo_port_to_port(int n_ports, int n_dvo, return PORT_NONE; } -static enum port dvo_port_to_port(struct drm_i915_private *i915, +static enum port dvo_port_to_port(struct intel_display *display, u8 dvo_port) { + struct drm_i915_private *i915 = to_i915(display->drm); /* * Each DDI port can have more than one value on the "DVO Port" field, * so look for all the possible values for each port. @@ -2373,7 +2385,7 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, [PORT_TC4] = { DVO_PORT_HDMII, DVO_PORT_DPI, -1 }, }; - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return __dvo_port_to_port(ARRAY_SIZE(xelpd_port_mapping), ARRAY_SIZE(xelpd_port_mapping[0]), xelpd_port_mapping, @@ -2396,13 +2408,13 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, } static enum port -dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) +dsi_dvo_port_to_port(struct intel_display *display, u8 dvo_port) { switch (dvo_port) { case DVO_PORT_MIPIA: return PORT_A; case DVO_PORT_MIPIC: - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) return PORT_B; else return PORT_C; @@ -2413,13 +2425,13 @@ dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) enum port intel_bios_encoder_port(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; const struct child_device_config *child = &devdata->child; enum port port; - port = dvo_port_to_port(i915, child->dvo_port); - if (port == PORT_NONE && DISPLAY_VER(i915) >= 11) - port = dsi_dvo_port_to_port(i915, child->dvo_port); + port = dvo_port_to_port(display, child->dvo_port); + if (port == PORT_NONE && DISPLAY_VER(display) >= 11) + port = dsi_dvo_port_to_port(display, child->dvo_port); return port; } @@ -2464,10 +2476,10 @@ static int parse_bdb_216_dp_max_link_rate(const int vbt_max_link_rate) int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 216) + if (!devdata || devdata->display->vbt.version < 216) return 0; - if (devdata->i915->display.vbt.version >= 230) + if (devdata->display->vbt.version >= 230) return parse_bdb_230_dp_max_link_rate(devdata->child.dp_max_link_rate); else return parse_bdb_216_dp_max_link_rate(devdata->child.dp_max_link_rate); @@ -2475,7 +2487,7 @@ int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 244) + if (!devdata || devdata->display->vbt.version < 244) return 0; return devdata->child.dp_max_lane_count + 1; @@ -2484,10 +2496,10 @@ int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) static void sanitize_device_type(struct intel_bios_encoder_data *devdata, enum port port) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; bool is_hdmi; - if (port != PORT_A || DISPLAY_VER(i915) >= 12) + if (port != PORT_A || DISPLAY_VER(display) >= 12) return; if (!intel_bios_encoder_supports_dvi(devdata)) @@ -2495,7 +2507,7 @@ static void sanitize_device_type(struct intel_bios_encoder_data *devdata, is_hdmi = intel_bios_encoder_supports_hdmi(devdata); - drm_dbg_kms(&i915->drm, "VBT claims port A supports DVI%s, ignoring\n", + drm_dbg_kms(display->drm, "VBT claims port A supports DVI%s, ignoring\n", is_hdmi ? "/HDMI" : ""); devdata->child.device_type &= ~DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -2505,7 +2517,8 @@ static void sanitize_device_type(struct intel_bios_encoder_data *devdata, static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata, enum port port) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (!intel_bios_encoder_supports_dvi(devdata)) return; @@ -2516,7 +2529,8 @@ static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata, * up to 11, whereas the BDW max is 9. */ if (IS_BROADWELL(i915) && devdata->child.hdmi_level_shifter_value > 9) { - drm_dbg_kms(&i915->drm, "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n", + drm_dbg_kms(display->drm, + "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n", port_name(port), devdata->child.hdmi_level_shifter_value, 9); devdata->child.hdmi_level_shifter_value = 9; @@ -2564,14 +2578,14 @@ intel_bios_encoder_supports_dsi(const struct intel_bios_encoder_data *devdata) bool intel_bios_encoder_is_lspcon(const struct intel_bios_encoder_data *devdata) { - return devdata && HAS_LSPCON(devdata->i915) && devdata->child.lspcon; + return devdata && HAS_LSPCON(devdata->display) && devdata->child.lspcon; } /* This is an index in the HDMI/DVI DDI buffer translation table, or -1 */ int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 158 || - DISPLAY_VER(devdata->i915) >= 14) + if (!devdata || devdata->display->vbt.version < 158 || + DISPLAY_VER(devdata->display) >= 14) return -1; return devdata->child.hdmi_level_shifter_value; @@ -2579,7 +2593,7 @@ int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata) int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 204) + if (!devdata || devdata->display->vbt.version < 204) return 0; switch (devdata->child.hdmi_max_data_rate) { @@ -2601,8 +2615,9 @@ int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata } } -static bool is_port_valid(struct drm_i915_private *i915, enum port port) +static bool is_port_valid(struct intel_display *display, enum port port) { + struct drm_i915_private *i915 = to_i915(display->drm); /* * On some ICL SKUs port F is not present, but broken VBTs mark * the port as present. Only try to initialize port F for the @@ -2616,7 +2631,7 @@ static bool is_port_valid(struct drm_i915_private *i915, enum port port) static void print_ddi_port(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; const struct child_device_config *child = &devdata->child; bool is_dvi, is_hdmi, is_dp, is_edp, is_dsi, is_crt, supports_typec_usb, supports_tbt; int dp_boost_level, dp_max_link_rate, hdmi_boost_level, hdmi_level_shift, max_tmds_clock; @@ -2636,7 +2651,7 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) supports_typec_usb = intel_bios_encoder_supports_typec_usb(devdata); supports_tbt = intel_bios_encoder_supports_tbt(devdata); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d DSI:%d DP++:%d LSPCON:%d USB-Type-C:%d TBT:%d DSC:%d\n", port_name(port), is_crt, is_dvi, is_hdmi, is_dp, is_edp, is_dsi, intel_bios_encoder_supports_dp_dual_mode(devdata), @@ -2646,33 +2661,33 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) hdmi_level_shift = intel_bios_hdmi_level_shift(devdata); if (hdmi_level_shift >= 0) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI level shift: %d\n", port_name(port), hdmi_level_shift); } max_tmds_clock = intel_bios_hdmi_max_tmds_clock(devdata); if (max_tmds_clock) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI max TMDS clock: %d kHz\n", port_name(port), max_tmds_clock); /* I_boost config for SKL and above */ dp_boost_level = intel_bios_dp_boost_level(devdata); if (dp_boost_level) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT (e)DP boost level: %d\n", port_name(port), dp_boost_level); hdmi_boost_level = intel_bios_hdmi_boost_level(devdata); if (hdmi_boost_level) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI boost level: %d\n", port_name(port), hdmi_boost_level); dp_max_link_rate = intel_bios_dp_max_link_rate(devdata); if (dp_max_link_rate) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT DP max link rate: %d\n", port_name(port), dp_max_link_rate); @@ -2680,22 +2695,22 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) * FIXME need to implement support for VBT * vswing/preemph tables should this ever trigger. */ - drm_WARN(&i915->drm, child->use_vbt_vswing, + drm_WARN(display->drm, child->use_vbt_vswing, "Port %c asks to use VBT vswing/preemph tables\n", port_name(port)); } static void parse_ddi_port(struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; enum port port; port = intel_bios_encoder_port(devdata); if (port == PORT_NONE) return; - if (!is_port_valid(i915, port)) { - drm_dbg_kms(&i915->drm, + if (!is_port_valid(display, port)) { + drm_dbg_kms(display->drm, "VBT reports port %c as supported, but that can't be true: skipping\n", port_name(port)); return; @@ -2705,22 +2720,24 @@ static void parse_ddi_port(struct intel_bios_encoder_data *devdata) sanitize_hdmi_level_shift(devdata, port); } -static bool has_ddi_port_info(struct drm_i915_private *i915) +static bool has_ddi_port_info(struct intel_display *display) { - return DISPLAY_VER(i915) >= 5 || IS_G4X(i915); + struct drm_i915_private *i915 = to_i915(display->drm); + + return DISPLAY_VER(display) >= 5 || IS_G4X(i915); } -static void parse_ddi_ports(struct drm_i915_private *i915) +static void parse_ddi_ports(struct intel_display *display) { struct intel_bios_encoder_data *devdata; - if (!has_ddi_port_info(i915)) + if (!has_ddi_port_info(display)) return; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) + list_for_each_entry(devdata, &display->vbt.display_devices, node) parse_ddi_port(devdata); - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) + list_for_each_entry(devdata, &display->vbt.display_devices, node) print_ddi_port(devdata); } @@ -2746,27 +2763,27 @@ static int child_device_expected_size(u16 version) return 22; } -static bool child_device_size_valid(struct drm_i915_private *i915, int size) +static bool child_device_size_valid(struct intel_display *display, int size) { int expected_size; - expected_size = child_device_expected_size(i915->display.vbt.version); + expected_size = child_device_expected_size(display->vbt.version); if (expected_size < 0) { expected_size = sizeof(struct child_device_config); - drm_dbg(&i915->drm, + drm_dbg(display->drm, "Expected child device config size for VBT version %u not known; assuming %d\n", - i915->display.vbt.version, expected_size); + display->vbt.version, expected_size); } /* Flag an error for unexpected size, but continue anyway. */ if (size != expected_size) - drm_err(&i915->drm, + drm_err(display->drm, "Unexpected child device config size %d (expected %d for VBT version %u)\n", - size, expected_size, i915->display.vbt.version); + size, expected_size, display->vbt.version); /* The legacy sized child device config is the minimum we need. */ if (size < LEGACY_CHILD_DEVICE_CONFIG_SIZE) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Child device config size %d is too small.\n", size); return false; @@ -2776,8 +2793,9 @@ static bool child_device_size_valid(struct drm_i915_private *i915, int size) } static void -parse_general_definitions(struct drm_i915_private *i915) +parse_general_definitions(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_general_definitions *defs; struct intel_bios_encoder_data *devdata; const struct child_device_config *child; @@ -2785,27 +2803,27 @@ parse_general_definitions(struct drm_i915_private *i915) u16 block_size; int bus_pin; - defs = bdb_find_section(i915, BDB_GENERAL_DEFINITIONS); + defs = bdb_find_section(display, BDB_GENERAL_DEFINITIONS); if (!defs) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No general definition block is found, no devices defined.\n"); return; } block_size = get_blocksize(defs); if (block_size < sizeof(*defs)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "General definitions block too small (%u)\n", block_size); return; } bus_pin = defs->crt_ddc_gmbus_pin; - drm_dbg_kms(&i915->drm, "crt_ddc_bus_pin: %d\n", bus_pin); + drm_dbg_kms(display->drm, "crt_ddc_bus_pin: %d\n", bus_pin); if (intel_gmbus_is_valid_pin(i915, bus_pin)) - i915->display.vbt.crt_ddc_pin = bus_pin; + display->vbt.crt_ddc_pin = bus_pin; - if (!child_device_size_valid(i915, defs->child_dev_size)) + if (!child_device_size_valid(display, defs->child_dev_size)) return; /* get the number of child device */ @@ -2816,7 +2834,7 @@ parse_general_definitions(struct drm_i915_private *i915) if (!child->device_type) continue; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found VBT child device with type 0x%x\n", child->device_type); @@ -2824,7 +2842,7 @@ parse_general_definitions(struct drm_i915_private *i915) if (!devdata) break; - devdata->i915 = i915; + devdata->display = display; /* * Copy as much as we know (sizeof) and is available @@ -2834,37 +2852,39 @@ parse_general_definitions(struct drm_i915_private *i915) memcpy(&devdata->child, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - list_add_tail(&devdata->node, &i915->display.vbt.display_devices); + list_add_tail(&devdata->node, &display->vbt.display_devices); } - if (list_empty(&i915->display.vbt.display_devices)) - drm_dbg_kms(&i915->drm, + if (list_empty(&display->vbt.display_devices)) + drm_dbg_kms(display->drm, "no child dev is parsed from VBT\n"); } /* Common defaults which may be overridden by VBT. */ static void -init_vbt_defaults(struct drm_i915_private *i915) +init_vbt_defaults(struct intel_display *display) { - i915->display.vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; + struct drm_i915_private *i915 = to_i915(display->drm); + + display->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; /* general features */ - i915->display.vbt.int_tv_support = 1; - i915->display.vbt.int_crt_support = 1; + display->vbt.int_tv_support = 1; + display->vbt.int_crt_support = 1; /* driver features */ - i915->display.vbt.int_lvds_support = 1; + display->vbt.int_lvds_support = 1; /* Default to using SSC */ - i915->display.vbt.lvds_use_ssc = 1; + display->vbt.lvds_use_ssc = 1; /* * Core/SandyBridge/IvyBridge use alternative (120MHz) reference * clock for LVDS. */ - i915->display.vbt.lvds_ssc_freq = intel_bios_ssc_frequency(i915, - !HAS_PCH_SPLIT(i915)); - drm_dbg_kms(&i915->drm, "Set default to SSC at %d kHz\n", - i915->display.vbt.lvds_ssc_freq); + display->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(display, + !HAS_PCH_SPLIT(i915)); + drm_dbg_kms(display->drm, "Set default to SSC at %d kHz\n", + display->vbt.lvds_ssc_freq); } /* Common defaults which may be overridden by VBT. */ @@ -2880,12 +2900,13 @@ init_vbt_panel_defaults(struct intel_panel *panel) /* Defaults to initialize only if there is no VBT. */ static void -init_vbt_missing_defaults(struct drm_i915_private *i915) +init_vbt_missing_defaults(struct intel_display *display) { - unsigned int ports = DISPLAY_RUNTIME_INFO(i915)->port_mask; + struct drm_i915_private *i915 = to_i915(display->drm); + unsigned int ports = DISPLAY_RUNTIME_INFO(display)->port_mask; enum port port; - if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915)) + if (!HAS_DDI(display) && !IS_CHERRYVIEW(i915)) return; for_each_port_masked(port, ports) { @@ -2905,7 +2926,7 @@ init_vbt_missing_defaults(struct drm_i915_private *i915) if (!devdata) break; - devdata->i915 = i915; + devdata->display = display; child = &devdata->child; if (port == PORT_F) @@ -2924,15 +2945,15 @@ init_vbt_missing_defaults(struct drm_i915_private *i915) if (port == PORT_A) child->device_type |= DEVICE_TYPE_INTERNAL_CONNECTOR; - list_add_tail(&devdata->node, &i915->display.vbt.display_devices); + list_add_tail(&devdata->node, &display->vbt.display_devices); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Generating default VBT child device with type 0x04%x on port %c\n", child->device_type, port_name(port)); } /* Bypass some minimum baseline VBT version checks */ - i915->display.vbt.version = 155; + display->vbt.version = 155; } static const struct bdb_header *get_bdb_header(const struct vbt_header *vbt) @@ -2944,13 +2965,13 @@ static const struct bdb_header *get_bdb_header(const struct vbt_header *vbt) /** * intel_bios_is_valid_vbt - does the given buffer contain a valid VBT - * @i915: the device + * @display: display device * @buf: pointer to a buffer to validate * @size: size of the buffer * * Returns true on valid VBT. */ -bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, +bool intel_bios_is_valid_vbt(struct intel_display *display, const void *buf, size_t size) { const struct vbt_header *vbt = buf; @@ -2960,17 +2981,18 @@ bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, return false; if (sizeof(struct vbt_header) > size) { - drm_dbg_kms(&i915->drm, "VBT header incomplete\n"); + drm_dbg_kms(display->drm, "VBT header incomplete\n"); return false; } if (memcmp(vbt->signature, "$VBT", 4)) { - drm_dbg_kms(&i915->drm, "VBT invalid signature\n"); + drm_dbg_kms(display->drm, "VBT invalid signature\n"); return false; } if (vbt->vbt_size > size) { - drm_dbg_kms(&i915->drm, "VBT incomplete (vbt_size overflows)\n"); + drm_dbg_kms(display->drm, + "VBT incomplete (vbt_size overflows)\n"); return false; } @@ -2980,48 +3002,48 @@ bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, vbt->bdb_offset, sizeof(struct bdb_header), size)) { - drm_dbg_kms(&i915->drm, "BDB header incomplete\n"); + drm_dbg_kms(display->drm, "BDB header incomplete\n"); return false; } bdb = get_bdb_header(vbt); if (range_overflows_t(size_t, vbt->bdb_offset, bdb->bdb_size, size)) { - drm_dbg_kms(&i915->drm, "BDB incomplete\n"); + drm_dbg_kms(display->drm, "BDB incomplete\n"); return false; } return vbt; } -static struct vbt_header *firmware_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *firmware_get_vbt(struct intel_display *display, size_t *size) { struct vbt_header *vbt = NULL; const struct firmware *fw = NULL; - const char *name = i915->display.params.vbt_firmware; + const char *name = display->params.vbt_firmware; int ret; if (!name || !*name) return NULL; - ret = request_firmware(&fw, name, i915->drm.dev); + ret = request_firmware(&fw, name, display->drm->dev); if (ret) { - drm_err(&i915->drm, + drm_err(display->drm, "Requesting VBT firmware \"%s\" failed (%d)\n", name, ret); return NULL; } - if (intel_bios_is_valid_vbt(i915, fw->data, fw->size)) { + if (intel_bios_is_valid_vbt(display, fw->data, fw->size)) { vbt = kmemdup(fw->data, fw->size, GFP_KERNEL); if (vbt) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found valid VBT firmware \"%s\"\n", name); if (size) *size = fw->size; } } else { - drm_dbg_kms(&i915->drm, "Invalid VBT firmware \"%s\"\n", + drm_dbg_kms(display->drm, "Invalid VBT firmware \"%s\"\n", name); } @@ -3037,9 +3059,10 @@ static u32 intel_spi_read(struct intel_uncore *uncore, u32 offset) return intel_uncore_read(uncore, PRIMARY_SPI_TRIGGER); } -static struct vbt_header *spi_oprom_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *spi_oprom_get_vbt(struct intel_display *display, size_t *size) { + struct drm_i915_private *i915 = to_i915(display->drm); u32 count, data, found, store = 0; u32 static_region, oprom_offset; u32 oprom_size = 0x200000; @@ -3076,10 +3099,10 @@ static struct vbt_header *spi_oprom_get_vbt(struct drm_i915_private *i915, for (count = 0; count < vbt_size; count += 4) *(vbt + store++) = intel_spi_read(&i915->uncore, found + count); - if (!intel_bios_is_valid_vbt(i915, vbt, vbt_size)) + if (!intel_bios_is_valid_vbt(display, vbt, vbt_size)) goto err_free_vbt; - drm_dbg_kms(&i915->drm, "Found valid VBT in SPI flash\n"); + drm_dbg_kms(display->drm, "Found valid VBT in SPI flash\n"); if (size) *size = vbt_size; @@ -3092,10 +3115,10 @@ err_not_found: return NULL; } -static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *oprom_get_vbt(struct intel_display *display, size_t *sizep) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); void __iomem *p = NULL, *oprom; struct vbt_header *vbt; u16 vbt_size; @@ -3119,13 +3142,13 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, goto err_unmap_oprom; if (sizeof(struct vbt_header) > size) { - drm_dbg(&i915->drm, "VBT header incomplete\n"); + drm_dbg(display->drm, "VBT header incomplete\n"); goto err_unmap_oprom; } vbt_size = ioread16(p + offsetof(struct vbt_header, vbt_size)); if (vbt_size > size) { - drm_dbg(&i915->drm, + drm_dbg(display->drm, "VBT incomplete (vbt_size overflows)\n"); goto err_unmap_oprom; } @@ -3137,7 +3160,7 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, memcpy_fromio(vbt, p, vbt_size); - if (!intel_bios_is_valid_vbt(i915, vbt, vbt_size)) + if (!intel_bios_is_valid_vbt(display, vbt, vbt_size)) goto err_free_vbt; pci_unmap_rom(pdev, oprom); @@ -3145,7 +3168,7 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, if (sizep) *sizep = vbt_size; - drm_dbg_kms(&i915->drm, "Found valid VBT in PCI ROM\n"); + drm_dbg_kms(display->drm, "Found valid VBT in PCI ROM\n"); return vbt; @@ -3157,14 +3180,14 @@ err_unmap_oprom: return NULL; } -static const struct vbt_header *intel_bios_get_vbt(struct drm_i915_private *i915, +static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display, size_t *sizep) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); const struct vbt_header *vbt = NULL; intel_wakeref_t wakeref; - vbt = firmware_get_vbt(i915, sizep); + vbt = firmware_get_vbt(display, sizep); if (!vbt) vbt = intel_opregion_get_vbt(display, sizep); @@ -3175,76 +3198,77 @@ static const struct vbt_header *intel_bios_get_vbt(struct drm_i915_private *i915 */ if (!vbt && IS_DGFX(i915)) with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vbt = spi_oprom_get_vbt(i915, sizep); + vbt = spi_oprom_get_vbt(display, sizep); if (!vbt) with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vbt = oprom_get_vbt(i915, sizep); + vbt = oprom_get_vbt(display, sizep); return vbt; } /** * intel_bios_init - find VBT and initialize settings from the BIOS - * @i915: i915 device instance + * @display: display device instance * * Parse and initialize settings from the Video BIOS Tables (VBT). If the VBT * was not found in ACPI OpRegion, try to find it in PCI ROM first. Also * initialize some defaults if the VBT is not present at all. */ -void intel_bios_init(struct drm_i915_private *i915) +void intel_bios_init(struct intel_display *display) { const struct vbt_header *vbt; const struct bdb_header *bdb; - INIT_LIST_HEAD(&i915->display.vbt.display_devices); - INIT_LIST_HEAD(&i915->display.vbt.bdb_blocks); + INIT_LIST_HEAD(&display->vbt.display_devices); + INIT_LIST_HEAD(&display->vbt.bdb_blocks); - if (!HAS_DISPLAY(i915)) { - drm_dbg_kms(&i915->drm, + if (!HAS_DISPLAY(display)) { + drm_dbg_kms(display->drm, "Skipping VBT init due to disabled display.\n"); return; } - init_vbt_defaults(i915); + init_vbt_defaults(display); - vbt = intel_bios_get_vbt(i915, NULL); + vbt = intel_bios_get_vbt(display, NULL); if (!vbt) goto out; bdb = get_bdb_header(vbt); - i915->display.vbt.version = bdb->version; + display->vbt.version = bdb->version; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT signature \"%.*s\", BDB version %d\n", - (int)sizeof(vbt->signature), vbt->signature, i915->display.vbt.version); + (int)sizeof(vbt->signature), vbt->signature, + display->vbt.version); - init_bdb_blocks(i915, bdb); + init_bdb_blocks(display, bdb); /* Grab useful general definitions */ - parse_general_features(i915); - parse_general_definitions(i915); - parse_driver_features(i915); + parse_general_features(display); + parse_general_definitions(display); + parse_driver_features(display); /* Depends on child device list */ - parse_compression_parameters(i915); + parse_compression_parameters(display); out: if (!vbt) { - drm_info(&i915->drm, + drm_info(display->drm, "Failed to find VBIOS tables (VBT)\n"); - init_vbt_missing_defaults(i915); + init_vbt_missing_defaults(display); } /* Further processing on pre-parsed or generated child device data */ - parse_sdvo_device_mapping(i915); - parse_ddi_ports(i915); + parse_sdvo_device_mapping(display); + parse_ddi_ports(display); kfree(vbt); } -static void intel_bios_init_panel(struct drm_i915_private *i915, +static void intel_bios_init_panel(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, @@ -3252,63 +3276,64 @@ static void intel_bios_init_panel(struct drm_i915_private *i915, { /* already have it? */ if (panel->vbt.panel_type >= 0) { - drm_WARN_ON(&i915->drm, !use_fallback); + drm_WARN_ON(display->drm, !use_fallback); return; } - panel->vbt.panel_type = get_panel_type(i915, devdata, + panel->vbt.panel_type = get_panel_type(display, devdata, drm_edid, use_fallback); if (panel->vbt.panel_type < 0) { - drm_WARN_ON(&i915->drm, use_fallback); + drm_WARN_ON(display->drm, use_fallback); return; } init_vbt_panel_defaults(panel); - parse_panel_options(i915, panel); - parse_generic_dtd(i915, panel); - parse_lfp_data(i915, panel); - parse_lfp_backlight(i915, panel); - parse_sdvo_lvds_data(i915, panel); - parse_panel_driver_features(i915, panel); - parse_power_conservation_features(i915, panel); - parse_edp(i915, panel); - parse_psr(i915, panel); - parse_mipi_config(i915, panel); - parse_mipi_sequence(i915, panel); + parse_panel_options(display, panel); + parse_generic_dtd(display, panel); + parse_lfp_data(display, panel); + parse_lfp_backlight(display, panel); + parse_sdvo_lvds_data(display, panel); + parse_panel_driver_features(display, panel); + parse_power_conservation_features(display, panel); + parse_edp(display, panel); + parse_psr(display, panel); + parse_mipi_config(display, panel); + parse_mipi_sequence(display, panel); } -void intel_bios_init_panel_early(struct drm_i915_private *i915, +void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata) { - intel_bios_init_panel(i915, panel, devdata, NULL, false); + intel_bios_init_panel(display, panel, devdata, NULL, false); } -void intel_bios_init_panel_late(struct drm_i915_private *i915, +void intel_bios_init_panel_late(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid) { - intel_bios_init_panel(i915, panel, devdata, drm_edid, true); + intel_bios_init_panel(display, panel, devdata, drm_edid, true); } /** * intel_bios_driver_remove - Free any resources allocated by intel_bios_init() - * @i915: i915 device instance + * @display: display device instance */ -void intel_bios_driver_remove(struct drm_i915_private *i915) +void intel_bios_driver_remove(struct intel_display *display) { struct intel_bios_encoder_data *devdata, *nd; struct bdb_block_entry *entry, *ne; - list_for_each_entry_safe(devdata, nd, &i915->display.vbt.display_devices, node) { + list_for_each_entry_safe(devdata, nd, &display->vbt.display_devices, + node) { list_del(&devdata->node); kfree(devdata->dsc); kfree(devdata); } - list_for_each_entry_safe(entry, ne, &i915->display.vbt.bdb_blocks, node) { + list_for_each_entry_safe(entry, ne, &display->vbt.bdb_blocks, node) { list_del(&entry->node); kfree(entry); } @@ -3332,22 +3357,22 @@ void intel_bios_fini_panel(struct intel_panel *panel) /** * intel_bios_is_tv_present - is integrated TV present in VBT - * @i915: i915 device instance + * @display: display device instance * * Return true if TV is present. If no child devices were parsed from VBT, * assume TV is present. */ -bool intel_bios_is_tv_present(struct drm_i915_private *i915) +bool intel_bios_is_tv_present(struct intel_display *display) { const struct intel_bios_encoder_data *devdata; - if (!i915->display.vbt.int_tv_support) + if (!display->vbt.int_tv_support) return false; - if (list_empty(&i915->display.vbt.display_devices)) + if (list_empty(&display->vbt.display_devices)) return true; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; /* @@ -3373,21 +3398,21 @@ bool intel_bios_is_tv_present(struct drm_i915_private *i915) /** * intel_bios_is_lvds_present - is LVDS present in VBT - * @i915: i915 device instance + * @display: display device instance * @i2c_pin: i2c pin for LVDS if present * * Return true if LVDS is present. If no child devices were parsed from VBT, * assume LVDS is present. */ -bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) +bool intel_bios_is_lvds_present(struct intel_display *display, u8 *i2c_pin) { - struct intel_display *display = &i915->display; + struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; - if (list_empty(&i915->display.vbt.display_devices)) + if (list_empty(&display->vbt.display_devices)) return true; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; /* If the device type is not LFP, continue. @@ -3422,25 +3447,25 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) /** * intel_bios_is_port_present - is the specified digital port present - * @i915: i915 device instance + * @display: display device instance * @port: port to check * * Return true if the device in %port is present. */ -bool intel_bios_is_port_present(struct drm_i915_private *i915, enum port port) +bool intel_bios_is_port_present(struct intel_display *display, enum port port) { const struct intel_bios_encoder_data *devdata; - if (WARN_ON(!has_ddi_port_info(i915))) + if (WARN_ON(!has_ddi_port_info(display))) return true; - if (!is_port_valid(i915, port)) + if (!is_port_valid(display, port)) return false; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; - if (dvo_port_to_port(i915, child->dvo_port) == port) + if (dvo_port_to_port(display, child->dvo_port) == port) return true; } @@ -3471,32 +3496,32 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da /** * intel_bios_is_dsi_present - is DSI present in VBT - * @i915: i915 device instance + * @display: display device instance * @port: port for DSI if present * * Return true if DSI is present, and return the port in %port. */ -bool intel_bios_is_dsi_present(struct drm_i915_private *i915, +bool intel_bios_is_dsi_present(struct intel_display *display, enum port *port) { const struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; u8 dvo_port = child->dvo_port; if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) { - drm_dbg_kms(&i915->drm, + if (dsi_dvo_port_to_port(display, dvo_port) == PORT_NONE) { + drm_dbg_kms(display->drm, "VBT has unsupported DSI port %c\n", port_name(dvo_port - DVO_PORT_MIPIA)); continue; } if (port) - *port = dsi_dvo_port_to_port(i915, dvo_port); + *port = dsi_dvo_port_to_port(display, dvo_port); return true; } @@ -3507,7 +3532,7 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, struct dsc_compression_parameters_entry *dsc, int dsc_max_bpc) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; int bpc = 8; @@ -3521,7 +3546,7 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, else if (dsc->support_8bpc && dsc_max_bpc >= 8) bpc = 8; else - drm_dbg_kms(&i915->drm, "VBT: Unsupported BPC %d for DCS\n", + drm_dbg_kms(display->drm, "VBT: Unsupported BPC %d for DCS\n", dsc_max_bpc); crtc_state->pipe_bpp = bpc * 3; @@ -3542,14 +3567,16 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, } else { /* FIXME */ if (!(dsc->slices_per_line & BIT(0))) - drm_dbg_kms(&i915->drm, "VBT: Unsupported DSC slice count for DSI\n"); + drm_dbg_kms(display->drm, + "VBT: Unsupported DSC slice count for DSI\n"); crtc_state->dsc.slice_count = 1; } if (crtc_state->hw.adjusted_mode.crtc_hdisplay % crtc_state->dsc.slice_count != 0) - drm_dbg_kms(&i915->drm, "VBT: DSC hdisplay %d not divisible by slice count %d\n", + drm_dbg_kms(display->drm, + "VBT: DSC hdisplay %d not divisible by slice count %d\n", crtc_state->hw.adjusted_mode.crtc_hdisplay, crtc_state->dsc.slice_count); @@ -3573,16 +3600,16 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int dsc_max_bpc) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); const struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) { + if (dsi_dvo_port_to_port(display, child->dvo_port) == encoder->port) { if (!devdata->dsc) return false; @@ -3642,12 +3669,13 @@ static const u8 direct_aux_ch_map[] = { [AUX_CH_I] = DP_AUX_I, /* aka AUX_CH_USBC6 */ }; -static enum aux_ch map_aux_ch(struct drm_i915_private *i915, u8 aux_channel) +static enum aux_ch map_aux_ch(struct intel_display *display, u8 aux_channel) { + struct drm_i915_private *i915 = to_i915(display->drm); const u8 *aux_ch_map; int i, n_entries; - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { aux_ch_map = adlp_aux_ch_map; n_entries = ARRAY_SIZE(adlp_aux_ch_map); } else if (IS_ALDERLAKE_S(i915)) { @@ -3666,7 +3694,7 @@ static enum aux_ch map_aux_ch(struct drm_i915_private *i915, u8 aux_channel) return i; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignoring alternate AUX CH: VBT claims AUX 0x%x, which is not valid for this platform\n", aux_channel); @@ -3678,22 +3706,22 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata) if (!devdata || !devdata->child.aux_channel) return AUX_CH_NONE; - return map_aux_ch(devdata->i915, devdata->child.aux_channel); + return map_aux_ch(devdata->display, devdata->child.aux_channel); } bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915; + struct intel_display *display; u8 aux_channel; int count = 0; if (!devdata || !devdata->child.aux_channel) return false; - i915 = devdata->i915; + display = devdata->display; aux_channel = devdata->child.aux_channel; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { if (intel_bios_encoder_supports_dp(devdata) && aux_channel == devdata->child.aux_channel) count++; @@ -3704,18 +3732,18 @@ bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devda int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) + if (!devdata || devdata->display->vbt.version < 196 || !devdata->child.iboost) return 0; - return translate_iboost(devdata->i915, devdata->child.dp_iboost_level); + return translate_iboost(devdata->display, devdata->child.dp_iboost_level); } int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) + if (!devdata || devdata->display->vbt.version < 196 || !devdata->child.iboost) return 0; - return translate_iboost(devdata->i915, devdata->child.hdmi_iboost_level); + return translate_iboost(devdata->display, devdata->child.hdmi_iboost_level); } int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata) @@ -3723,17 +3751,17 @@ int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata) if (!devdata || !devdata->child.ddc_pin) return 0; - return map_ddc_pin(devdata->i915, devdata->child.ddc_pin); + return map_ddc_pin(devdata->display, devdata->child.ddc_pin); } bool intel_bios_encoder_supports_typec_usb(const struct intel_bios_encoder_data *devdata) { - return devdata->i915->display.vbt.version >= 195 && devdata->child.dp_usb_type_c; + return devdata->display->vbt.version >= 195 && devdata->child.dp_usb_type_c; } bool intel_bios_encoder_supports_tbt(const struct intel_bios_encoder_data *devdata) { - return devdata->i915->display.vbt.version >= 209 && devdata->child.tbt; + return devdata->display->vbt.version >= 209 && devdata->child.tbt; } bool intel_bios_encoder_lane_reversal(const struct intel_bios_encoder_data *devdata) @@ -3747,11 +3775,11 @@ bool intel_bios_encoder_hpd_invert(const struct intel_bios_encoder_data *devdata } const struct intel_bios_encoder_data * -intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port) +intel_bios_encoder_data_lookup(struct intel_display *display, enum port port) { struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { if (intel_bios_encoder_port(devdata) == port) return devdata; } @@ -3759,23 +3787,23 @@ intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port) return NULL; } -void intel_bios_for_each_encoder(struct drm_i915_private *i915, - void (*func)(struct drm_i915_private *i915, +void intel_bios_for_each_encoder(struct intel_display *display, + void (*func)(struct intel_display *display, const struct intel_bios_encoder_data *devdata)) { struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) - func(i915, devdata); + list_for_each_entry(devdata, &display->vbt.display_devices, node) + func(display, devdata); } static int intel_bios_vbt_show(struct seq_file *m, void *unused) { - struct drm_i915_private *i915 = m->private; + struct intel_display *display = m->private; const void *vbt; size_t vbt_size; - vbt = intel_bios_get_vbt(i915, &vbt_size); + vbt = intel_bios_get_vbt(display, &vbt_size); if (vbt) { seq_write(m, vbt, vbt_size); @@ -3787,10 +3815,10 @@ static int intel_bios_vbt_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(intel_bios_vbt); -void intel_bios_debugfs_register(struct drm_i915_private *i915) +void intel_bios_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; debugfs_create_file("i915_vbt", 0444, minor->debugfs_root, - i915, &intel_bios_vbt_fops); + display, &intel_bios_vbt_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 06a51be4afd8..8b703f6cfe17 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -33,9 +33,9 @@ #include struct drm_edid; -struct drm_i915_private; struct intel_bios_encoder_data; struct intel_crtc_state; +struct intel_display; struct intel_encoder; struct intel_panel; enum aux_ch; @@ -232,28 +232,28 @@ struct mipi_pps_data { u16 panel_power_cycle_delay; } __packed; -void intel_bios_init(struct drm_i915_private *dev_priv); -void intel_bios_init_panel_early(struct drm_i915_private *dev_priv, +void intel_bios_init(struct intel_display *display); +void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata); -void intel_bios_init_panel_late(struct drm_i915_private *dev_priv, +void intel_bios_init_panel_late(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid); void intel_bios_fini_panel(struct intel_panel *panel); -void intel_bios_driver_remove(struct drm_i915_private *dev_priv); -bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, +void intel_bios_driver_remove(struct intel_display *display); +bool intel_bios_is_valid_vbt(struct intel_display *display, const void *buf, size_t size); -bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); -bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); -bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port); -bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port); +bool intel_bios_is_tv_present(struct intel_display *display); +bool intel_bios_is_lvds_present(struct intel_display *display, u8 *i2c_pin); +bool intel_bios_is_port_present(struct intel_display *display, enum port port); +bool intel_bios_is_dsi_present(struct intel_display *display, enum port *port); bool intel_bios_get_dsc_params(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int dsc_max_bpc); const struct intel_bios_encoder_data * -intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port); +intel_bios_encoder_data_lookup(struct intel_display *display, enum port port); bool intel_bios_encoder_supports_dvi(const struct intel_bios_encoder_data *devdata); bool intel_bios_encoder_supports_hdmi(const struct intel_bios_encoder_data *devdata); @@ -277,10 +277,10 @@ int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata); -void intel_bios_for_each_encoder(struct drm_i915_private *i915, - void (*func)(struct drm_i915_private *i915, +void intel_bios_for_each_encoder(struct intel_display *display, + void (*func)(struct intel_display *display, const struct intel_bios_encoder_data *devdata)); -void intel_bios_debugfs_register(struct drm_i915_private *i915); +void intel_bios_debugfs_register(struct intel_display *display); #endif /* _INTEL_BIOS_H_ */ diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 143d66951631..3252dab56430 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -159,9 +159,11 @@ static bool icl_combo_phy_enabled(struct drm_i915_private *dev_priv, static bool ehl_vbt_ddi_d_present(struct drm_i915_private *i915) { - bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A); - bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D); - bool dsi_present = intel_bios_is_dsi_present(i915, NULL); + struct intel_display *display = &i915->display; + + bool ddi_a_present = intel_bios_is_port_present(display, PORT_A); + bool ddi_d_present = intel_bios_is_port_present(display, PORT_D); + bool dsi_present = intel_bios_is_dsi_present(display, NULL); /* * VBT's 'dvo port' field for child devices references the DDI, not diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 85c3f806c821..926cf3751593 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4854,9 +4854,10 @@ static bool port_in_use(struct drm_i915_private *i915, enum port port) return false; } -void intel_ddi_init(struct drm_i915_private *dev_priv, +void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_digital_port *dig_port; struct intel_encoder *encoder; bool init_hdmi, init_dp; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 434de7196875..6d85422bdefe 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -15,6 +15,7 @@ struct intel_bios_encoder_data; struct intel_connector; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dp; struct intel_dpll_hw_state; struct intel_encoder; @@ -53,7 +54,7 @@ void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, enum port port); -void intel_ddi_init(struct drm_i915_private *dev_priv, +void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3d695688b6b5..9f2a4a854548 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7786,6 +7786,7 @@ bool assert_port_valid(struct drm_i915_private *i915, enum port port) void intel_setup_outputs(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_encoder *encoder; bool dpd_is_edp = false; @@ -7798,7 +7799,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) if (intel_ddi_crt_present(dev_priv)) intel_crt_init(dev_priv); - intel_bios_for_each_encoder(dev_priv, intel_ddi_init); + intel_bios_for_each_encoder(display, intel_ddi_init); if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) vlv_dsi_init(dev_priv); @@ -7860,14 +7861,14 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) * HDMI ports that the VBT claim are DP or eDP. */ has_edp = intel_dp_is_port_edp(dev_priv, PORT_B); - has_port = intel_bios_is_port_present(dev_priv, PORT_B); + has_port = intel_bios_is_port_present(display, PORT_B); if (intel_de_read(dev_priv, VLV_DP_B) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_B, PORT_B); if ((intel_de_read(dev_priv, VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp) g4x_hdmi_init(dev_priv, VLV_HDMIB, PORT_B); has_edp = intel_dp_is_port_edp(dev_priv, PORT_C); - has_port = intel_bios_is_port_present(dev_priv, PORT_C); + has_port = intel_bios_is_port_present(display, PORT_C); if (intel_de_read(dev_priv, VLV_DP_C) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_C, PORT_C); if ((intel_de_read(dev_priv, VLV_HDMIC) & SDVO_DETECTED || has_port) && !has_edp) @@ -7878,7 +7879,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) * eDP not supported on port D, * so no need to worry about it */ - has_port = intel_bios_is_port_present(dev_priv, PORT_D); + has_port = intel_bios_is_port_present(display, PORT_D); if (intel_de_read(dev_priv, CHV_DP_D) & DP_DETECTED || has_port) g4x_dp_init(dev_priv, CHV_DP_D, PORT_D); if (intel_de_read(dev_priv, CHV_HDMID) & SDVO_DETECTED || has_port) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 7d3ae826a353..0cf0b4223513 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1065,7 +1065,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) ARRAY_SIZE(intel_display_debugfs_list), minor->debugfs_root, minor); - intel_bios_debugfs_register(i915); + intel_bios_debugfs_register(display); intel_cdclk_debugfs_register(i915); intel_dmc_debugfs_register(i915); intel_fbc_debugfs_register(display); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index a2666b69834e..328d8b5a6b66 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -217,7 +217,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) return ret; } - intel_bios_init(i915); + intel_bios_init(display); ret = intel_vga_register(i915); if (ret) @@ -275,7 +275,7 @@ cleanup_vga_client_pw_domain_dmc: cleanup_vga: intel_vga_unregister(i915); cleanup_bios: - intel_bios_driver_remove(i915); + intel_bios_driver_remove(display); return ret; } @@ -615,13 +615,15 @@ void intel_display_driver_remove_noirq(struct drm_i915_private *i915) /* part #3: call after gem init */ void intel_display_driver_remove_nogem(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + intel_dmc_fini(i915); intel_power_domains_driver_remove(i915); intel_vga_unregister(i915); - intel_bios_driver_remove(i915); + intel_bios_driver_remove(display); } void intel_display_driver_unregister(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 853c712686d8..afcd2af82942 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1697,6 +1697,7 @@ static void icp_irq_postinstall(struct drm_i915_private *i915); void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_uncore *uncore = &dev_priv->uncore; u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | @@ -1731,7 +1732,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) } else if (DISPLAY_VER(dev_priv) >= 11) { enum port port; - if (intel_bios_is_dsi_present(dev_priv, &port)) + if (intel_bios_is_dsi_present(display, &port)) de_port_masked |= DSI0_TE | DSI1_TE; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 49a37b996530..977f149551f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6503,8 +6503,9 @@ static bool _intel_dp_is_port_edp(struct drm_i915_private *dev_priv, bool intel_dp_is_port_edp(struct drm_i915_private *i915, enum port port) { + struct intel_display *display = &i915->display; const struct intel_bios_encoder_data *devdata = - intel_bios_encoder_data_lookup(i915, port); + intel_bios_encoder_data_lookup(display, port); return _intel_dp_is_port_edp(i915, devdata, port); } @@ -6607,6 +6608,7 @@ static void intel_edp_backlight_setup(struct intel_dp *intel_dp, static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *intel_connector) { + struct intel_display *display = to_intel_display(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct drm_connector *connector = &intel_connector->base; struct drm_display_mode *fixed_mode; @@ -6632,7 +6634,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } - intel_bios_init_panel_early(dev_priv, &intel_connector->panel, + intel_bios_init_panel_early(display, &intel_connector->panel, encoder->devdata); if (!intel_pps_init(intel_dp)) { @@ -6729,7 +6731,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, drm_edid = ERR_PTR(-ENOENT); } - intel_bios_init_panel_late(dev_priv, &intel_connector->panel, encoder->devdata, + intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, IS_ERR(drm_edid) ? NULL : drm_edid); intel_panel_add_edid_fixed_modes(intel_connector, true); diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 9f018503d4fd..fb4ed9f7855b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -838,6 +838,7 @@ static void intel_lvds_add_properties(struct drm_connector *connector) */ void intel_lvds_init(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct intel_lvds_encoder *lvds_encoder; struct intel_connector *connector; const struct drm_edid *drm_edid; @@ -872,7 +873,7 @@ void intel_lvds_init(struct drm_i915_private *i915) } ddc_pin = GMBUS_PIN_PANEL; - if (!intel_bios_is_lvds_present(i915, &ddc_pin)) { + if (!intel_bios_is_lvds_present(display, &ddc_pin)) { if ((lvds & LVDS_PORT_EN) == 0) { drm_dbg_kms(&i915->drm, "LVDS is not present in VBT\n"); @@ -966,7 +967,7 @@ void intel_lvds_init(struct drm_i915_private *i915) } else { drm_edid = ERR_PTR(-ENOENT); } - intel_bios_init_panel_late(i915, &connector->panel, NULL, + intel_bios_init_panel_late(display, &connector->panel, NULL, IS_ERR(drm_edid) ? NULL : drm_edid); /* Try EDID first */ diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 7a9d6b2f4100..dfa1d9f30d33 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -870,7 +870,6 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = { int intel_opregion_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_opregion *opregion; struct pci_dev *pdev = to_pci_dev(display->drm->dev); u32 asls, mboxes; @@ -991,7 +990,7 @@ int intel_opregion_setup(struct intel_display *display) vbt = opregion->rvda; vbt_size = opregion->asle->rvds; - if (intel_bios_is_valid_vbt(i915, vbt, vbt_size)) { + if (intel_bios_is_valid_vbt(display, vbt, vbt_size)) { drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (RVDA)\n"); opregion->vbt = vbt; @@ -1016,7 +1015,7 @@ int intel_opregion_setup(struct intel_display *display) vbt_size = (mboxes & MBOX_ASLE_EXT) ? OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(i915, vbt, vbt_size)) { + if (intel_bios_is_valid_vbt(display, vbt, vbt_size)) { drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 4cfa27ca8c22..7cc519b402e9 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2919,6 +2919,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, u16 type) static bool intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, u16 type) { + struct intel_display *display = to_intel_display(&intel_sdvo->base); struct drm_encoder *encoder = &intel_sdvo->base.base; struct drm_i915_private *i915 = to_i915(encoder->dev); struct drm_connector *connector; @@ -2946,7 +2947,7 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, u16 type) if (!intel_sdvo_create_enhance_property(intel_sdvo, intel_sdvo_connector)) goto err; - intel_bios_init_panel_late(i915, &intel_connector->panel, NULL, NULL); + intel_bios_init_panel_late(display, &intel_connector->panel, NULL, NULL); /* * Fetch modes from VBT. For SDVO prefer the VBT mode since some diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 9df0f1263913..bfc43bda8532 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1930,6 +1930,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) void intel_tv_init(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct drm_connector *connector; struct intel_tv *intel_tv; struct intel_encoder *intel_encoder; @@ -1939,7 +1940,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) if ((intel_de_read(dev_priv, TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; - if (!intel_bios_is_tv_present(dev_priv)) { + if (!intel_bios_is_tv_present(display)) { drm_dbg_kms(&dev_priv->drm, "Integrated TV is not present.\n"); return; } diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index eae5b5e09aa8..248bab691181 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1880,6 +1880,7 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { void vlv_dsi_init(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_dsi *intel_dsi; struct intel_encoder *encoder; struct intel_connector *connector; @@ -1891,7 +1892,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "\n"); /* There is no detection method for MIPI so rely on VBT */ - if (!intel_bios_is_dsi_present(dev_priv, &port)) + if (!intel_bios_is_dsi_present(display, &port)) return; if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) @@ -1946,7 +1947,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_dsi->panel_power_off_time = ktime_get_boottime(); - intel_bios_init_panel_late(dev_priv, &connector->panel, NULL, NULL); + intel_bios_init_panel_late(display, &connector->panel, NULL, NULL); if (connector->panel.vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); From e332a5aba83500e8d422c90d2a84d8a5f888673e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 8 Aug 2024 02:47:28 +0900 Subject: [PATCH 0764/1523] treewide: remove unnecessary inclusion These files do not use any macros defined in . Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- drivers/accessibility/speakup/genmap.c | 1 - drivers/accessibility/speakup/makemapdata.c | 1 - drivers/staging/media/atomisp/include/linux/atomisp.h | 1 - samples/trace_events/trace_custom_sched.c | 1 - sound/soc/codecs/cs42l42.c | 1 - 5 files changed, 5 deletions(-) diff --git a/drivers/accessibility/speakup/genmap.c b/drivers/accessibility/speakup/genmap.c index 0125000e00d9..0882bab10fb8 100644 --- a/drivers/accessibility/speakup/genmap.c +++ b/drivers/accessibility/speakup/genmap.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c index d7d41bb9b05f..55e4ef8a93dc 100644 --- a/drivers/accessibility/speakup/makemapdata.c +++ b/drivers/accessibility/speakup/makemapdata.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/staging/media/atomisp/include/linux/atomisp.h b/drivers/staging/media/atomisp/include/linux/atomisp.h index 16c9da172c03..fefbe3cd08f3 100644 --- a/drivers/staging/media/atomisp/include/linux/atomisp.h +++ b/drivers/staging/media/atomisp/include/linux/atomisp.h @@ -20,7 +20,6 @@ #define _ATOM_ISP_H #include -#include /* struct media_device_info.hw_revision */ #define ATOMISP_HW_REVISION_MASK 0x0000ff00 diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index b99d9ab7db85..dd409b704b35 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) fmt #include -#include #include #include diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 60d366e53526..6400ac875e6f 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include From 1472464c6248575bf2d01c7f076b94704bb32c95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 8 Aug 2024 03:03:00 +0900 Subject: [PATCH 0765/1523] kbuild: avoid scripts/kallsyms parsing /dev/null On macOS, as reported by Daniel Gomez, getline() sets ENOTTY to errno if it is requested to read from /dev/null. If this is worth fixing, I would rather pass an empty file to scripts/kallsyms instead of adding the ugly #ifdef __APPLE__. Fixes: c442db3f49f2 ("kbuild: remove PROVIDE() for kallsyms symbols") Reported-by: Daniel Gomez Closes: https://lore.kernel.org/all/20240807-macos-build-support-v1-12-4cd1ded85694@samsung.com/ Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Daniel Gomez --- scripts/link-vmlinux.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba9..41c68ae3415d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -219,7 +219,8 @@ kallsymso= strip_debug= if is_enabled CONFIG_KALLSYMS; then - kallsyms /dev/null .tmp_vmlinux0.kallsyms + truncate -s0 .tmp_vmlinux.kallsyms0.syms + kallsyms .tmp_vmlinux.kallsyms0.syms .tmp_vmlinux0.kallsyms fi if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then From a9a18e8f770c9b0703dab93580d0b02e199a4c79 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2024 15:28:19 +0300 Subject: [PATCH 0766/1523] atm: idt77252: prevent use after free in dequeue_rx() We can't dereference "skb" after calling vcc->push() because the skb is released. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/atm/idt77252.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e7f713cd70d3..a876024d8a05 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) rpp->len += skb->len; if (stat & SAR_RSQE_EPDU) { + unsigned int len, truesize; unsigned char *l1l2; - unsigned int len; l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6); @@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); + truesize = skb->truesize; vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - if (skb->truesize > SAR_FB_SIZE_3) + if (truesize > SAR_FB_SIZE_3) add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); - else if (skb->truesize > SAR_FB_SIZE_2) + else if (truesize > SAR_FB_SIZE_2) add_rx_skb(card, 2, SAR_FB_SIZE_2, 1); - else if (skb->truesize > SAR_FB_SIZE_1) + else if (truesize > SAR_FB_SIZE_1) add_rx_skb(card, 1, SAR_FB_SIZE_1, 1); else add_rx_skb(card, 0, SAR_FB_SIZE_0, 1); From 9ff2f816e2aa65ca9a1cdf0954842f8173c0f48d Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 9 Aug 2024 11:56:09 +0530 Subject: [PATCH 0767/1523] net: axienet: Fix register defines comment description In axiethernet header fix register defines comment description to be inline with IP documentation. It updates MAC configuration register, MDIO configuration register and frame filter control description. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index fa5500decc96..c7d9221fafdc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -160,16 +160,16 @@ #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ -#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ -#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ +#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ +#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ -#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ -#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ -#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ -#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ +#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ +#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ +#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ +#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ -#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ +#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ @@ -308,7 +308,7 @@ */ #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF -/* Bit masks for Axi Ethernet FMI register */ +/* Bit masks for Axi Ethernet FMC register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ From 63796bc2e97cd5ebcef60bad4953259d4ad11cb4 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 9 Aug 2024 21:38:02 +0200 Subject: [PATCH 0768/1523] net: dsa: vsc73xx: fix port MAC configuration in full duplex mode According to the datasheet description ("Port Mode Procedure" in 5.6.2), the VSC73XX_MAC_CFG_WEXC_DIS bit is configured only for half duplex mode. The WEXC_DIS bit is responsible for MAC behavior after an excessive collision. Let's set it as described in the datasheet. Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Signed-off-by: Pawel Dembicki Signed-off-by: David S. Miller --- drivers/net/dsa/vitesse-vsc73xx-core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index d9d3e30fd47a..f548ed4cb23f 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -957,6 +957,11 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, if (duplex == DUPLEX_FULL) val |= VSC73XX_MAC_CFG_FDX; + else + /* In datasheet description ("Port Mode Procedure" in 5.6.2) + * this bit is configured only for half duplex. + */ + val |= VSC73XX_MAC_CFG_WEXC_DIS; /* This routine is described in the datasheet (below ARBDISC register * description) @@ -967,7 +972,6 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, get_random_bytes(&seed, 1); val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET; val |= VSC73XX_MAC_CFG_SEED_LOAD; - val |= VSC73XX_MAC_CFG_WEXC_DIS; /* Those bits are responsible for MTU only. Kernel takes care about MTU, * let's enable +8 bytes frame length unconditionally. From 5b9eebc2c7a5f0cc7950d918c1e8a4ad4bed5010 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 9 Aug 2024 21:38:03 +0200 Subject: [PATCH 0769/1523] net: dsa: vsc73xx: pass value in phy_write operation In the 'vsc73xx_phy_write' function, the register value is missing, and the phy write operation always sends zeros. This commit passes the value variable into the proper register. Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Signed-off-by: Pawel Dembicki Signed-off-by: David S. Miller --- drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index f548ed4cb23f..4b300c293dec 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -574,7 +574,7 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, return 0; } - cmd = (phy << 21) | (regnum << 16); + cmd = (phy << 21) | (regnum << 16) | val; ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; From fa63c6434b6f6aaf9d8d599dc899bc0a074cc0ad Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 9 Aug 2024 21:38:04 +0200 Subject: [PATCH 0770/1523] net: dsa: vsc73xx: check busy flag in MDIO operations The VSC73xx has a busy flag used during MDIO operations. It is raised when MDIO read/write operations are in progress. Without it, PHYs are misconfigured and bus operations do not work as expected. Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Signed-off-by: Pawel Dembicki Signed-off-by: David S. Miller --- drivers/net/dsa/vitesse-vsc73xx-core.c | 37 +++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 4b300c293dec..a789b2da9b7d 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -40,6 +40,10 @@ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ +/* MII Block subblock */ +#define VSC73XX_BLOCK_MII_INTERNAL 0x0 /* Internal MDIO subblock */ +#define VSC73XX_BLOCK_MII_EXTERNAL 0x1 /* External MDIO subblock */ + #define CPU_PORT 6 /* CPU port */ /* MAC Block registers */ @@ -225,6 +229,8 @@ #define VSC73XX_MII_CMD 0x1 #define VSC73XX_MII_DATA 0x2 +#define VSC73XX_MII_STAT_BUSY BIT(3) + /* Arbiter block 5 registers */ #define VSC73XX_ARBEMPTY 0x0c #define VSC73XX_ARBDISC 0x0e @@ -299,6 +305,7 @@ #define IS_739X(a) (IS_7395(a) || IS_7398(a)) #define VSC73XX_POLL_SLEEP_US 1000 +#define VSC73XX_MDIO_POLL_SLEEP_US 5 #define VSC73XX_POLL_TIMEOUT_US 10000 struct vsc73xx_counter { @@ -527,6 +534,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc) return 0; } +static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc) +{ + int ret, err; + u32 val; + + ret = read_poll_timeout(vsc73xx_read, err, + err < 0 || !(val & VSC73XX_MII_STAT_BUSY), + VSC73XX_MDIO_POLL_SLEEP_US, + VSC73XX_POLL_TIMEOUT_US, false, vsc, + VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL, + VSC73XX_MII_STAT, &val); + if (ret) + return ret; + return err; +} + static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) { struct vsc73xx *vsc = ds->priv; @@ -534,12 +557,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) u32 val; int ret; + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + /* Setting bit 26 means "read" */ cmd = BIT(26) | (phy << 21) | (regnum << 16); ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; - msleep(2); + + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val); if (ret) return ret; @@ -563,6 +594,10 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, u32 cmd; int ret; + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + /* It was found through tedious experiments that this router * chip really hates to have it's PHYs reset. They * never recover if that happens: autonegotiation stops From 9f9a72654622bae75adb1e1923d709e96ede3042 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 9 Aug 2024 21:38:05 +0200 Subject: [PATCH 0771/1523] net: dsa: vsc73xx: allow phy resetting Resetting the VSC73xx PHY was problematic because the MDIO bus, without a busy check, read and wrote incorrect register values. My investigation indicates that resetting the PHY only triggers changes in configuration. However, improper register values written earlier were only exposed after a soft reset. The reset itself wasn't the issue; rather, the problem stemmed from incorrect read and write operations. A 'soft_reset' can now proceed normally. There are no reasons to keep the VSC73xx from being reset. This commit removes the reset blockade in the 'vsc73xx_phy_write' function. Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Signed-off-by: Pawel Dembicki Signed-off-by: David S. Miller --- drivers/net/dsa/vitesse-vsc73xx-core.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index a789b2da9b7d..e3f95d2cc2c1 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -598,17 +598,6 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, if (ret) return ret; - /* It was found through tedious experiments that this router - * chip really hates to have it's PHYs reset. They - * never recover if that happens: autonegotiation stops - * working after a reset. Just filter out this command. - * (Resetting the whole chip is OK.) - */ - if (regnum == 0 && (val & BIT(15))) { - dev_info(vsc->dev, "reset PHY - disallowed\n"); - return 0; - } - cmd = (phy << 21) | (regnum << 16) | val; ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) From de7a670f8defe4ed2115552ad23dea0f432f7be4 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 9 Aug 2024 21:38:06 +0200 Subject: [PATCH 0772/1523] net: phy: vitesse: repair vsc73xx autonegotiation When the vsc73xx mdio bus work properly, the generic autonegotiation configuration works well. Reviewed-by: Linus Walleij Signed-off-by: Pawel Dembicki Signed-off-by: David S. Miller --- drivers/net/phy/vitesse.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 897b979ec03c..3b5fcaf0dd36 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -237,16 +237,6 @@ static int vsc739x_config_init(struct phy_device *phydev) return 0; } -static int vsc73xx_config_aneg(struct phy_device *phydev) -{ - /* The VSC73xx switches does not like to be instructed to - * do autonegotiation in any way, it prefers that you just go - * with the power-on/reset defaults. Writing some registers will - * just make autonegotiation permanently fail. - */ - return 0; -} - /* This adds a skew for both TX and RX clocks, so the skew should only be * applied to "rgmii-id" interfaces. It may not work as expected * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. @@ -444,7 +434,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -453,7 +442,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -462,7 +450,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -471,7 +458,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { From e7a9af8c93aa9f408f9972809b642faeec5287e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Aug 2024 11:32:47 +0200 Subject: [PATCH 0773/1523] powerpc/mm: Fix size of allocated PGDIR Commit 6b0e82791bd0 ("powerpc/e500: switch to 64 bits PGD on 85xx (32 bits)") increased the size of PGD entries but failed to increase the PGD directory. Use the size of pgd_t instead of the size of pointers to calculate the allocated size. Reported-by: Guenter Roeck Fixes: 6b0e82791bd0 ("powerpc/e500: switch to 64 bits PGD on 85xx (32 bits)") Signed-off-by: Christophe Leroy Tested-by: Guenter Roeck Signed-off-by: Michael Ellerman Link: https://msgid.link/1cdaacb391cbd3e0240f0e0faf691202874e9422.1723109462.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/init-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f8..2978fcbe307e 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store From e7e846dc6c73fbc94ae8b4ec20d05627646416f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Aug 2024 09:05:08 +0200 Subject: [PATCH 0774/1523] powerpc/mm: Fix boot warning with hugepages and CONFIG_DEBUG_VIRTUAL Booting with CONFIG_DEBUG_VIRTUAL leads to following warning when passing hugepage reservation on command line: Kernel command line: hugepagesz=1g hugepages=1 hugepagesz=64m hugepages=1 hugepagesz=256m hugepages=1 noreboot HugeTLB: allocating 1 of page size 1.00 GiB failed. Only allocated 0 hugepages. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at arch/powerpc/include/asm/io.h:948 __alloc_bootmem_huge_page+0xd4/0x284 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.10.0-rc6-00396-g6b0e82791bd0-dirty #936 Hardware name: MPC8544DS e500v2 0x80210030 MPC8544 DS NIP: c1020240 LR: c10201d0 CTR: 00000000 REGS: c13fdd30 TRAP: 0700 Not tainted (6.10.0-rc6-00396-g6b0e82791bd0-dirty) MSR: 00021000 CR: 44084288 XER: 20000000 GPR00: c10201d0 c13fde20 c130b560 e8000000 e8001000 00000000 00000000 c1420000 GPR08: 00000000 00028001 00000000 00000004 44084282 01066ac0 c0eb7c9c efffe149 GPR16: c0fc4228 0000005f ffffffff c0eb7d0c c0eb7cc0 c0eb7ce0 ffffffff 00000000 GPR24: c1441cec efffe153 e8001000 c14240c0 00000000 c1441d64 00000000 e8000000 NIP [c1020240] __alloc_bootmem_huge_page+0xd4/0x284 LR [c10201d0] __alloc_bootmem_huge_page+0x64/0x284 Call Trace: [c13fde20] [c10201d0] __alloc_bootmem_huge_page+0x64/0x284 (unreliable) [c13fde50] [c10207b8] hugetlb_hstate_alloc_pages+0x8c/0x3e8 [c13fdeb0] [c1021384] hugepages_setup+0x240/0x2cc [c13fdef0] [c1000574] unknown_bootoption+0xfc/0x280 [c13fdf30] [c0078904] parse_args+0x200/0x4c4 [c13fdfa0] [c1000d9c] start_kernel+0x238/0x7d0 [c13fdff0] [c0000434] set_ivor+0x12c/0x168 Code: 554aa33e 7c042840 3ce0c142 80a7427c 5109a016 50caa016 7c9a2378 7fdcf378 4180000c 7c052040 41810160 7c095040 <0fe00000> 38c00000 40800108 3c60c0eb ---[ end trace 0000000000000000 ]--- This is due to virt_addr_valid() using high_memory before it is set. high_memory is set in mem_init() using max_low_pfn, but max_low_pfn is available long before, it is set in mem_topology_setup(). So just like commit daa9ada2093e ("powerpc/mm: Fix boot crash with FLATMEM") moved the setting of max_mapnr immediately after the call to mem_topology_setup(), the same can be done for high_memory. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/62b69c4baad067093f39e7e60df0fe27a86b8d2a.1723100702.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/setup-common.c | 1 + arch/powerpc/mm/mem.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616ba..943430077375 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201..da21cb018984 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); From c25504a0ba36968f919aa30caff172ef23346299 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 9 Aug 2024 16:06:53 -0400 Subject: [PATCH 0775/1523] dt-bindings: net: fsl,qoriq-mc-dpmac: add missed property phys Add missed property phys, which indicate how connect to serdes phy. Fix below warning: arch/arm64/boot/dts/freescale/fsl-lx2160a-honeycomb.dtb: fsl-mc@80c000000: dpmacs:ethernet@7: Unevaluated properties are not allowed ('phys' was unexpected) Signed-off-by: Frank Li Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index a1b71b35319e..42f9843d1868 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -38,6 +38,10 @@ properties: managed: true + phys: + description: A reference to the SerDes lane(s) + maxItems: 1 + required: - reg From 32316f676b4ee87c0404d333d248ccf777f739bc Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 9 Aug 2024 14:01:24 -0700 Subject: [PATCH 0776/1523] net: mana: Fix RX buf alloc_size alignment and atomic op panic The MANA driver's RX buffer alloc_size is passed into napi_build_skb() to create SKB. skb_shinfo(skb) is located at the end of skb, and its alignment is affected by the alloc_size passed into napi_build_skb(). The size needs to be aligned properly for better performance and atomic operations. Otherwise, on ARM64 CPU, for certain MTU settings like 4000, atomic operations may panic on the skb_shinfo(skb)->dataref due to alignment fault. To fix this bug, add proper alignment to the alloc_size calculation. Sample panic info: [ 253.298819] Unable to handle kernel paging request at virtual address ffff000129ba5cce [ 253.300900] Mem abort info: [ 253.301760] ESR = 0x0000000096000021 [ 253.302825] EC = 0x25: DABT (current EL), IL = 32 bits [ 253.304268] SET = 0, FnV = 0 [ 253.305172] EA = 0, S1PTW = 0 [ 253.306103] FSC = 0x21: alignment fault Call trace: __skb_clone+0xfc/0x198 skb_clone+0x78/0xe0 raw6_local_deliver+0xfc/0x228 ip6_protocol_deliver_rcu+0x80/0x500 ip6_input_finish+0x48/0x80 ip6_input+0x48/0xc0 ip6_sublist_rcv_finish+0x50/0x78 ip6_sublist_rcv+0x1cc/0x2b8 ipv6_list_rcv+0x100/0x150 __netif_receive_skb_list_core+0x180/0x220 netif_receive_skb_list_internal+0x198/0x2a8 __napi_poll+0x138/0x250 net_rx_action+0x148/0x330 handle_softirqs+0x12c/0x3a0 Cc: stable@vger.kernel.org Fixes: 80f6215b450e ("net: mana: Add support for jumbo frame") Signed-off-by: Haiyang Zhang Reviewed-by: Long Li Signed-off-by: David S. Miller --- drivers/net/ethernet/microsoft/mana/mana_en.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d2f07e179e86..ae717d06e66f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } From db1b4bedb9b97c6d34b03d03815147c04fffe8b4 Mon Sep 17 00:00:00 2001 From: Zheng Zhang Date: Sat, 10 Aug 2024 13:26:51 +0800 Subject: [PATCH 0777/1523] net: ethernet: mtk_wed: fix use-after-free panic in mtk_wed_setup_tc_block_cb() When there are multiple ap interfaces on one band and with WED on, turning the interface down will cause a kernel panic on MT798X. Previously, cb_priv was freed in mtk_wed_setup_tc_block() without marking NULL,and mtk_wed_setup_tc_block_cb() didn't check the value, too. Assign NULL after free cb_priv in mtk_wed_setup_tc_block() and check NULL in mtk_wed_setup_tc_block_cb(). ---------- Unable to handle kernel paging request at virtual address 0072460bca32b4f5 Call trace: mtk_wed_setup_tc_block_cb+0x4/0x38 0xffffffc0794084bc tcf_block_playback_offloads+0x70/0x1e8 tcf_block_unbind+0x6c/0xc8 ... --------- Fixes: 799684448e3e ("net: ethernet: mtk_wed: introduce wed wo support") Signed-off-by: Zheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_wed.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 61334a71058c..e212a4ba9275 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri { struct mtk_wed_flow_block_priv *priv = cb_priv; struct flow_cls_offload *cls = type_data; - struct mtk_wed_hw *hw = priv->hw; + struct mtk_wed_hw *hw = NULL; - if (!tc_can_offload(priv->dev)) + if (!priv || !tc_can_offload(priv->dev)) return -EOPNOTSUPP; if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + hw = priv->hw; return mtk_flow_offload_cmd(hw->eth, cls, hw->index); } @@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); kfree(block_cb->cb_priv); + block_cb->cb_priv = NULL; } return 0; default: From 497d370a644d95a9f04271aa92cb96d32e84c770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 9 Aug 2024 12:18:45 -0300 Subject: [PATCH 0778/1523] drm/v3d: Fix out-of-bounds read in `v3d_csd_job_run()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabling UBSAN on Raspberry Pi 5, we get the following warning: [ 387.894977] UBSAN: array-index-out-of-bounds in drivers/gpu/drm/v3d/v3d_sched.c:320:3 [ 387.903868] index 7 is out of range for type '__u32 [7]' [ 387.909692] CPU: 0 PID: 1207 Comm: kworker/u16:2 Tainted: G WC 6.10.3-v8-16k-numa #151 [ 387.919166] Hardware name: Raspberry Pi 5 Model B Rev 1.0 (DT) [ 387.925961] Workqueue: v3d_csd drm_sched_run_job_work [gpu_sched] [ 387.932525] Call trace: [ 387.935296] dump_backtrace+0x170/0x1b8 [ 387.939403] show_stack+0x20/0x38 [ 387.942907] dump_stack_lvl+0x90/0xd0 [ 387.946785] dump_stack+0x18/0x28 [ 387.950301] __ubsan_handle_out_of_bounds+0x98/0xd0 [ 387.955383] v3d_csd_job_run+0x3a8/0x438 [v3d] [ 387.960707] drm_sched_run_job_work+0x520/0x6d0 [gpu_sched] [ 387.966862] process_one_work+0x62c/0xb48 [ 387.971296] worker_thread+0x468/0x5b0 [ 387.975317] kthread+0x1c4/0x1e0 [ 387.978818] ret_from_fork+0x10/0x20 [ 387.983014] ---[ end trace ]--- This happens because the UAPI provides only seven configuration registers and we are reading the eighth position of this u32 array. Therefore, fix the out-of-bounds read in `v3d_csd_job_run()` by accessing only seven positions on the '__u32 [7]' array. The eighth register exists indeed on V3D 7.1, but it isn't currently used. That being so, let's guarantee that it remains unused and add a note that it could be set in a future patch. Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x") Reported-by: Tvrtko Ursulin Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240809152001.668314-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_sched.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 9bd7453b25ad..b8682818bafa 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -315,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -335,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); From 2a07bb64d80152701d507b1498237ed1b8d83866 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Mon, 12 Aug 2024 14:57:32 +0200 Subject: [PATCH 0779/1523] s390/dasd: Remove DMA alignment This reverts commit bc792884b76f ("s390/dasd: Establish DMA alignment"). Quoting the original commit: linux-next commit bf8d08532bc1 ("iomap: add support for dma aligned direct-io") changes the alignment requirement to come from the block device rather than the block size, and the default alignment requirement is 512-byte boundaries. Since DASD I/O has page alignments for IDAW/TIDAW requests, let's override this value to restore the expected behavior. I mentioned TIDAW, but that was wrong. TIDAWs have no distinct alignment requirement (per p. 15-70 of POPS SA22-7832-13): Unless otherwise specified, TIDAWs may designate a block of main storage on any boundary and length up to 4K bytes, provided the specified block does not cross a 4 K-byte boundary. IDAWs do, but the original commit neglected that while ECKD DASD are typically formatted in 4096-byte blocks, they don't HAVE to be. Formatting an ECKD volume with smaller blocks is permitted (dasdfmt -b xxx), and the problematic commit enforces alignment properties to such a device that will result in errors, such as: [test@host ~]# lsdasd -l a367 | grep blksz blksz: 512 [test@host ~]# mkfs.xfs -f /dev/disk/by-path/ccw-0.0.a367-part1 meta-data=/dev/dasdc1 isize=512 agcount=4, agsize=230075 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=1 = reflink=1 bigtime=1 inobtcount=1 nrext64=1 data = bsize=4096 blocks=920299, imaxpct=25 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=16384, version=2 = sectsz=512 sunit=0 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 error reading existing superblock: Invalid argument mkfs.xfs: pwrite failed: Invalid argument libxfs_bwrite: write failed on (unknown) bno 0x70565c/0x100, err=22 mkfs.xfs: Releasing dirty buffer to free list! found dirty buffer (bulk) on free list! mkfs.xfs: pwrite failed: Invalid argument ...snipped... The original commit omitted the FBA discipline for just this reason, but the formatted block size of the other disciplines was overlooked. The solution to all of this is to revert to the original behavior, such that the block size can be respected. There were two commits [1] that moved this code in the interim, so a straight git-revert is not possible, but the change is straightforward. But what of the original problem? That was manifested with a direct-io QEMU guest, where QEMU itself was changed a month or two later with commit 25474d90aa ("block: use the request length for iov alignment") such that the blamed kernel commit is unnecessary. [1] commit 0127a47f58c6 ("dasd: move queue setup to common code") commit fde07a4d74e3 ("dasd: use the atomic queue limits API") Fixes: bc792884b76f ("s390/dasd: Establish DMA alignment") Reviewed-by: Stefan Haberland Signed-off-by: Eric Farman Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240812125733.126431-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_genhd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 1aa426b1dedd..6da47a65af61 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,7 +41,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) */ .max_segment_size = PAGE_SIZE, .seg_boundary_mask = PAGE_SIZE - 1, - .dma_alignment = PAGE_SIZE - 1, .max_segments = USHRT_MAX, }; struct gendisk *gdp; From 7db4042336580dfd75cb5faa82c12cd51098c90b Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 12 Aug 2024 14:57:33 +0200 Subject: [PATCH 0780/1523] s390/dasd: fix error recovery leading to data corruption on ESE devices Extent Space Efficient (ESE) or thin provisioned volumes need to be formatted on demand during usual IO processing. The dasd_ese_needs_format function checks for error codes that signal the non existence of a proper track format. The check for incorrect length is to imprecise since other error cases leading to transport of insufficient data also have this flag set. This might lead to data corruption in certain error cases for example during a storage server warmstart. Fix by removing the check for incorrect length and replacing by explicitly checking for invalid track format in transport mode. Also remove the check for file protected since this is not a valid ESE handling case. Cc: stable@vger.kernel.org # 5.3+ Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240812125733.126431-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 36 ++++++++++++------- drivers/s390/block/dasd_3990_erp.c | 10 ++---- drivers/s390/block/dasd_eckd.c | 57 +++++++++++++----------------- drivers/s390/block/dasd_int.h | 2 +- 4 files changed, 51 insertions(+), 54 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a97cfedd706..42a4a996defb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct dasd_block *block, struct irb *irb) if (!sense) return 0; - return !!(sense[1] & SNS1_NO_REC_FOUND) || - !!(sense[1] & SNS1_FILE_PROTECTED) || - scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN; + if (sense[1] & SNS1_NO_REC_FOUND) + return 1; + + if ((sense[1] & SNS1_INV_TRACK_FORMAT) && + scsw_is_tm(&irb->scsw) && + !(sense[2] & SNS2_ENV_DATA_PRESENT)) + return 1; + + return 0; } static int dasd_ese_oos_cond(u8 *sense) @@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, struct dasd_device *device; unsigned long now; int nrf_suppressed = 0; - int fp_suppressed = 0; + int it_suppressed = 0; struct request *req; u8 *sense = NULL; int expires; @@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, */ sense = dasd_get_sense(irb); if (sense) { - fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); + it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) && test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); @@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } } - if (!(fp_suppressed || nrf_suppressed)) + if (!(it_suppressed || nrf_suppressed)) device->discipline->dump_sense_dbf(device, irb, "int"); if (device->features & DASD_FEATURE_ERPLOG) @@ -2459,14 +2466,17 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and error recovery would be - * unnecessary in these cases. Check if the according suppress - * bit is set. + * In some cases certain errors might be expected and + * error recovery would be unnecessary in these cases. + * Check if the according suppress bit is set. */ sense = dasd_get_sense(&cqr->irb); - if (sense && sense[1] & SNS1_FILE_PROTECTED && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags)) + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags)) + continue; + if (sense && (sense[1] & SNS1_NO_REC_FOUND) && + test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags)) continue; if (scsw_cstat(&cqr->irb.scsw) == 0x40 && test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags)) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index bbbacfc386f2..d0aa267462c5 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp) struct dasd_device *device = erp->startdev; - /* - * In some cases the 'File Protected' error might be expected and - * log messages shouldn't be written then. - * Check if the according suppress bit is set. - */ - if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags)) - dev_err(&device->cdev->dev, - "Accessing the DASD failed because of a hardware error\n"); + dev_err(&device->cdev->dev, + "Accessing the DASD failed because of a hardware error\n"); return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9388b5c383ca..90b106408992 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2275,6 +2275,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); return cqr; } @@ -2556,7 +2557,6 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); return cqr; @@ -4130,8 +4130,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); } @@ -4633,9 +4631,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); } return cqr; @@ -5780,36 +5777,32 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, { u8 *sense = dasd_get_sense(irb); - if (scsw_is_tm(&irb->scsw)) { - /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and log messages shouldn't be written - * then. Check if the according suppress bit is set. - */ - if (sense && (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &req->flags)) - return; - if (scsw_cstat(&irb->scsw) == 0x40 && - test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) - return; + /* + * In some cases certain errors might be expected and + * log messages shouldn't be written then. + * Check if the according suppress bit is set. + */ + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &req->flags)) + return; + if (sense && sense[0] & SNS0_CMD_REJECT && + test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) + return; + + if (sense && sense[1] & SNS1_NO_REC_FOUND && + test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) + return; + + if (scsw_cstat(&irb->scsw) == 0x40 && + test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) + return; + + if (scsw_is_tm(&irb->scsw)) dasd_eckd_dump_sense_tcw(device, req, irb); - } else { - /* - * In some cases the 'Command Reject' or 'No Record Found' - * error might be expected and log messages shouldn't be - * written then. Check if the according suppress bit is set. - */ - if (sense && sense[0] & SNS0_CMD_REJECT && - test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) - return; - - if (sense && sense[1] & SNS1_NO_REC_FOUND && - test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) - return; - + else dasd_eckd_dump_sense_ccw(device, req, irb); - } } static int dasd_eckd_reload_device(struct dasd_device *device) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e5f40536b425..81cfb5c89681 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -196,7 +196,7 @@ struct dasd_ccw_req { * The following flags are used to suppress output of certain errors. */ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ -#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ +#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ From acc4e41ec41fa28ae9fbdb1a2750525a7a242743 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 21:15:18 +0200 Subject: [PATCH 0781/1523] drm/xe/gt: Add APIs for printing stats over debugfs Add skeleton APIs for recording and printing various stats over debugfs. This currently only added counter types stats which is backed by atomic_t and wrapped with CONFIG_DRM_XE_STATS so this can be disabled on production system. v4: Rebase and other minor fixes (Matt) v3: s/CONFIG_DRM_XE_STATS/CONFIG_DEBUG_FS(Lucas) v2: add missing docs Add boundary checks for stats id and other improvements (Michal) Fix build when CONFIG_DRM_XE_STATS is disabled(Matt) Cc: Lucas De Marchi Cc: Matthew Brost Cc: Michal Wajdeczko Cc: Sai Gowtham Ch Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240810191522.18616-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 ++ drivers/gpu/drm/xe/xe_gt_stats.c | 48 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_stats.h | 28 +++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 9 ++++++ 5 files changed, 88 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_stats.c create mode 100644 drivers/gpu/drm/xe/xe_gt_stats.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b846b36a7bfd..e11392b5dd3d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -277,6 +277,7 @@ ifeq ($(CONFIG_DEBUG_FS),y) xe-y += xe_debugfs.o \ xe_gt_debugfs.o \ xe_gt_sriov_vf_debugfs.o \ + xe_gt_stats.o \ xe_guc_debugfs.o \ xe_huc_debugfs.o \ xe_uc_debugfs.o diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5e7fd937917a..5125d76ccfac 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_stats.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -286,6 +287,7 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c new file mode 100644 index 000000000000..a054cd52329c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include + +#include "xe_gt.h" +#include "xe_gt_stats.h" + +/** + * xe_gt_stats_incr - Increments the specified stats counter + * @gt: graphics tile + * @id: xe_gt_stats_id type id that needs to be incremented + * @incr: value to be incremented with + * + * Increments the specified stats counter. + */ +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) +{ + if (id >= __XE_GT_STATS_NUM_IDS) + return; + + atomic_add(incr, >->stats.counters[id]); +} + +static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { +}; + +/** + * xe_gt_stats_print_info - Print the GT stats + * @gt: graphics tile + * @p: drm_printer where it will be printed out. + * + * This prints out all the available GT stats. + */ +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) +{ + enum xe_gt_stats_id id; + + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) + drm_printf(p, "%s: %d\n", stat_description[id], + atomic_read(>->stats.counters[id])); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h new file mode 100644 index 000000000000..1be71ce533db --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_H_ +#define _XE_GT_STATS_H_ + +struct xe_gt; +struct drm_printer; + +enum xe_gt_stats_id { + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#ifdef CONFIG_DEBUG_FS +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); +#else +static inline void +xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, + int incr) +{ +} + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 631928258d71..8e46c0969bc7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -10,6 +10,7 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_oa.h" @@ -133,6 +134,14 @@ struct xe_gt { u8 has_indirect_ring_state:1; } info; +#if IS_ENABLED(CONFIG_DEBUG_FS) + /** @stats: GT stats */ + struct { + /** @stats.counters: counters for various GT stats */ + atomic_t counters[__XE_GT_STATS_NUM_IDS]; + } stats; +#endif + /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a From 39fa14e5bdd037f50df0af408da7251e400b41fd Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 21:15:19 +0200 Subject: [PATCH 0782/1523] drm/xe: Add stats for tlb invalidation count Add stats for tlb invalidation count which can be viewed with per GT stat debugfs file. Example output: cat /sys/kernel/debug/dri/0/gt0/stats tlb_inval_count: 22 v2: fix #include order(Tejas) Cc: Lucas De Marchi Cc: Matthew Brost Cc: Michal Wajdeczko Cc: Sai Gowtham Ch Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240810191522.18616-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_gt_stats.c | 1 + drivers/gpu/drm/xe/xe_gt_stats.h | 1 + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index a054cd52329c..c7364a5aef8f 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "tlb_inval_count", }; /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 1be71ce533db..91d944f6c4e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -10,6 +10,7 @@ struct xe_gt; struct drm_printer; enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 87cb76a8718c..cca9cf536f76 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_gt_stats.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -213,6 +214,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; } mutex_unlock(&guc->ct.lock); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); return ret; } From 84f2eecf95018386c145ada19bb45b03bdb80d9e Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sun, 11 Aug 2024 14:07:11 -0400 Subject: [PATCH 0783/1523] io_uring/napi: check napi_enabled in io_napi_add() before proceeding doing so avoids the overhead of adding napi ids to all the rings that do not enable napi. if no id is added to napi_list because napi is disabled, __io_napi_busy_loop() will not be called. Signed-off-by: Olivier Langlois Fixes: b4ccc4dd1330 ("io_uring/napi: enable even with a timeout of 0") Link: https://lore.kernel.org/r/bd989ccef5fda14f5fd9888faf4fefcf66bd0369.1723400131.git.olivier@trillion01.com Signed-off-by: Jens Axboe --- io_uring/napi.c | 2 +- io_uring/napi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index a3dc3762008f..73c4159e8405 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -302,7 +302,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); - if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } diff --git a/io_uring/napi.h b/io_uring/napi.h index 88f1c21d5548..27b88c3eb428 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_busy_poll_dt)) + if (!READ_ONCE(ctx->napi_enabled)) return; sock = sock_from_file(req->file); From 48cc7ecd3a68e0fbfa281ef1ed6f6b6cb7638390 Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sun, 11 Aug 2024 20:34:46 -0400 Subject: [PATCH 0784/1523] io_uring/napi: remove duplicate io_napi_entry timeout assignation io_napi_entry() has 2 calling sites. One of them is unlikely to find an entry and if it does, the timeout should arguable not be updated. The other io_napi_entry() calling site is overwriting the update made by io_napi_entry() so the io_napi_entry() timeout value update has no or little value and therefore is removed. Signed-off-by: Olivier Langlois Link: https://lore.kernel.org/r/145b54ff179f87609e20dffaf5563c07cdbcad1a.1723423275.git.olivier@trillion01.com Signed-off-by: Jens Axboe --- io_uring/napi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index 73c4159e8405..1de1d4d62925 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -26,7 +26,6 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; - e->timeout = jiffies + NAPI_TIMEOUT; return e; } From 98055bc3595500bcf2126b93b1595354bdb86a66 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 27 May 2024 21:17:32 +0100 Subject: [PATCH 0785/1523] netfs: Fault in smaller chunks for non-large folio mappings As in commit 4e527d5841e2 ("iomap: fault in smaller chunks for non-large folio mappings"), we can see a performance loss for filesystems which have not yet been converted to large folios. Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write") Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240527201735.1898381-1-willy@infradead.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 4726c315453c..ca53c5d1622e 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; - size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; + size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || From 3f65f3c099bcb27949e712f39ba836f21785924a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 29 Jul 2024 15:48:12 -0700 Subject: [PATCH 0786/1523] filelock: fix name of file_lease slab cache When struct file_lease was split out from struct file_lock, the name of the file_lock slab cache was copied to the new slab cache for file_lease. This name conflict causes confusion in /proc/slabinfo and /sys/kernel/slab. In particular, it caused failures in drgn's test case for slab cache merging. Link: https://github.com/osandov/drgn/blob/9ad29fd86499eb32847473e928b6540872d3d59a/tests/linux_kernel/helpers/test_slab.py#L81 Fixes: c69ff4071935 ("filelock: split leases out of struct file_lock") Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/r/2d1d053da1cafb3e7940c4f25952da4f0af34e38.1722293276.git.osandov@fb.com Reviewed-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index 9afb16e0683f..e45cad40f8b6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2984,7 +2984,7 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - filelease_cache = kmem_cache_create("file_lock_cache", + filelease_cache = kmem_cache_create("file_lease_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { From f71aa06398aabc2e3eaac25acdf3d62e0094ba70 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 29 Jul 2024 17:19:30 +0100 Subject: [PATCH 0787/1523] fs/netfs/fscache_cookie: add missing "n_accesses" check This fixes a NULL pointer dereference bug due to a data race which looks like this: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 33 PID: 16573 Comm: kworker/u97:799 Not tainted 6.8.7-cm4all1-hp+ #43 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 10/17/2018 Workqueue: events_unbound netfs_rreq_write_to_cache_work RIP: 0010:cachefiles_prepare_write+0x30/0xa0 Code: 57 41 56 45 89 ce 41 55 49 89 cd 41 54 49 89 d4 55 53 48 89 fb 48 83 ec 08 48 8b 47 08 48 83 7f 10 00 48 89 34 24 48 8b 68 20 <48> 8b 45 08 4c 8b 38 74 45 49 8b 7f 50 e8 4e a9 b0 ff 48 8b 73 10 RSP: 0018:ffffb4e78113bde0 EFLAGS: 00010286 RAX: ffff976126be6d10 RBX: ffff97615cdb8438 RCX: 0000000000020000 RDX: ffff97605e6c4c68 RSI: ffff97605e6c4c60 RDI: ffff97615cdb8438 RBP: 0000000000000000 R08: 0000000000278333 R09: 0000000000000001 R10: ffff97605e6c4600 R11: 0000000000000001 R12: ffff97605e6c4c68 R13: 0000000000020000 R14: 0000000000000001 R15: ffff976064fe2c00 FS: 0000000000000000(0000) GS:ffff9776dfd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000005942c002 CR4: 00000000001706f0 Call Trace: ? __die+0x1f/0x70 ? page_fault_oops+0x15d/0x440 ? search_module_extables+0xe/0x40 ? fixup_exception+0x22/0x2f0 ? exc_page_fault+0x5f/0x100 ? asm_exc_page_fault+0x22/0x30 ? cachefiles_prepare_write+0x30/0xa0 netfs_rreq_write_to_cache_work+0x135/0x2e0 process_one_work+0x137/0x2c0 worker_thread+0x2e9/0x400 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Modules linked in: CR2: 0000000000000008 ---[ end trace 0000000000000000 ]--- This happened because fscache_cookie_state_machine() was slow and was still running while another process invoked fscache_unuse_cookie(); this led to a fscache_cookie_lru_do_one() call, setting the FSCACHE_COOKIE_DO_LRU_DISCARD flag, which was picked up by fscache_cookie_state_machine(), withdrawing the cookie via cachefiles_withdraw_cookie(), clearing cookie->cache_priv. At the same time, yet another process invoked cachefiles_prepare_write(), which found a NULL pointer in this code line: struct cachefiles_object *object = cachefiles_cres_object(cres); The next line crashes, obviously: struct cachefiles_cache *cache = object->volume->cache; During cachefiles_prepare_write(), the "n_accesses" counter is non-zero (via fscache_begin_operation()). The cookie must not be withdrawn until it drops to zero. The counter is checked by fscache_cookie_state_machine() before switching to FSCACHE_COOKIE_STATE_RELINQUISHING and FSCACHE_COOKIE_STATE_WITHDRAWING (in "case FSCACHE_COOKIE_STATE_FAILED"), but not for FSCACHE_COOKIE_STATE_LRU_DISCARDING ("case FSCACHE_COOKIE_STATE_ACTIVE"). This patch adds the missing check. With a non-zero access counter, the function returns and the next fscache_end_cookie_access() call will queue another fscache_cookie_state_machine() call to handle the still-pending FSCACHE_COOKIE_DO_LRU_DISCARD. Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning") Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240729162002.3436763-2-dhowells@redhat.com cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/fscache_cookie.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0..d4d4b3a8b106 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -741,6 +741,10 @@ again_locked: spin_lock(&cookie->lock); } if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) { + if (atomic_read(&cookie->n_accesses) != 0) + /* still being accessed: postpone it */ + break; + __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LRU_DISCARDING); wake = true; From 42b0f8da3acc87953161baeb24f756936eb4d4b2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 31 Jul 2024 07:47:27 +0200 Subject: [PATCH 0788/1523] nsfs: fix ioctl declaration The kernel is writing an object of type __u64, so the ioctl has to be defined to _IOR(NSIO, 0x5, __u64) instead of _IO(NSIO, 0x5). Reported-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20240730164554.GA18486@altlinux.org Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..5fad3d0fcd70 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ From 64a7ce76fb901bf9f9c36cf5d681328fc0fd4b5a Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 31 Jul 2024 12:38:35 +0800 Subject: [PATCH 0789/1523] libfs: fix infinite directory reads for offset dir After we switch tmpfs dir operations from simple_dir_operations to simple_offset_dir_operations, every rename happened will fill new dentry to dest dir's maple tree(&SHMEM_I(inode)->dir_offsets->mt) with a free key starting with octx->newx_offset, and then set newx_offset equals to free key + 1. This will lead to infinite readdir combine with rename happened at the same time, which fail generic/736 in xfstests(detail show as below). 1. create 5000 files(1 2 3...) under one dir 2. call readdir(man 3 readdir) once, and get one entry 3. rename(entry, "TEMPFILE"), then rename("TEMPFILE", entry) 4. loop 2~3, until readdir return nothing or we loop too many times(tmpfs break test with the second condition) We choose the same logic what commit 9b378f6ad48cf ("btrfs: fix infinite directory reads") to fix it, record the last_index when we open dir, and do not emit the entry which index >= last_index. The file->private_data now used in offset dir can use directly to do this, and we also update the last_index when we llseek the dir file. Fixes: a2e459555c5f ("shmem: stable directory offsets") Signed-off-by: yangerkun Link: https://lore.kernel.org/r/20240731043835.1828697-1-yangerkun@huawei.com Reviewed-by: Chuck Lever [brauner: only update last_index after seek when offset is zero like Jan suggested] Signed-off-by: Christian Brauner --- fs/libfs.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 8aa34870449f..02602d00939e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } +static int offset_dir_open(struct inode *inode, struct file *file) +{ + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + + file->private_data = (void *)ctx->next_offset; + return 0; +} + /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx) */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { + struct inode *inode = file->f_inode; + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + switch (whence) { case SEEK_CUR: offset += file->f_pos; @@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) } /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; + if (!offset) + file->private_data = (void *)ctx->next_offset; return vfs_setpos(file, offset, LONG_MAX); } @@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return ERR_PTR(-ENOENT); + return; + + if (dentry2offset(dentry) >= last_index) { + dput(dentry); + return; + } if (!offset_dir_emit(ctx, dentry)) { dput(dentry); - break; + return; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; } /** @@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; + long last_index = (long)file->private_data; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + offset_iterate_dir(d_inode(dir), ctx, last_index); return 0; } const struct file_operations simple_offset_dir_operations = { + .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, From 889ced4c9388785952d78d20d338bda2df209bb5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 31 Jul 2024 09:39:02 +0200 Subject: [PATCH 0790/1523] netfs: clean up after renaming FSCACHE_DEBUG config Commit 6b8e61472529 ("netfs: Rename CONFIG_FSCACHE_DEBUG to CONFIG_NETFS_DEBUG") renames the config, but introduces two issues: First, NETFS_DEBUG mistakenly depends on the non-existing config NETFS, whereas the actual intended config is called NETFS_SUPPORT. Second, the config renaming misses to adjust the documentation of the functionality of this config. Clean up those two points. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20240731073902.69262-1-lukas.bulwahn@redhat.com Signed-off-by: Christian Brauner --- Documentation/filesystems/caching/fscache.rst | 8 ++++---- fs/netfs/Kconfig | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index a74d7b052dc1..de1f32526cc1 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -318,10 +318,10 @@ where the columns are: Debugging ========= -If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime -debugging enabled by adjusting the value in:: +If CONFIG_NETFS_DEBUG is enabled, the FS-Cache facility and NETFS support can +have runtime debugging enabled by adjusting the value in:: - /sys/module/fscache/parameters/debug + /sys/module/netfs/parameters/debug This is a bitmask of debugging streams to enable: @@ -343,6 +343,6 @@ This is a bitmask of debugging streams to enable: The appropriate set of values should be OR'd together and the result written to the control file. For example:: - echo $((1|8|512)) >/sys/module/fscache/parameters/debug + echo $((1|8|512)) >/sys/module/netfs/parameters/debug will turn on all function entry debugging. diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index 1b78e8b65ebc..7701c037c328 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -24,7 +24,7 @@ config NETFS_STATS config NETFS_DEBUG bool "Enable dynamic debugging netfslib and FS-Cache" - depends on NETFS + depends on NETFS_SUPPORT help This permits debugging to be dynamically enabled in the local caching management module. If this is set, the debugging output may be From 3b5bbe798b2451820e74243b738268f51901e7d0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 31 Jul 2024 12:01:12 +0200 Subject: [PATCH 0791/1523] pidfd: prevent creation of pidfds for kthreads It's currently possible to create pidfds for kthreads but it is unclear what that is supposed to mean. Until we have use-cases for it and we figured out what behavior we want block the creation of pidfds for kthreads. Link: https://lore.kernel.org/r/20240731-gleis-mehreinnahmen-6bbadd128383@brauner Fixes: 32fcb426ec00 ("pid: add pidfd_open()") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- kernel/fork.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f201..18bdc87209d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,11 +2053,24 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - bool thread = flags & PIDFD_THREAD; - - if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + if (!pid) return -EINVAL; + scoped_guard(rcu) { + struct task_struct *tsk; + + if (flags & PIDFD_THREAD) + tsk = pid_task(pid, PIDTYPE_PID); + else + tsk = pid_task(pid, PIDTYPE_TGID); + if (!tsk) + return -EINVAL; + + /* Don't create pidfds for kernel threads for now. */ + if (tsk->flags & PF_KTHREAD) + return -EINVAL; + } + return __pidfd_prepare(pid, flags, ret); } @@ -2403,6 +2416,12 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; + /* Don't create pidfds for kernel threads for now. */ + if (args->kthread) { + retval = -EINVAL; + goto bad_fork_free_pid; + } + /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) From 86509e38a80da34d7800985fa2be183475242c8c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 9 Aug 2024 15:50:35 +0200 Subject: [PATCH 0792/1523] file: fix typo in take_fd() comment The explanatory comment above take_fd() contains a typo, fix that to not confuse readers. Signed-off-by: Mathias Krause Link: https://lore.kernel.org/r/20240809135035.748109-1-minipli@grsecurity.net Signed-off-by: Christian Brauner --- include/linux/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..59b146a14dca 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); From 8e5ced7804cb9184c4a23f8054551240562a8eda Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2024 17:01:40 +0100 Subject: [PATCH 0793/1523] netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" This reverts commit ae678317b95e760607c7b20b97c9cd4ca9ed6e1a. Revert the patch that removes the deprecated use of PG_private_2 in netfslib for the moment as Ceph is actually still using this to track data copied to the cache. Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org https: //lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 19 ++++- fs/netfs/buffered_read.c | 8 +- fs/netfs/io.c | 144 +++++++++++++++++++++++++++++++++++ include/trace/events/netfs.h | 1 + 4 files changed, 170 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c16bc5250ef..73b5a07bf94d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -498,6 +498,11 @@ const struct netfs_request_ops ceph_netfs_ops = { }; #ifdef CONFIG_CEPH_FSCACHE +static void ceph_set_page_fscache(struct page *page) +{ + folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ +} + static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; @@ -515,6 +520,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b ceph_fscache_write_terminated, inode, true, caching); } #else +static inline void ceph_set_page_fscache(struct page *page) +{ +} + static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } @@ -706,6 +715,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = wlen; set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { @@ -789,6 +800,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ + err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. return 0 @@ -1062,7 +1075,8 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page)) { + if (PageWriteback(page) || + PagePrivate2(page) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", page); unlock_page(page); @@ -1070,6 +1084,7 @@ get_more_pages: } doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { @@ -1254,6 +1269,8 @@ new_request: } set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a688d4c75d99..424048f9ed1f 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -466,7 +466,7 @@ retry: if (!netfs_is_cache_enabled(ctx) && netfs_skip_folio_read(folio, pos, len, false)) { netfs_stat(&netfs_n_rh_write_zskip); - goto have_folio; + goto have_folio_no_wait; } rreq = netfs_alloc_request(mapping, file, @@ -507,6 +507,12 @@ retry: netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: + if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) { + ret = folio_wait_private_2_killable(folio); + if (ret < 0) + goto error; + } +have_folio_no_wait: *_folio = folio; _leave(" = 0"); return 0; diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b98368..c179a1c73fa7 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -98,6 +98,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async) netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete); } +/* + * [DEPRECATED] Deal with the completion of writing the data to the cache. We + * have to clear the PG_fscache bits on the folios involved and release the + * caller's ref. + * + * May be called in softirq mode and we inherit a ref from the caller. + */ +static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, + bool was_async) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t unlocked = 0; + bool have_unlocked = false; + + rcu_read_lock(); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); + + xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + + /* We might have multiple writes from the same huge + * folio, but we mustn't unlock a folio more than once. + */ + if (have_unlocked && folio->index <= unlocked) + continue; + unlocked = folio_next_index(folio) - 1; + trace_netfs_folio(folio, netfs_folio_trace_end_copy); + folio_end_private_2(folio); + have_unlocked = true; + } + } + + rcu_read_unlock(); + netfs_rreq_completed(rreq, was_async); +} + +static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) /* [DEPRECATED] */ +{ + struct netfs_io_subrequest *subreq = priv; + struct netfs_io_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) { + netfs_stat(&netfs_n_rh_write_failed); + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_copy_to_cache); + } else { + netfs_stat(&netfs_n_rh_write_done); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); + + /* If we decrement nr_copy_ops to 0, the ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, was_async); + + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); +} + +/* + * [DEPRECATED] Perform any outstanding writes to the cache. We inherit a ref + * from the caller. + */ +static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct netfs_io_subrequest *subreq, *next, *p; + struct iov_iter iter; + int ret; + + trace_netfs_rreq(rreq, netfs_rreq_trace_copy); + + /* We don't want terminating writes trying to wake us up whilst we're + * still going through the list. + */ + atomic_inc(&rreq->nr_copy_ops); + + list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { + list_del_init(&subreq->rreq_link); + netfs_put_subrequest(subreq, false, + netfs_sreq_trace_put_no_copy); + } + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + /* Amalgamate adjacent writes */ + while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + next = list_next_entry(subreq, rreq_link); + if (next->start != subreq->start + subreq->len) + break; + subreq->len += next->len; + list_del_init(&next->rreq_link); + netfs_put_subrequest(next, false, + netfs_sreq_trace_put_merged); + } + + ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, + subreq->len, rreq->i_size, true); + if (ret < 0) { + trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); + trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); + continue; + } + + iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, + subreq->start, subreq->len); + + atomic_inc(&rreq->nr_copy_ops); + netfs_stat(&netfs_n_rh_write); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); + trace_netfs_sreq(subreq, netfs_sreq_trace_write); + cres->ops->write(cres, subreq->start, &iter, + netfs_rreq_copy_terminated, subreq); + } + + /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, false); +} + +static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */ +{ + struct netfs_io_request *rreq = + container_of(work, struct netfs_io_request, work); + + netfs_rreq_do_write_to_cache(rreq); +} + +static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */ +{ + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); +} + /* * Handle a short read. */ @@ -275,6 +415,10 @@ again: clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) && + test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) + return netfs_rreq_write_to_cache(rreq); + netfs_rreq_completed(rreq, was_async); } diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268df..24ec3434d32e 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -145,6 +145,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ From 7b589a9b45ae32aa9d7bece597490e141198d7a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Aug 2024 19:38:46 +0100 Subject: [PATCH 0794/1523] netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used correctly. The problem is that we try to set them up in the request initialisation, but we the cache may be in the process of setting up still, and so the state may not be correct. Further, we secondarily sample the cache state and make contradictory decisions later. The issue arises because we set up the cache resources, which allows the cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which triggers cache writing even if we didn't set the flags when allocating. Fix this in the following way: (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in ->init_request() rather than trying to juggle that in netfs_alloc_request(). (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is to be done, then PG_private_2 is to be used rather than only setting it if we decide to cache and then having netfs_rreq_unlock_folios() set the non-PG_private_2 writeback-to-cache if it wasn't set. (3) Split netfs_rreq_unlock_folios() into two functions, one of which contains the deprecated code for using PG_private_2 to avoid accidentally doing the writeback path - and always use it if USE_PGPRIV2 is set. (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always wait for PG_private_2. This function is deprecated and only used by ceph anyway, and so label it so. (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use fscache_operation_valid() on the cache_resources instead. This has the advantage of picking up the result of netfs_begin_cache_read() and fscache_begin_write_operation() - which are called after the object is initialised and will wait for the cache to come to a usable state. Just reverting ae678317b95e[1] isn't a sufficient fix, so this need to be applied on top of that. Without this as well, things like: rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { and: WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386 may happen, along with some UAFs due to PG_private_2 not getting used to wait on writeback completion. Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Hristo Venev cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1] Link: https://lore.kernel.org/r/1173209.1723152682@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 3 + fs/ceph/inode.c | 2 - fs/netfs/buffered_read.c | 125 ++++++++++++++++++++++++++++++----- fs/netfs/objects.c | 10 --- fs/netfs/write_issue.c | 4 +- fs/nfs/fscache.c | 2 + fs/nfs/fscache.h | 2 - include/linux/netfs.h | 3 - include/trace/events/netfs.h | 1 + 9 files changed, 116 insertions(+), 36 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 73b5a07bf94d..cc0a2240de98 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -424,6 +424,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) struct ceph_netfs_request_data *priv; int ret = 0; + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + if (rreq->origin != NETFS_READAHEAD) return 0; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8f8de8f33abb..71cd70514efa 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); spin_lock_init(&ci->i_ceph_lock); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 424048f9ed1f..27c750d39476 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -9,6 +9,97 @@ #include #include "internal.h" +/* + * [DEPRECATED] Unlock the folios in a read operation for when the filesystem + * is using PG_private_2 and direct writing to the cache from here rather than + * marking the page for writeback. + * + * Note that we don't touch folio->private in this code. + */ +static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq, + size_t *account) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + /* Walk through the pagecache and the I/O request lists simultaneously. + * We may have a mixture of cached and uncached sections and we only + * really want to write out the uncached sections. This is slightly + * complicated by the possibility that we might have huge pages with a + * mixture inside. + */ + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2); + + rcu_read_lock(); + xas_for_each(&xas, folio, last_page) { + loff_t pg_end; + bool pg_failed = false; + bool folio_started = false; + + if (xas_retry(&xas, folio)) + continue; + + pg_end = folio_pos(folio) + folio_size(folio) - 1; + + for (;;) { + loff_t sreq_end; + + if (!subreq) { + pg_failed = true; + break; + } + + if (!folio_started && + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) && + fscache_operation_valid(&rreq->cache_resources)) { + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); + folio_start_private_2(folio); + folio_started = true; + } + + pg_failed |= subreq_failed; + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) + break; + + *account += subreq->transferred; + if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + + if (pg_end == sreq_end) + break; + } + + if (!pg_failed) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) + _debug("no unlock"); + else + folio_unlock(folio); + } + } + rcu_read_unlock(); +} + /* * Unlock the folios in a read operation. We need to set PG_writeback on any * folios we're going to write back before we unlock them. @@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } } + /* Handle deprecated PG_private_2 case. */ + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { + netfs_rreq_unlock_folios_pgpriv2(rreq, &account); + goto out; + } + /* Walk through the pagecache and the I/O request lists simultaneously. * We may have a mixture of cached and uncached sections and we only * really want to write out the uncached sections. This is slightly @@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) loff_t pg_end; bool pg_failed = false; bool wback_to_cache = false; - bool folio_started = false; if (xas_retry(&xas, folio)) continue; @@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, - &subreq->flags)) { - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); - folio_start_private_2(folio); - folio_started = true; - } - } else { - wback_to_cache |= - test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); - } + + wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) @@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } rcu_read_unlock(); +out: task_io_account_read(account); if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -395,7 +483,7 @@ zero_out: } /** - * netfs_write_begin - Helper to prepare for writing + * netfs_write_begin - Helper to prepare for writing [DEPRECATED] * @ctx: The netfs context * @file: The file to read from * @mapping: The mapping to read from @@ -426,6 +514,9 @@ zero_out: * inode before calling this. * * This is usable whether or not caching is enabled. + * + * Note that this should be considered deprecated and netfs_perform_write() + * used instead. */ int netfs_write_begin(struct netfs_inode *ctx, struct file *file, struct address_space *mapping, @@ -507,11 +598,9 @@ retry: netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: - if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) { - ret = folio_wait_private_2_killable(folio); - if (ret < 0) - goto error; - } + ret = folio_wait_private_2_killable(folio); + if (ret < 0) + goto error; have_folio_no_wait: *_folio = folio; _leave(" = 0"); diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index f4a642727479..0294df70c3ff 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct netfs_io_request *rreq; mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; struct kmem_cache *cache = mempool->pool_data; - bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || - origin == NETFS_DIO_READ || - origin == NETFS_DIO_WRITE); - bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); int ret; for (;;) { @@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, refcount_set(&rreq->ref, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (cached) { - __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); - if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) - /* Filesystem uses deprecated PG_private_2 marking. */ - __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); - } if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 9258d30cffe3..3f7e37e50c7d 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, { struct netfs_io_request *wreq; struct netfs_inode *ictx; + bool is_buffered = (origin == NETFS_WRITEBACK || + origin == NETFS_WRITETHROUGH); wreq = netfs_alloc_request(mapping, file, start, 0, origin); if (IS_ERR(wreq)) @@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, _enter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) + if (is_buffered && netfs_is_cache_enabled(ictx)) fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); wreq->contiguity = wreq->start; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7202ce84d0eb..bf29a65c5027 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi { rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); return 0; } diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index fbed0027996f..e8adae1bc260 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2..983816608f15 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 24ec3434d32e..606b4a0f92da 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ From 6b9935da2a6b2a72774c15c844ae201a3fc362ac Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 10 Aug 2024 13:27:00 +0900 Subject: [PATCH 0795/1523] scsi: mpi3mr: Add missing spin_lock_init() for mrioc->trigger_lock Commit fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") added the spinlock trigger_lock to the struct mpi3mr_ioc. However, spin_lock_init() call was not added for it, then the lock does not work as expected. Also, the kernel reports the message below when lockdep is enabled. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? To fix the issue and to avoid the INFO message, add the missing spin_lock_init() call. Fixes: fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240810042701.661841-2-shinichiro.kawasaki@wdc.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index ca8f132e03ae..616894571c6a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5234,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); From fdad456cbcca739bae1849549c7a999857c56f88 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 28 Jul 2024 19:46:11 +0800 Subject: [PATCH 0796/1523] bpf: Fix updating attached freplace prog in prog_array map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") fixed a NULL pointer dereference panic, but didn't fix the issue that fails to update attached freplace prog to prog_array map. Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"), freplace prog and its target prog are able to tail call each other. And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach") sets prog->aux->dst_prog as NULL after attaching freplace prog to its target prog. After loading freplace the prog_array's owner type is BPF_PROG_TYPE_SCHED_CLS. Then, after attaching freplace its prog->aux->dst_prog is NULL. Then, while updating freplace in prog_array the bpf_prog_map_compatible() incorrectly returns false because resolve_prog_type() returns BPF_PROG_TYPE_EXT instead of BPF_PROG_TYPE_SCHED_CLS. After this patch the resolve_prog_type() returns BPF_PROG_TYPE_SCHED_CLS and update to prog_array can succeed. Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") Cc: Toke Høiland-Jørgensen Cc: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a3..7b776dae36e5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,8 +856,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) From 21ff3a16e92e2fa4f906a61d148aca1423c58298 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 12 Aug 2024 19:11:17 +0530 Subject: [PATCH 0797/1523] drm/xe/xe2hpg: Add Wa_14021821874 Wa_14021821874 applies to xe2_hpg V2(Himal): - Use space after define Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240812134117.813670-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 2c8c4d4218db..5f33dee35f8a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -451,6 +451,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 564e32e44e3b..28b7f95b6c2f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -542,6 +542,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, /* Xe2_LPM */ From 6c17ea1f3eaa330d445ac14a9428402ce4e3055e Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 31 Jul 2024 08:31:12 +0530 Subject: [PATCH 0798/1523] cpu/SMT: Enable SMT only if a core is online If a core is offline then enabling SMT should not online CPUs of this core. By enabling SMT, what is intended is either changing the SMT value from "off" to "on" or setting the SMT level (threads per core) from a lower to higher value. On PowerPC the ppc64_cpu utility can be used, among other things, to perform the following functions: ppc64_cpu --cores-on # Get the number of online cores ppc64_cpu --cores-on=X # Put exactly X cores online ppc64_cpu --offline-cores=X[,Y,...] # Put specified cores offline ppc64_cpu --smt={on|off|value} # Enable, disable or change SMT level If the user has decided to offline certain cores, enabling SMT should not online CPUs in those cores. This patch fixes the issue and changes the behaviour as described, by introducing an arch specific function topology_is_core_online(). It is currently implemented only for PowerPC. Fixes: 73c58e7e1412 ("powerpc: Add HOTPLUG_SMT support") Reported-by: Tyrel Datwyler Closes: https://groups.google.com/g/powerpc-utils-devel/c/wrwVzAAnRlI/m/5KJSoqP4BAAJ Signed-off-by: Nysal Jan K.A Reviewed-by: Shrikanth Hegde Reviewed-by: Thomas Gleixner Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731030126.956210-2-nysal@linux.ibm.com --- Documentation/ABI/testing/sysfs-devices-system-cpu | 3 ++- kernel/cpu.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71..de725ca3be82 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026..b1fd2a3db91a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) From 227bbaabe64b6f9cd98aa051454c1d4a194a8c6a Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 31 Jul 2024 08:31:13 +0530 Subject: [PATCH 0799/1523] powerpc/topology: Check if a core is online topology_is_core_online() checks if the core a CPU belongs to is online. The core is online if at least one of the sibling CPUs is online. The first CPU of an online core is also online in the common case, so this should be fairly quick. Fixes: 73c58e7e1412 ("powerpc: Add HOTPLUG_SMT support") Signed-off-by: Nysal Jan K.A Reviewed-by: Shrikanth Hegde Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731030126.956210-3-nysal@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b7..16bacfe8c7a2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include +#include #include static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ From 8c6b808c8c2a9de21503944bd6308979410fd812 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 10 Aug 2024 13:27:01 +0900 Subject: [PATCH 0800/1523] scsi: mpi3mr: Avoid MAX_PAGE_ORDER WARNING for buffer allocations Commit fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") added mpi3mr_alloc_diag_bufs() which calls dma_alloc_coherent() to allocate the trace buffer and the firmware buffer. mpi3mr_alloc_diag_bufs() decides the buffer sizes from the driver configuration. In my environment, the sizes are 8MB. With the sizes, dma_alloc_coherent() fails and report this WARNING: WARNING: CPU: 4 PID: 438 at mm/page_alloc.c:4676 __alloc_pages_noprof+0x52f/0x640 The WARNING indicates that the order of the allocation size is larger than MAX_PAGE_ORDER. After this failure, mpi3mr_alloc_diag_bufs() reduces the buffer sizes and retries dma_alloc_coherent(). In the end, the buffer allocations succeed with 4MB size in my environment, which corresponds to MAX_PAGE_ORDER=10. Though the allocations succeed, the WARNING message is misleading and should be avoided. To avoid the WARNING, check the orders of the buffer allocation sizes before calling dma_alloc_coherent(). If the orders are larger than MAX_PAGE_ORDER, fall back to the retry path. Fixes: fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240810042701.661841-3-shinichiro.kawasaki@wdc.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36..01f035f9330e 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", From bed2eb964c70b780fb55925892a74f26cb590b25 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 12 Aug 2024 14:48:47 -0700 Subject: [PATCH 0801/1523] bpf: Fix a kernel verifier crash in stacksafe() Daniel Hodges reported a kernel verifier crash when playing with sched-ext. Further investigation shows that the crash is due to invalid memory access in stacksafe(). More specifically, it is the following code: if (exact != NOT_EXACT && old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) return false; The 'i' iterates old->allocated_stack. If cur->allocated_stack < old->allocated_stack the out-of-bound access will happen. To fix the issue add 'i >= cur->allocated_stack' check such that if the condition is true, stacksafe() should fail. Otherwise, cur->stack[spi].slot_type[i % BPF_REG_SIZE] memory access is legal. Fixes: 2793a8b015f7 ("bpf: exact states comparison for iterator convergence checks") Cc: Eduard Zingerman Reported-by: Daniel Hodges Acked-by: Eduard Zingerman Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240812214847.213612-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4cb5441ad75f..d8520095ca03 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16884,8 +16884,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) From 662c3e2db00f92e50c26e9dc4fe47c52223d9982 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 12 Aug 2024 14:48:52 -0700 Subject: [PATCH 0802/1523] selftests/bpf: Add a test to verify previous stacksafe() fix A selftest is added such that without the previous patch, a crash can happen. With the previous patch, the test can run successfully. The new test is written in a way which mimics original crash case: main_prog static_prog_1 static_prog_2 where static_prog_1 has different paths to static_prog_2 and some path has stack allocated and some other path does not. A stacksafe() checking in static_prog_2() triggered the crash. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240812214852.214037-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters.c | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 16bdc3e25591..ef70b88bccb2 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx) return sum; } +__u32 upper, select_n, result; +__u64 global; + +static __noinline bool nest_2(char *str) +{ + /* some insns (including branch insns) to ensure stacksafe() is triggered + * in nest_2(). This way, stacksafe() can compare frame associated with nest_1(). + */ + if (str[0] == 't') + return true; + if (str[1] == 'e') + return true; + if (str[2] == 's') + return true; + if (str[3] == 't') + return true; + return false; +} + +static __noinline bool nest_1(int n) +{ + /* case 0: allocate stack, case 1: no allocate stack */ + switch (n) { + case 0: { + char comm[16]; + + if (bpf_get_current_comm(comm, 16)) + return false; + return nest_2(comm); + } + case 1: + return nest_2((char *)&global); + default: + return false; + } +} + +SEC("raw_tp") +__success +int iter_subprog_check_stacksafe(const void *ctx) +{ + long i; + + bpf_for(i, 0, upper) { + if (!nest_1(select_n)) { + result = 1; + return 0; + } + } + + result = 2; + return 0; +} + char _license[] SEC("license") = "GPL"; From 6e1918ff680527ce4be77426aa537012b5aa997c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 7 Aug 2024 21:00:21 -0700 Subject: [PATCH 0803/1523] net: macb: Use rcu_dereference() for idev->ifa_list in macb_suspend(). In macb_suspend(), idev->ifa_list is fetched with rcu_access_pointer() and later the pointer is dereferenced as ifa->ifa_local. So, idev->ifa_list must be fetched with rcu_dereference(). Fixes: 0cb8de39a776 ("net: macb: Add ARP support to WOL") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240808040021.6971-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11665be3a22c..dcd3f54ed0cf 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5250,8 +5250,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wol & MACB_WOL_ENABLED) { /* Check for IP address in WOL ARP mode */ idev = __in_dev_get_rcu(bp->dev); - if (idev && idev->ifa_list) - ifa = rcu_access_pointer(idev->ifa_list); + if (idev) + ifa = rcu_dereference(idev->ifa_list); if ((bp->wolopts & WAKE_ARP) && !ifa) { netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); return -EOPNOTSUPP; From 046667c4d3196938e992fba0dfcde570aa85cd0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Jul 2024 14:45:08 -0400 Subject: [PATCH 0804/1523] memcg_write_event_control(): fix a user-triggerable oops we are *not* guaranteed that anything past the terminating NUL is mapped (let alone initialized with anything sane). Fixes: 0dea116876ee ("cgroup: implement eventfd-based generic API for notifications") Cc: stable@vger.kernel.org Cc: Andrew Morton Acked-by: Michal Hocko Signed-off-by: Al Viro --- mm/memcontrol-v1.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e..417c96f2da28 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) From 3beddef84d90590270465a907de1cfe2539ac70d Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 13 Aug 2024 12:37:48 +0800 Subject: [PATCH 0805/1523] ALSA: hda/tas2781: fix wrong calibrated data order Wrong calibration data order cause sound too low in some device. Fix wrong calibrated data order, add calibration data converssion by get_unaligned_be32() after reading from UEFI. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Cc: Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20240813043749.108-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 49bd7097d892..7dbfc92d9d55 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -2,10 +2,12 @@ // // TAS2781 HDA I2C driver // -// Copyright 2023 Texas Instruments, Inc. +// Copyright 2023 - 2024 Texas Instruments, Inc. // // Author: Shenghao Ding +// Current maintainer: Baojun Xu +#include #include #include #include @@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) static const unsigned char rgno_array[CALIB_MAX] = { 0x74, 0x0c, 0x14, 0x70, 0x7c, }; - unsigned char *data; + int offset = 0; int i, j, rc; + __be32 data; for (i = 0; i < tas_priv->ndev; i++) { - data = tas_priv->cali_data.data + - i * TASDEVICE_SPEAKER_CALIBRATION_SIZE; for (j = 0; j < CALIB_MAX; j++) { + data = get_unaligned_be32( + &tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), - &(data[4 * j]), 4); + (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); + offset += 4; } } } From db639278e6217173c21bf8bd52eff2e9a0d6919e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:34:15 +0300 Subject: [PATCH 0806/1523] drm/i915: use pdev_to_i915() instead of pci_get_drvdata() directly We have a helper for converting pci device to i915 device, use it. v2: Also convert i915_pci_probe() (Gustavo) Reviewed-by: Gustavo Sousa Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240812103415.1540096-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ce4dfd65fafa..94c89ca2ef30 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -880,7 +880,7 @@ static void i915_pci_remove(struct pci_dev *pdev) { struct drm_i915_private *i915; - i915 = pci_get_drvdata(pdev); + i915 = pdev_to_i915(pdev); if (!i915) /* driver load aborted, nothing to cleanup */ return; @@ -1003,7 +1003,7 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (i915_inject_probe_failure(pci_get_drvdata(pdev))) { + if (i915_inject_probe_failure(pdev_to_i915(pdev))) { i915_pci_remove(pdev); return -ENODEV; } @@ -1025,7 +1025,7 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static void i915_pci_shutdown(struct pci_dev *pdev) { - struct drm_i915_private *i915 = pci_get_drvdata(pdev); + struct drm_i915_private *i915 = pdev_to_i915(pdev); i915_driver_shutdown(i915); } From d2dfed310aae0739dc87b68c660357e6a4f29819 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Tue, 6 Aug 2024 11:46:03 +1200 Subject: [PATCH 0807/1523] platform/x86: asus-wmi: Add quirk for ROG Ally X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new ROG Ally X functions the same as the previus model so we can use the same method to ensure the MCU USB devices wake and reconnect correctly. Given that two devices marks the start of a trend, this patch also adds a quirk table to make future additions easier if the MCU is the same. Signed-off-by: Luke D. Jones Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240805234603.38736-1-luke@ljones.dev Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b..37636e5a38e3 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; From 9c8e022567bbec53bee8ae75c44b3d6cd2080d42 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:44 +0200 Subject: [PATCH 0808/1523] platform/surface: aggregator_registry: Add Support for Surface Pro 10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Pro 10. It seems to use the same SAM client devices as the Surface Pro 9, so re-use its node group. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-2-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9..fa5b896e5f4e 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -324,7 +324,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +365,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, From ed235163c3f02329d5e37ed4485bbc39ed2568d4 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:45 +0200 Subject: [PATCH 0809/1523] platform/surface: aggregator_registry: Add support for Surface Laptop Go 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Go 3. It seems to use the same SAM client devices as the Surface Laptop Go 1 and 2, so re-use their node group. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-3-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index fa5b896e5f4e..4d36810c2308 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -398,6 +398,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Studio */ { "MSHW0123", (unsigned long)ssam_node_group_sls }, From 28d04b4a2cc20981c95787f9c449e6fc51d904f9 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:46 +0200 Subject: [PATCH 0810/1523] platform/surface: aggregator_registry: Add support for Surface Laptop Studio 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Studio 2 (SLS2). The SLS2 is quite similar to the SLS1, but it does not provide the touchpad as a SAM-HID device. Therefore, add a new node group for the SLS2 and update the comments accordingly. In addition, it uses the new fan control interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-4-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../surface/surface_aggregator_registry.c | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 4d36810c2308..892ba9549f6a 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -273,8 +273,8 @@ static const struct software_node *ssam_node_group_sl5[] = { NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +289,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -401,8 +417,11 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop Go 3 */ { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; From 002adda09bc1c983c75c82a7e12285c7423aec31 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:47 +0200 Subject: [PATCH 0811/1523] platform/surface: aggregator_registry: Add fan and thermal sensor support for Surface Laptop 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EC on the Surface Laptop 5 exposes the fan interface. With the recently introduced driver for it, we can now also enable it here. In addition, also enable the thermal sensor interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-5-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 892ba9549f6a..4d3f5b3111ba 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,7 +265,9 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, From 99ae7b9ba047ad029a0a23b2bd51608ce79c8e97 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:48 +0200 Subject: [PATCH 0812/1523] platform/surface: aggregator_registry: Add support for Surface Laptop 6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Studio 6 (SL6). The SL6 is similar to the SL5, with the typical battery/AC, platform profile, and HID nodes. It also has support for the newly supported fan interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-6-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../surface/surface_aggregator_registry.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 4d3f5b3111ba..a23dff35f8ca 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -275,6 +275,22 @@ static const struct software_node *ssam_node_group_sl5[] = { NULL, }; +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Studio 1. */ static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, @@ -410,6 +426,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, From ccbde4b128ef9c73d14d0d7817d68ef795f6d131 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Thu, 1 Aug 2024 15:11:26 +0300 Subject: [PATCH 0813/1523] char: xillybus: Don't destroy workqueue from work item running on it Triggered by a kref decrement, destroy_workqueue() may be called from within a work item for destroying its own workqueue. This illegal situation is averted by adding a module-global workqueue for exclusive use of the offending work item. Other work items continue to be queued on per-device workqueues to ensure performance. Reported-by: syzbot+91dbdfecdd3287734d8e@syzkaller.appspotmail.com Cc: stable Closes: https://lore.kernel.org/lkml/0000000000000ab25a061e1dfe9f@google.com/ Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240801121126.60183-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8c..33ca0f4af390 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -2258,6 +2255,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,11 +2266,16 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { + destroy_workqueue(wakeup_wq); + usb_deregister(&xillyusb_driver); } From b8cdc47adf059f60a39555eeba1db92c503a5061 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 00:03:47 +0200 Subject: [PATCH 0814/1523] drm/xe/migrate: Parameterize ccs and bo data clear in xe_migrate_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameterize clearing ccs and bo data in xe_migrate_clear() which higher layers can utilize. This patch will be used later on when doing bo data clear for igfx as well. v2: Replace multiple params with flags in xe_migrate_clear (Matt B) v3: s/CLEAR_BO_DATA_FLAG_*/XE_MIGRATE_CLEAR_FLAG_* and move to xe_migrate.h. other nits(Matt B) Cc: Himal Prasad Ghimiray Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Akshata Jahagirdar Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240809220347.25330-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_bo.c | 3 ++- drivers/gpu/drm/xe/tests/xe_migrate.c | 12 ++++++++---- drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++++-- drivers/gpu/drm/xe/xe_migrate.c | 27 +++++++++++++++++++-------- drivers/gpu/drm/xe/xe_migrate.h | 7 ++++++- 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 1768483da1b7..df9fd907edd4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4344a1724029..47ae9d0b8864 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - fence = xe_migrate_clear(m, remote, remote->ttm.resource); + fence = xe_migrate_clear(m, remote, remote->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : "Clearing remote small bo", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); @@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); expected = 0; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); expected = 0; - fence = xe_migrate_clear(m, big, big->ttm.resource); + fence = xe_migrate_clear(m, big, big->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; @@ -603,7 +606,8 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Clear vram buffer object\n"); expected = 0x0000000000000000; - fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) return; dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3295bc92d7aa..56a089aa3916 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -793,8 +793,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } } else { - if (move_lacks_source) - fence = xe_migrate_clear(migrate, bo, new_mem); + if (move_lacks_source) { + u32 flags = 0; + + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; + + fence = xe_migrate_clear(migrate, bo, new_mem, flags); + } else fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, handle_system_ccs); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6f24aaf58252..a2d0ce3c59bf 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1037,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. * - * Clear the contents of @dst to zero. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destinations. + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. * TODO: Eliminate the @bo argument. * * Return: Pointer to a dma_fence representing the last clear batch, or @@ -1048,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst) + struct ttm_resource *dst, + u32 clear_flags) { bool clear_vram = mem_type_is_vram(dst->mem_type); + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; + if (WARN_ON(!clear_bo_data && !clear_ccs)) + return NULL; + + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) + clear_only_system_ccs = true; + if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else @@ -1085,7 +1096,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, avail_pts); if (xe_migrate_needs_ccs_emit(xe)) @@ -1107,13 +1118,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - if (!clear_system_ccs) + if (clear_bo_data) emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); if (xe_migrate_needs_ccs_emit(xe)) { @@ -1172,7 +1183,7 @@ err_sync: return ERR_PTR(err); } - if (clear_system_ccs) + if (clear_ccs) bo->ccs_cleared = true; return fence; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 453e0ecf5034..7929cc2425e8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -102,9 +102,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) +#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) +#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst); + struct ttm_resource *dst, + u32 clear_flags); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); From bc923d594db21bee0ead128eb4bb78f7e77467a4 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 14:46:44 +0200 Subject: [PATCH 0815/1523] platform/surface: aggregator: Fix warning when controller is destroyed in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a small window in ssam_serial_hub_probe() where the controller is initialized but has not been started yet. Specifically, between ssam_controller_init() and ssam_controller_start(). Any failure in this window, for example caused by a failure of serdev_device_open(), currently results in an incorrect warning being emitted. In particular, any failure in this window results in the controller being destroyed via ssam_controller_destroy(). This function checks the state of the controller and, in an attempt to validate that the controller has been cleanly shut down before we try and deallocate any resources, emits a warning if that state is not SSAM_CONTROLLER_STOPPED. However, since we have only just initialized the controller and have not yet started it, its state is SSAM_CONTROLLER_INITIALIZED. Note that this is the only point at which the controller has this state, as it will change after we start the controller with ssam_controller_start() and never revert back. Further, at this point no communication has taken place and the sender and receiver threads have not been started yet (and we may not even have an open serdev device either). Therefore, it is perfectly safe to call ssam_controller_destroy() with a state of SSAM_CONTROLLER_INITIALIZED. This, however, means that the warning currently being emitted is incorrect. Fix it by extending the check. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811124645.246016-1-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/aggregator/controller.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487..7e89f547999b 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous From dcdb52d948f3a17ccd3fce757d9bd981d7c32039 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2024 15:44:07 +0300 Subject: [PATCH 0816/1523] usb: xhci: Check for xhci->interrupters being allocated in xhci_mem_clearup() If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop up the damage. If it fails early enough, before xhci->interrupters is allocated but after xhci->max_interrupters has been set, which happens in most (all?) cases, things get uglier, as xhci_mem_cleanup() unconditionally derefences xhci->interrupters. With prejudice. Gate the interrupt freeing loop with a check on xhci->interrupters being non-NULL. Found while debugging a DMA allocation issue that led the XHCI driver on this exact path. Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Cc: Mathias Nyman Cc: Wesley Cheng Cc: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # 6.8+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240809124408.505786-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475daf..937ce5fd5809 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); From 741b41b48faf41c0bcf3c26fbb3448b0fda4fc5d Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Fri, 9 Aug 2024 15:44:08 +0300 Subject: [PATCH 0817/1523] usb: xhci: fix duplicate stall handling in handle_tx_event() Stall handling is managed in the 'process_*' functions, which are called right before the 'goto' stall handling code snippet. Thus, there should be a return after the 'process_*' functions. Otherwise, the stall code may run twice. Fixes: 1b349f214ac7 ("usb: xhci: add 'goto' for halted endpoint check in handle_tx_event()") Reported-by: Michal Pecio Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240809124408.505786-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059..4ea2c3e072a9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) From d209d1634e6562eafc369b28f8a1f67a2e9e5222 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 9 Aug 2024 18:03:43 +0300 Subject: [PATCH 0818/1523] usb: typec: ucsi: Fix the return value of ucsi_run_command() The command execution routines need to return the amount of data that was transferred when succesful. This fixes an issue where the alternate modes and the power delivery capabilities are not getting registered. Fixes: 5e9c1662a89b ("usb: typec: ucsi: rework command execution functions") Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240809150343.286942-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 432a2d6266d7..4039851551c1 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) From 21ea1ce37fc267dc45fe27517bbde926211683df Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 9 Aug 2024 19:29:01 +0800 Subject: [PATCH 0819/1523] Revert "usb: typec: tcpm: clear pd_event queue in PORT_RESET" This reverts commit bf20c69cf3cf9c6445c4925dd9a8a6ca1b78bfdf. During tcpm_init() stage, if the VBUS is still present after tcpm_reset_port(), then we assume that VBUS will off and goto safe0v after a specific discharge time. Following a TCPM_VBUS_EVENT event if VBUS reach to off state. TCPM_VBUS_EVENT event may be set during PORT_RESET handling stage. If pd_events reset to 0 after TCPM_VBUS_EVENT set, we will lost this VBUS event. Then the port state machine may stuck at one state. Before: [ 2.570172] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev1 NONE_AMS] [ 2.570179] state change PORT_RESET -> PORT_RESET_WAIT_OFF [delayed 100 ms] [ 2.570182] pending state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED @ 920 ms [rev1 NONE_AMS] [ 3.490213] state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED [delayed 920 ms] [ 3.490220] Start toggling [ 3.546050] CC1: 0 -> 0, CC2: 0 -> 2 [state TOGGLING, polarity 0, connected] [ 3.546057] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] After revert this patch, we can see VBUS off event and the port will goto expected state. [ 2.441992] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev1 NONE_AMS] [ 2.441999] state change PORT_RESET -> PORT_RESET_WAIT_OFF [delayed 100 ms] [ 2.442002] pending state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED @ 920 ms [rev1 NONE_AMS] [ 2.442122] VBUS off [ 2.442125] state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED [rev1 NONE_AMS] [ 2.442127] VBUS VSAFE0V [ 2.442351] CC1: 0 -> 0, CC2: 0 -> 0 [state SNK_UNATTACHED, polarity 0, disconnected] [ 2.442357] Start toggling [ 2.491850] CC1: 0 -> 0, CC2: 0 -> 2 [state TOGGLING, polarity 0, connected] [ 2.491858] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] [ 2.491863] pending state change SRC_ATTACH_WAIT -> SNK_TRY @ 200 ms [rev1 NONE_AMS] [ 2.691905] state change SRC_ATTACH_WAIT -> SNK_TRY [delayed 200 ms] Fixes: bf20c69cf3cf ("usb: typec: tcpm: clear pd_event queue in PORT_RESET") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240809112901.535072-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index cce39818e99a..4b02d6474259 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else From 3ed486e383ccee9b0c8d727608f12a937c6603ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Aug 2024 11:50:38 +0200 Subject: [PATCH 0820/1523] usb: misc: ljca: Add Lunar Lake ljca GPIO HID to ljca_gpio_hids[] Add LJCA GPIO support for the Lunar Lake platform. New HID taken from out of tree ivsc-driver git repo. Link: https://github.com/intel/ivsc-driver/commit/47e7c4a446c8ea8c741ff5a32fa7b19f9e6fd47e Cc: stable Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240812095038.555837-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb-ljca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be306..1a8d5e80b9ae 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; From 92567a5f92bc947fb7aa4351979db1b7b71a554c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:06:19 +0800 Subject: [PATCH 0821/1523] iommu: Remove unused declaration iommu_sva_unbind_gpasid() Commit 0c9f17877891 ("iommu: Remove guest pasid related interfaces and definitions") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240808140619.2498535-1-yuehaibing@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311..04cbdae0052e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, From dc98d76a15bc29a9a4e76f2f65f39f3e590fb15c Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 8 Aug 2024 22:03:25 +0800 Subject: [PATCH 0822/1523] tty: serial: fsl_lpuart: mark last busy before uart_add_one_port With "earlycon initcall_debug=1 loglevel=8" in bootargs, kernel sometimes boot hang. It is because normal console still is not ready, but runtime suspend is called, so early console putchar will hang in waiting TRDE set in UARTSTAT. The lpuart driver has auto suspend delay set to 3000ms, but during uart_add_one_port, a child device serial ctrl will added and probed with its pm runtime enabled(see serial_ctrl.c). The runtime suspend call path is: device_add |-> bus_probe_device |->device_initial_probe |->__device_attach |-> pm_runtime_get_sync(dev->parent); |-> pm_request_idle(dev); |-> pm_runtime_put(dev->parent); So in the end, before normal console ready, the lpuart get runtime suspended. And earlycon putchar will hang. To address the issue, mark last busy just after pm_runtime_enable, three seconds is long enough to switch from bootconsole to normal console. Fixes: 43543e6f539b ("tty: serial: fsl_lpuart: Add runtime pm support") Cc: stable Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20240808140325.580105-1-peng.fan@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e..77efa7ee6eda 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) From 7258fdd7d7459616b3fe1a603e33900584b10c13 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 10 Aug 2024 01:07:20 +0900 Subject: [PATCH 0823/1523] tty: vt: conmakehash: remove non-portable code printing comment header Commit 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") included , which invoked another (wrong) patch that tried to address a build error on macOS. According to the specification [1], the correct header to use PATH_MAX is . The minimal fix would be to replace with . However, the following commits seem questionable to me: - 3bd85c6c97b2 ("tty: vt: conmakehash: Don't mention the full path of the input in output") - 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") These commits made too many efforts to cope with a comment header in drivers/tty/vt/consolemap_deftbl.c: /* * Do not edit this file; it was automatically generated by * * conmakehash drivers/tty/vt/cp437.uni > [this file] * */ With this commit, the header part of the generate C file will be simplified as follows: /* * Automatically generated file; Do not edit. */ BTW, another series of excessive efforts for a comment header can be seen in the following: - 5ef6dc08cfde ("lib/build_OID_registry: don't mention the full path of the script in output") - 2fe29fe94563 ("lib/build_OID_registry: avoid non-destructive substitution for Perl < 5.13.2 compat") [1]: https://pubs.opengroup.org/onlinepubs/009695399/basedefs/limits.h.html Fixes: 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") Cc: stable Reported-by: Daniel Gomez Closes: https://lore.kernel.org/all/20240807-macos-build-support-v1-11-4cd1ded85694@samsung.com/ Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240809160853.1269466-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/conmakehash.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index 82d9db68b2ce..a931fcde7ad9 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -11,8 +11,6 @@ * Copyright (C) 1995-1997 H. Peter Anvin */ -#include -#include #include #include #include @@ -79,7 +77,6 @@ int main(int argc, char *argv[]) { FILE *ctbl; const char *tblname; - char base_tblname[PATH_MAX]; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -245,20 +242,15 @@ int main(int argc, char *argv[]) for ( i = 0 ; i < fontlen ; i++ ) nuni += unicount[i]; - strncpy(base_tblname, tblname, PATH_MAX); - base_tblname[PATH_MAX - 1] = 0; printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include \n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", basename(base_tblname), fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { From c9f6613b16123989f2c3bd04b1d9b2365d6914e7 Mon Sep 17 00:00:00 2001 From: Mathieu Othacehe Date: Thu, 8 Aug 2024 08:06:37 +0200 Subject: [PATCH 0824/1523] tty: atmel_serial: use the correct RTS flag. In RS485 mode, the RTS pin is driven high by hardware when the transmitter is operating. This behaviour cannot be changed. This means that the driver should claim that it supports SER_RS485_RTS_ON_SEND and not SER_RS485_RTS_AFTER_SEND. Otherwise, when configuring the port with the SER_RS485_RTS_ON_SEND, one get the following warning: kern.warning kernel: atmel_usart_serial atmel_usart_serial.2.auto: ttyS1 (1): invalid RTS setting, using RTS_AFTER_SEND instead which is contradictory with what's really happening. Signed-off-by: Mathieu Othacehe Cc: stable Tested-by: Alexander Dahl Fixes: af47c491e3c7 ("serial: atmel: Fill in rs485_supported") Link: https://lore.kernel.org/r/20240808060637.19886-1-othacehe@gnu.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d10..09b246c9e389 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; From 50680d1698f4d4c9651822398805cb943b7c04aa Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:19 +0300 Subject: [PATCH 0825/1523] drm/xe/tests: remove unused leftover xe_call_for_each_device() xe_call_for_each_device() has been unused since commit 57ecead343e7 ("drm/xe/tests: Convert xe_mocs live tests"). Remove it and the related dev_to_xe_device_fn() and struct kunit_test_data. Cc: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/fa3bb23d005313c9797f557e1211fde09fcb59cc.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/tests/xe_pci.c | 52 -------------------------- drivers/gpu/drm/xe/tests/xe_pci_test.h | 1 - 2 files changed, 53 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 577ee7d14381..67404863087e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,58 +12,6 @@ #include #include -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. - * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Number of devices iterated or - * the error code of a call to @xe_fn returning an error code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret ?: data.ndevs; -} - /** * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs * @xe_fn: Function to call for each device. diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3e2558bc3c90..ede46800aff1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); -int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); From ec0796e6446352e9e4ecb1804bf470d1ca47c5f9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:20 +0300 Subject: [PATCH 0826/1523] drm/xe: use pdev_to_xe_device() instead of pci_get_drvdata() directly We have a helper for converting pci device to xe device, use it. Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/1b87c2e56200e001ce3a5d2f4a93eb26b294df32.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c4a3c91377a..bc575bbee42d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -747,7 +747,7 @@ static void xe_pci_remove(struct pci_dev *pdev) { struct xe_device *xe; - xe = pci_get_drvdata(pdev); + xe = pdev_to_xe_device(pdev); if (!xe) /* driver load aborted, nothing to cleanup */ return; From d408d6f8cbbb5ad92b383f33d091f027f5740aea Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:21 +0300 Subject: [PATCH 0827/1523] drm/xe: add kdev_to_xe_device() helper and use it There are enough users for kernel device to xe device conversion, add a helper for it. Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/38c80846e70c7e410850530426384e17cff9d031.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_device.h | 5 +++++ drivers/gpu/drm/xe/xe_gsc_proxy.c | 9 ++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index db6cc8d0d6b8..2c96f1b2aafd 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -15,6 +15,11 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev) return container_of(dev, struct xe_device, drm); } +static inline struct xe_device *kdev_to_xe_device(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) { return pci_get_drvdata(pdev); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index aa812a2bc3ed..28e6a7a1d282 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } -static inline struct xe_device *kdev_to_xe(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) static int xe_gsc_proxy_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev, static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; From abfceba0a7a246ac082bf569807738ff7416f59f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Aug 2024 16:20:20 -0700 Subject: [PATCH 0828/1523] ARM: riscpc: ecard: Fix the build Fix a recently introduced build failure. Cc: Russell King Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240805232026.65087-2-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-rpc/ecard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c52..9f7454b8efa7 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); From cdd1fa91a6b8c7cd93b3abf9f3ef05b8ce741b61 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Aug 2024 16:20:21 -0700 Subject: [PATCH 0829/1523] mips: sgi-ip22: Fix the build Fix a recently introduced build failure. Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240805232026.65087-3-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/sgi-ip22/ip22-gio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd..d20eec742bfa 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); From 479ffee68d59c599f8aed8fa2dcc8e13e7bd13c3 Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Mon, 12 Aug 2024 12:45:41 +0200 Subject: [PATCH 0830/1523] wifi: mt76: mt7921: fix NULL pointer access in mt7921_ipv6_addr_change When disabling wifi mt7921_ipv6_addr_change() is called as a notifier. At this point mvif->phy is already NULL so we cannot use it here. Signed-off-by: Bert Karwatzki Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240812104542.80760-1-spasswolf@web.de --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 1bab93d049df..23b228804289 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1183,7 +1183,7 @@ static void mt7921_ipv6_addr_change(struct ieee80211_hw *hw, struct inet6_dev *idev) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - struct mt792x_dev *dev = mvif->phy->dev; + struct mt792x_dev *dev = mt792x_hw_dev(hw); struct inet6_ifaddr *ifa; struct in6_addr ns_addrs[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; struct sk_buff *skb; From 38c8d02501c09454e4fbf0f67de03de35e94d384 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 12 Aug 2024 13:06:40 +0200 Subject: [PATCH 0831/1523] wifi: iwlwifi: correctly lookup DMA address in SG table The code to lookup the scatter gather table entry assumed that it was possible to use sg_virt() in order to lookup the DMA address in a mapped scatter gather table. However, this assumption is incorrect as the DMA mapping code may merge multiple entries into one. In that case, the DMA address space may have e.g. two consecutive pages which is correctly represented by the scatter gather list entry, however the virtual addresses for these two pages may differ and the relationship cannot be resolved anymore. Avoid this problem entirely by working with the offset into the mapped area instead of using virtual addresses. With that we only use the DMA length and DMA address from the scatter gather list entries. The underlying DMA/IOMMU code is therefore free to merge two entries into one even if the virtual addresses space for the area is not continuous. Fixes: 90db50755228 ("wifi: iwlwifi: use already mapped data when TXing an AMSDU") Reported-by: Chris Bainbridge Closes: https://lore.kernel.org/r/ZrNRoEbdkxkKFMBi@debian.local Signed-off-by: Benjamin Berg Tested-by: Chris Bainbridge Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240812110640.460514-1-benjamin@sipsolutions.net --- .../wireless/intel/iwlwifi/pcie/internal.h | 3 +- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 5 ++- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 32 +++++++++++++------ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b59de4f80b4b..27a7e0b5b3d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -639,7 +639,8 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, bool cmd_queue); -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr); +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, u8 **hdr, unsigned int hdr_room); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 2e780fb2da42..b1846abb99b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -168,6 +168,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; dma_addr_t start_hdr_phys; u16 length, amsdu_pad; u8 *start_hdr; @@ -260,7 +261,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, int ret; tb_len = min_t(unsigned int, tso.size, data_left); - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, + tb_len); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) goto out_err; @@ -272,6 +274,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, goto out_err; data_left -= tb_len; + data_offset += tb_len; tso_build_data(skb, &tso, tb_len); } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 22d482ae53d9..9fe050f0ddc1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1814,23 +1814,31 @@ out: /** * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list * @sgt: scatter gather table - * @addr: Virtual address + * @offset: Offset into the mapped memory (i.e. SKB payload data) + * @len: Length of the area * - * Find the entry that includes the address for the given address and return - * correct physical address for the TB entry. + * Find the DMA address that corresponds to the SKB payload data at the + * position given by @offset. * * Returns: Address for TB entry */ -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr) +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len) { struct scatterlist *sg; + unsigned int sg_offset = 0; int i; + /* + * Search the mapped DMA areas in the SG for the area that contains the + * data at offset with the given length. + */ for_each_sgtable_dma_sg(sgt, sg, i) { - if (addr >= sg_virt(sg) && - (u8 *)addr < (u8 *)sg_virt(sg) + sg_dma_len(sg)) - return sg_dma_address(sg) + - ((unsigned long)addr - (unsigned long)sg_virt(sg)); + if (offset >= sg_offset && + offset + len <= sg_offset + sg_dma_len(sg)) + return sg_dma_address(sg) + offset - sg_offset; + + sg_offset += sg_dma_len(sg); } WARN_ON_ONCE(1); @@ -1875,7 +1883,9 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + /* Only map the data, not the header (it is copied to the TSO page) */ + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), + skb->data_len); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1900,6 +1910,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; u16 length, iv_len, amsdu_pad; dma_addr_t start_hdr_phys; u8 *start_hdr, *pos_hdr; @@ -2000,7 +2011,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, data_left); dma_addr_t tb_phys; - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, size); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) return -EINVAL; @@ -2011,6 +2022,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, tb_phys, size); data_left -= size; + data_offset += size; tso_build_data(skb, &tso, size); } } From 92b6c2f0076c50aaa919d16b595f34f3e9967bea Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Jun 2024 14:50:38 +0300 Subject: [PATCH 0832/1523] KVM: SVM: Fix uninitialized variable bug If snp_lookup_rmpentry() fails then "assigned" is printed in the error message but it was never initialized. Initialize it to false. Fixes: dee5a47cc7a4 ("KVM: SEV: Add KVM_SEV_SNP_LAUNCH_UPDATE command") Signed-off-by: Dan Carpenter Message-ID: <20240612115040.2423290-3-dan.carpenter@linaro.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 532df12b43c5..393f450adbc3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2276,7 +2276,7 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) { struct sev_data_snp_launch_update fw_args = {0}; - bool assigned; + bool assigned = false; int level; ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); From cd2d00606553e631e9b5d11cca7da38fc95433e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Jun 2024 14:50:39 +0300 Subject: [PATCH 0833/1523] KVM: SVM: Fix an error code in sev_gmem_post_populate() The copy_from_user() function returns the number of bytes which it was not able to copy. Return -EFAULT instead. Fixes: dee5a47cc7a4 ("KVM: SEV: Add KVM_SEV_SNP_LAUNCH_UPDATE command") Signed-off-by: Dan Carpenter Message-ID: <20240612115040.2423290-4-dan.carpenter@linaro.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 393f450adbc3..714c517dd4b7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2290,9 +2290,10 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf if (src) { void *vaddr = kmap_local_pfn(pfn + i); - ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE); - if (ret) + if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) { + ret = -EFAULT; goto err; + } kunmap_local(vaddr); } From 58a63729c957621f1990c3494c702711188ca347 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 9 Aug 2024 08:58:58 -0700 Subject: [PATCH 0834/1523] net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings After napi_complete_done() is called when NAPI is polling in the current process context, another NAPI may be scheduled and start running in softirq on another CPU and may ring the doorbell before the current CPU does. When combined with unnecessary rings when there is no need to arm the CQ, it triggers error paths in the hardware. This patch fixes this by calling napi_complete_done() after doorbell rings. It limits the number of unnecessary rings when there is no need to arm. MANA hardware specifies that there must be one doorbell ring every 8 CQ wraparounds. This driver guarantees one doorbell ring as soon as the number of consumed CQEs exceeds 4 CQ wraparounds. In practical workloads, the 4 CQ wraparounds proves to be big enough that it rarely exceeds this limit before all the napi weight is consumed. To implement this, add a per-CQ counter cq->work_done_since_doorbell, and make sure the CQ is armed as soon as passing 4 wraparounds of the CQ. Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Reviewed-by: Haiyang Zhang Signed-off-by: Long Li Link: https://patch.msgid.link/1723219138-29887-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 22 ++++++++++++------- include/net/mana/mana.h | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ae717d06e66f..39f56973746d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1792,7 +1792,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1803,16 +1802,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; + cq->work_done_since_doorbell += w; - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b..7caa334f4888 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; From 12d82c7b0a612372f594e4ff00983a1da3a1d929 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 13 Aug 2024 12:07:50 +0100 Subject: [PATCH 0835/1523] ALSA: hda: cs35l56: Remove redundant call to hda_cs_dsp_control_remove() The driver doesn't create any ALSA controls for firmware controls, so it shouldn't be calling hda_cs_dsp_control_remove(). commit 34e1b1bb7324 ("ALSA: hda: cs35l56: Stop creating ALSA controls for firmware coefficients") removed the call to hda_cs_dsp_add_controls() but didn't remove the call for destroying those controls. Signed-off-by: Richard Fitzgerald Fixes: 34e1b1bb7324 ("ALSA: hda: cs35l56: Stop creating ALSA controls for firmware coefficients") Link: https://patch.msgid.link/20240813110750.2814-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 31cc92bac89a..a9dfd62637cf 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -413,7 +413,7 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) } static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, From 42fac187b5c746227c92d024f1caf33bc1d337e4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Apr 2024 16:41:20 -0400 Subject: [PATCH 0836/1523] btrfs: check delayed refs when we're checking if a ref exists In the patch 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete resume") I added some code to handle file systems that had been corrupted by a bug that incorrectly skipped updating the drop progress key while dropping a snapshot. This code would check to see if we had already deleted our reference for a child block, and skip the deletion if we had already. Unfortunately there is a bug, as the check would only check the on-disk references. I made an incorrect assumption that blocks in an already deleted snapshot that was having the deletion resume on mount wouldn't be modified. If we have 2 pending deleted snapshots that share blocks, we can easily modify the rules for a block. Take the following example subvolume a exists, and subvolume b is a snapshot of subvolume a. They share references to block 1. Block 1 will have 2 full references, one for subvolume a and one for subvolume b, and it belongs to subvolume a (btrfs_header_owner(block 1) == subvolume a). When deleting subvolume a, we will drop our full reference for block 1, and because we are the owner we will drop our full reference for all of block 1's children, convert block 1 to FULL BACKREF, and add a shared reference to all of block 1's children. Then we will start the snapshot deletion of subvolume b. We look up the extent info for block 1, which checks delayed refs and tells us that FULL BACKREF is set, so sets parent to the bytenr of block 1. However because this is a resumed snapshot deletion, we call into check_ref_exists(). Because check_ref_exists() only looks at the disk, it doesn't find the shared backref for the child of block 1, and thus returns 0 and we skip deleting the reference for the child of block 1 and continue. This orphans the child of block 1. The fix is to lookup the delayed refs, similar to what we do in btrfs_lookup_extent_info(). However we only care about whether the reference exists or not. If we fail to find our reference on disk, go look up the bytenr in the delayed refs, and if it exists look for an existing ref in the delayed ref head. If that exists then we know we can delete the reference safely and carry on. If it doesn't exist we know we have to skip over this block. This bug has existed since I introduced this fix, however requires having multiple deleted snapshots pending when we unmount. We noticed this in production because our shutdown path stops the container on the system, which deletes a bunch of subvolumes, and then reboots the box. This gives us plenty of opportunities to hit this issue. Looking at the history we've seen this occasionally in production, but we had a big spike recently thanks to faster machines getting jobs with multiple subvolumes in the job. Chris Mason wrote a reproducer which does the following mount /dev/nvme4n1 /btrfs btrfs subvol create /btrfs/s1 simoop -E -f 4k -n 200000 -z /btrfs/s1 while(true) ; do btrfs subvol snap /btrfs/s1 /btrfs/s2 simoop -f 4k -n 200000 -r 10 -z /btrfs/s2 btrfs subvol snap /btrfs/s2 /btrfs/s3 btrfs balance start -dusage=80 /btrfs btrfs subvol del /btrfs/s2 /btrfs/s3 umount /btrfs btrfsck /dev/nvme4n1 || exit 1 mount /dev/nvme4n1 /btrfs done On the second loop this would fail consistently, with my patch it has been running for hours and hasn't failed. I also used dm-log-writes to capture the state of the failure so I could debug the problem. Using the existing failure case to test my patch validated that it fixes the problem. Fixes: 78c52d9eb6b7 ("btrfs: check for refs on snapshot delete resume") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 67 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/delayed-ref.h | 2 ++ fs/btrfs/extent-tree.c | 51 ++++++++++++++++++++++++++++---- 3 files changed, 114 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2ac9296edccb..06a9e0542d70 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ef15e998be03..05f634eb472d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ff9f0d41987e..feec49e6f9c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5472,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 parent, int level) { + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *head; struct btrfs_path *path; struct btrfs_extent_inline_ref *iref; int ret; + bool exists = false; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: ret = lookup_extent_backref(trans, path, &iref, bytenr, root->fs_info->nodesize, parent, btrfs_root_id(root), level, 0); + if (ret != -ENOENT) { + /* + * If we get 0 then we found our reference, return 1, else + * return the error if it's not -ENOENT; + */ + btrfs_free_path(path); + return (ret < 0 ) ? ret : 1; + } + + /* + * We could have a delayed ref with this reference, so look it up while + * we're holding the path open to make sure we don't race with the + * delayed ref running. + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + if (!head) + goto out; + if (!mutex_trylock(&head->mutex)) { + /* + * We're contended, means that the delayed ref is running, get a + * reference and wait for the ref head to be complete and then + * try again. + */ + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref_head(head); + goto again; + } + + exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + mutex_unlock(&head->mutex); +out: + spin_unlock(&delayed_refs->lock); btrfs_free_path(path); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - return 1; + return exists ? 1 : 0; } /* From 31723c9542dba1681cc3720571fdf12ffe0eddd9 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 12 Aug 2024 08:52:44 +0930 Subject: [PATCH 0837/1523] btrfs: tree-checker: reject BTRFS_FT_UNKNOWN dir type [REPORT] There is a bug report that kernel is rejecting a mismatching inode mode and its dir item: [ 1881.553937] BTRFS critical (device dm-0): inode mode mismatch with dir: inode mode=040700 btrfs type=2 dir type=0 [CAUSE] It looks like the inode mode is correct, while the dir item type 0 is BTRFS_FT_UNKNOWN, which should not be generated by btrfs at all. This may be caused by a memory bit flip. [ENHANCEMENT] Although tree-checker is not able to do any cross-leaf verification, for this particular case we can at least reject any dir type with BTRFS_FT_UNKNOWN. So here we enhance the dir type check from [0, BTRFS_FT_MAX), to (0, BTRFS_FT_MAX). Although the existing corruption can not be fixed just by such enhanced checking, it should prevent the same 0x2->0x0 bitflip for dir type to reach disk in the future. Reported-by: Kota Link: https://lore.kernel.org/linux-btrfs/CACsxjPYnQF9ZF-0OhH16dAx50=BXXOcP74MxBc3BG+xae4vTTw@mail.gmail.com/ CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index a825fa598e3c..6f1e2f2215d9 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -569,9 +569,10 @@ static int check_dir_item(struct extent_buffer *leaf, /* dir type check */ dir_type = btrfs_dir_ftype(leaf, di); - if (unlikely(dir_type >= BTRFS_FT_MAX)) { + if (unlikely(dir_type <= BTRFS_FT_UNKNOWN || + dir_type >= BTRFS_FT_MAX)) { dir_item_err(leaf, slot, - "invalid dir item type, have %u expect [0, %u)", + "invalid dir item type, have %u expect (0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; } From 8475a1d9bb7acf1cb15842dd24baab0e8ea4e4ff Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 13 Aug 2024 12:32:09 +0100 Subject: [PATCH 0838/1523] ALSA: hda: cs35l41: Remove redundant call to hda_cs_dsp_control_remove() The driver doesn't create any ALSA controls for firmware controls, so it shouldn't be calling hda_cs_dsp_control_remove(). commit 312c04cee408 ("ALSA: hda: cs35l41: Stop creating ALSA Controls for firmware coefficients") removed the call to hda_cs_dsp_add_controls() but didn't remove the call for destroying those controls. Signed-off-by: Richard Fitzgerald Fixes: 312c04cee408 ("ALSA: hda: cs35l41: Stop creating ALSA Controls for firmware coefficients") Link: https://patch.msgid.link/20240813113209.648-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 3a92e98da72d..d68bf7591d90 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -134,7 +134,7 @@ static const struct reg_sequence cs35l41_hda_mute[] = { }; static const struct cs_dsp_client_ops client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l41_request_tuning_param_file(struct cs35l41_hda *cs35l41, char *tuning_filename, From ae1e766f623f7a2a889a0b09eb076dd9a60efbe9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 11 Aug 2024 11:53:42 +0100 Subject: [PATCH 0839/1523] btrfs: only run the extent map shrinker from kswapd tasks Currently the extent map shrinker can be run by any task when attempting to allocate memory and there's enough memory pressure to trigger it. To avoid too much latency we stop iterating over extent maps and removing them once the task needs to reschedule. This logic was introduced in commit b3ebb9b7e92a ("btrfs: stop extent map shrinker if reschedule is needed"). While that solved high latency problems for some use cases, it's still not enough because with a too high number of tasks entering the extent map shrinker code, either due to memory allocations or because they are a kswapd task, we end up having a very high level of contention on some spin locks, namely: 1) The fs_info->fs_roots_radix_lock spin lock, which we need to find roots to iterate over their inodes; 2) The spin lock of the xarray used to track open inodes for a root (struct btrfs_root::inodes) - on 6.10 kernels and below, it used to be a red black tree and the spin lock was root->inode_lock; 3) The fs_info->delayed_iput_lock spin lock since the shrinker adds delayed iputs (calls btrfs_add_delayed_iput()). Instead of allowing the extent map shrinker to be run by any task, make it run only by kswapd tasks. This still solves the problem of running into OOM situations due to an unbounded extent map creation, which is simple to trigger by direct IO writes, as described in the changelog of commit 956a17d9d050 ("btrfs: add a shrinker for extent maps"), and by a similar case when doing buffered IO on files with a very large number of holes (keeping the file open and creating many holes, whose extent maps are only released when the file is closed). Reported-by: kzd Link: https://bugzilla.kernel.org/show_bug.cgi?id=219121 Reported-by: Octavia Togami Link: https://lore.kernel.org/linux-btrfs/CAHPNGSSt-a4ZZWrtJdVyYnJFscFjP9S7rMcvEMaNSpR556DdLA@mail.gmail.com/ Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps") CC: stable@vger.kernel.org # 6.10+ Tested-by: kzd Tested-by: Octavia Togami Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 22 ++++++---------------- fs/btrfs/super.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23b65dc73c00..10ac5f657e38 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c return 0; /* - * We want to be fast because we can be called from any path trying to - * allocate memory, so if the lock is busy we don't want to spend time + * We want to be fast so if the lock is busy we don't want to spend time * waiting for it - either some task is about to do IO for the inode or * we may have another task shrinking extent maps, here in this code, so * skip this inode. @@ -1191,9 +1190,7 @@ next: /* * Stop if we need to reschedule or there's contention on the * lock. This is to avoid slowing other tasks trying to take the - * lock and because the shrinker might be called during a memory - * allocation path and we want to avoid taking a very long time - * and slowing down all sorts of tasks. + * lock. */ if (need_resched() || rwlock_needbreak(&tree->lock)) break; @@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx if (ctx->scanned >= ctx->nr_to_scan) break; - /* - * We may be called from memory allocation paths, so we don't - * want to take too much time and slowdown tasks. - */ - if (need_resched()) - break; + cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) ctx.last_ino); } - /* - * We may be called from memory allocation paths, so we don't want to - * take too much time and slowdown tasks, so stop if we need reschedule. - */ - while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { + while (ctx.scanned < ctx.nr_to_scan) { struct btrfs_root *root; unsigned long count; + cond_resched(); + spin_lock(&fs_info->fs_roots_radix_lock); count = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)&root, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 83478deada3b..11044e9e2cb1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "messages.h" #include "delayed-inode.h" #include "ctree.h" @@ -2409,6 +2410,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); + /* + * We may be called from any task trying to allocate memory and we don't + * want to slow it down with scanning and dropping extent maps. It would + * also cause heavy lock contention if many tasks concurrently enter + * here. Therefore only allow kswapd tasks to scan and drop extent maps. + */ + if (!current_is_kswapd()) + return 0; + return btrfs_free_extent_maps(fs_info, nr_to_scan); } From 779bac9994452f6a894524f70c00cfb0cd4b6364 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:08:04 +0200 Subject: [PATCH 0840/1523] Revert "ACPI: EC: Evaluate orphan _REG under EC device" This reverts commit 0e6b6dedf168 ("Revert "ACPI: EC: Evaluate orphan _REG under EC device") because the problem addressed by it will be addressed differently in what follows. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/3236716.5fSG56mABF@rjwysocki.net --- drivers/acpi/acpica/acevents.h | 4 --- drivers/acpi/acpica/evregion.c | 6 +++- drivers/acpi/acpica/evxfregn.c | 54 ---------------------------------- drivers/acpi/ec.c | 3 -- include/acpi/acpixf.h | 4 --- 5 files changed, 5 insertions(+), 66 deletions(-) diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 2133085deda7..ddd072cbc738 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -191,10 +191,6 @@ void acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, acpi_adr_space_type space_id, u32 function); -void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, - acpi_adr_space_type space_id); - acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index dc6004daf624..18fdf2bc2d49 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,6 +20,10 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ +static void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, + acpi_adr_space_type space_id); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -814,7 +818,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -void +static void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 624361a5f34d..3197e6303c5b 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -306,57 +306,3 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) - -/******************************************************************************* - * - * FUNCTION: acpi_execute_orphan_reg_method - * - * PARAMETERS: device - Handle for the device - * space_id - The address space ID - * - * RETURN: Status - * - * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI - * device. This is a _REG method that has no corresponding region - * within the device's scope. - * - ******************************************************************************/ -acpi_status -acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) -{ - struct acpi_namespace_node *node; - acpi_status status; - - ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); - - /* Parameter validation */ - - if (!device) { - return_ACPI_STATUS(AE_BAD_PARAMETER); - } - - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Convert and validate the device handle */ - - node = acpi_ns_validate_handle(device); - if (node) { - - /* - * If an "orphan" _REG method is present in the device's scope - * for the given address space ID, run it. - */ - - acpi_ev_execute_orphan_reg_method(node, space_id); - } else { - status = AE_BAD_PARAMETER; - } - - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - return_ACPI_STATUS(status); -} - -ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 299ec653388c..68dd17f96f63 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1507,9 +1507,6 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); - if (scope_handle != ec->handle) - acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); - set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..94d0fc3bd412 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,10 +662,6 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, From 5d61841c74db8b5bbbf9403f1bd4879f614617d2 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 9 Aug 2024 16:15:39 -0400 Subject: [PATCH 0841/1523] spi: zynqmp-gqspi: Scale timeout by data size Large blocks of data time out when reading because we don't wait long enough for the transfer to complete. Scale our timeouts based on the amount of data we are tranferring, with a healthy dose of pessimism. Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20240809201540.3363243-1-sean.anderson@linux.dev Signed-off-by: Mark Brown --- drivers/spi/spi-zynqmp-gqspi.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 99524a3c9f38..558c466135a5 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1033,6 +1033,18 @@ static int __maybe_unused zynqmp_runtime_resume(struct device *dev) return 0; } +static unsigned long zynqmp_qspi_timeout(struct zynqmp_qspi *xqspi, u8 bits, + unsigned long bytes) +{ + unsigned long timeout; + + /* Assume we are at most 2x slower than the nominal bus speed */ + timeout = mult_frac(bytes, 2 * 8 * MSEC_PER_SEC, + bits * xqspi->speed_hz); + /* And add 100 ms for scheduling delays */ + return msecs_to_jiffies(timeout + 100); +} + /** * zynqmp_qspi_exec_op() - Initiates the QSPI transfer * @mem: The SPI memory @@ -1049,6 +1061,7 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, { struct zynqmp_qspi *xqspi = spi_controller_get_devdata (mem->spi->controller); + unsigned long timeout; int err = 0, i; u32 genfifoentry = 0; u16 opcode = op->cmd.opcode; @@ -1077,8 +1090,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST, GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->cmd.buswidth, + op->cmd.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1104,8 +1119,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_TXEMPTY_MASK | GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->addr.buswidth, + op->addr.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1173,8 +1190,9 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_RXEMPTY_MASK); } } - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) + timeout = zynqmp_qspi_timeout(xqspi, op->data.buswidth, + op->data.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, timeout)) err = -ETIMEDOUT; } From cdf65d73e001fde600b18d7e45afadf559425ce5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:11:42 +0200 Subject: [PATCH 0842/1523] ACPICA: Add a depth argument to acpi_execute_reg_methods() A subsequent change will need to pass a depth argument to acpi_execute_reg_methods(), so prepare that function for it. No intentional functional changes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/8451567.NyiUUSuA9g@rjwysocki.net --- drivers/acpi/acpica/acevents.h | 2 +- drivers/acpi/acpica/evregion.c | 6 ++++-- drivers/acpi/acpica/evxfregn.c | 10 +++++++--- drivers/acpi/ec.c | 2 +- include/acpi/acpixf.h | 1 + 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index ddd072cbc738..1c5218b79fc2 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -188,7 +188,7 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, u8 acpi_ns_is_locked); void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function); acpi_status diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 18fdf2bc2d49..cf53b9535f18 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -65,6 +65,7 @@ acpi_status acpi_ev_initialize_op_regions(void) acpi_gbl_default_address_spaces [i])) { acpi_ev_execute_reg_methods(acpi_gbl_root_node, + ACPI_UINT32_MAX, acpi_gbl_default_address_spaces [i], ACPI_REG_CONNECT); } @@ -672,6 +673,7 @@ cleanup1: * FUNCTION: acpi_ev_execute_reg_methods * * PARAMETERS: node - Namespace node for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * function - Passed to _REG: On (1) or Off (0) * @@ -683,7 +685,7 @@ cleanup1: ******************************************************************************/ void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function) { struct acpi_reg_walk_info info; @@ -717,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, * regions and _REG methods. (i.e. handlers must be installed for all * regions of this Space ID before we can run any _REG methods) */ - (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, + (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &info, NULL); diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 3197e6303c5b..95f78383bbdb 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -85,7 +85,8 @@ acpi_install_address_space_handler_internal(acpi_handle device, /* Run all _REG methods for this address space */ if (run_reg) { - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id, + ACPI_REG_CONNECT); } unlock_and_exit: @@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * FUNCTION: acpi_execute_reg_methods * * PARAMETERS: device - Handle for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * * RETURN: Status @@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * ******************************************************************************/ acpi_status -acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +acpi_execute_reg_methods(acpi_handle device, u32 max_depth, + acpi_adr_space_type space_id) { struct acpi_namespace_node *node; acpi_status status; @@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, max_depth, space_id, + ACPI_REG_CONNECT); } else { status = AE_BAD_PARAMETER; } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 68dd17f96f63..d9c12db80f11 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1506,7 +1506,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); + acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..9f1c1d225e32 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,6 +660,7 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status From 71bf41b8e913ec9fc91f0d39ab8fb320229ec604 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:16:21 +0200 Subject: [PATCH 0843/1523] ACPI: EC: Evaluate _REG outside the EC scope more carefully Commit 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root") caused _REG methods for EC operation regions outside the EC device scope to be evaluated which on some systems leads to the evaluation of _REG methods in the scopes of device objects representing devices that are not present and not functional according to the _STA return values. Some of those device objects represent EC "alternatives" and if _REG is evaluated for their operation regions, the platform firmware may be confused and the platform may start to behave incorrectly. To avoid this problem, only evaluate _REG for EC operation regions located in the scopes of device objects representing known-to-be-present devices. For this purpose, partially revert commit 60fa6ae6e6d0 and trigger the evaluation of _REG for EC operation regions from acpi_bus_attach() for the known-valid devices. Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root") Link: https://lore.kernel.org/linux-acpi/1f76b7e2-1928-4598-8037-28a1785c2d13@redhat.com Link: https://bugzilla.redhat.com/show_bug.cgi?id=2298938 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2302253 Reported-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/23612351.6Emhk5qWAg@rjwysocki.net --- drivers/acpi/ec.c | 11 +++++++++-- drivers/acpi/internal.h | 1 + drivers/acpi/scan.c | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index d9c12db80f11..38d2f6e6b12b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { - acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; acpi_status status; acpi_ec_start(ec, false); if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { + acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; + acpi_ec_enter_noirq(ec); status = acpi_install_address_space_handler_no_reg(scope_handle, ACPI_ADR_SPACE_EC, @@ -1506,7 +1507,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); + acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } @@ -1721,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_device *device) } } +void acpi_ec_register_opregions(struct acpi_device *adev) +{ + if (first_ec && first_ec->handle != adev->handle) + acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC); +} + static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 601b670356e5..aadd4c218b32 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); +void acpi_ec_register_opregions(struct acpi_device *adev); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 59771412686b..22ae7829a915 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2273,6 +2273,8 @@ static int acpi_bus_attach(struct acpi_device *device, void *first_pass) if (device->handler) goto ok; + acpi_ec_register_opregions(device); + if (!device->flags.initialized) { device->flags.power_manageable = device->power.states[ACPI_STATE_D0].flags.valid; From 46a6e10a1ab16cc71d4a3cab73e79aabadd6b8ea Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 12 Aug 2024 14:18:06 +0100 Subject: [PATCH 0844/1523] btrfs: send: allow cloning non-aligned extent if it ends at i_size If we a find that an extent is shared but its end offset is not sector size aligned, then we don't clone it and issue write operations instead. This is because the reflink (remap_file_range) operation does not allow to clone unaligned ranges, except if the end offset of the range matches the i_size of the source and destination files (and the start offset is sector size aligned). While this is not incorrect because send can only guarantee that a file has the same data in the source and destination snapshots, it's not optimal and generates confusion and surprising behaviour for users. For example, running this test: $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV mount $DEV $MNT # Use a file size not aligned to any possible sector size. file_size=$((1 * 1024 * 1024 + 5)) # 1MB + 5 bytes dd if=/dev/random of=$MNT/foo bs=$file_size count=1 cp --reflink=always $MNT/foo $MNT/bar btrfs subvolume snapshot -r $MNT/ $MNT/snap rm -f /tmp/send-test btrfs send -f /tmp/send-test $MNT/snap umount $MNT mkfs.btrfs -f $DEV mount $DEV $MNT btrfs receive -vv -f /tmp/send-test $MNT xfs_io -r -c "fiemap -v" $MNT/snap/bar umount $MNT Gives the following result: (...) mkfile o258-7-0 rename o258-7-0 -> bar write bar - offset=0 length=49152 write bar - offset=49152 length=49152 write bar - offset=98304 length=49152 write bar - offset=147456 length=49152 write bar - offset=196608 length=49152 write bar - offset=245760 length=49152 write bar - offset=294912 length=49152 write bar - offset=344064 length=49152 write bar - offset=393216 length=49152 write bar - offset=442368 length=49152 write bar - offset=491520 length=49152 write bar - offset=540672 length=49152 write bar - offset=589824 length=49152 write bar - offset=638976 length=49152 write bar - offset=688128 length=49152 write bar - offset=737280 length=49152 write bar - offset=786432 length=49152 write bar - offset=835584 length=49152 write bar - offset=884736 length=49152 write bar - offset=933888 length=49152 write bar - offset=983040 length=49152 write bar - offset=1032192 length=16389 chown bar - uid=0, gid=0 chmod bar - mode=0644 utimes bar utimes BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=06d640da-9ca1-604c-b87c-3375175a8eb3, stransid=7 /mnt/sdi/snap/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2055]: 26624..28679 2056 0x1 There's no clone operation to clone extents from the file foo into file bar and fiemap confirms there's no shared flag (0x2000). So update send_write_or_clone() so that it proceeds with cloning if the source and destination ranges end at the i_size of the respective files. After this changes the result of the test is: (...) mkfile o258-7-0 rename o258-7-0 -> bar clone bar - source=foo source offset=0 offset=0 length=1048581 chown bar - uid=0, gid=0 chmod bar - mode=0644 utimes bar utimes BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=582420f3-ea7d-564e-bbe5-ce440d622190, stransid=7 /mnt/sdi/snap/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2055]: 26624..28679 2056 0x2001 A test case for fstests will also follow up soon. Link: https://github.com/kdave/btrfs-progs/issues/572#issuecomment-2282841416 CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 52 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ca711a773ef..7fc692fc76e1 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 offset = key->offset; u64 end; u64 bs = sctx->send_root->fs_info->sectorsize; + struct btrfs_file_extent_item *ei; + u64 disk_byte; + u64 data_offset; + u64 num_bytes; + struct btrfs_inode_info info = { 0 }; end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); if (offset >= end) return 0; - if (clone_root && IS_ALIGNED(end, bs)) { - struct btrfs_file_extent_item *ei; - u64 disk_byte; - u64 data_offset; + num_bytes = end - offset; - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); - data_offset = btrfs_file_extent_offset(path->nodes[0], ei); - ret = clone_range(sctx, path, clone_root, disk_byte, - data_offset, offset, end - offset); - } else { - ret = send_extent_data(sctx, path, offset, end - offset); - } + if (!clone_root) + goto write_data; + + if (IS_ALIGNED(end, bs)) + goto clone_data; + + /* + * If the extent end is not aligned, we can clone if the extent ends at + * the i_size of the inode and the clone range ends at the i_size of the + * source inode, otherwise the clone operation fails with -EINVAL. + */ + if (end != sctx->cur_inode_size) + goto write_data; + + ret = get_inode_info(clone_root->root, clone_root->ino, &info); + if (ret < 0) + return ret; + + if (clone_root->offset + num_bytes == info.size) + goto clone_data; + +write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); + sctx->cur_inode_next_write_offset = end; + return ret; + +clone_data: + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); + data_offset = btrfs_file_extent_offset(path->nodes[0], ei); + ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset, + num_bytes); sctx->cur_inode_next_write_offset = end; return ret; } From 1e1fd567d32fcf7544c6e09e0e5bc6c650da6e23 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 13 Aug 2024 12:38:51 +0200 Subject: [PATCH 0845/1523] dm suspend: return -ERESTARTSYS instead of -EINTR This commit changes device mapper, so that it returns -ERESTARTSYS instead of -EINTR when it is interrupted by a signal (so that the ioctl can be restarted). The manpage signal(7) says that the ioctl function should be restarted if the signal was handled with SA_RESTART. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97fab2087df8..87bb90303435 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2737,7 +2737,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } @@ -2762,7 +2762,7 @@ static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_st break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } From 7a636b4f03af9d541205f69e373672e7b2b60a8a Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Tue, 13 Aug 2024 12:39:52 +0200 Subject: [PATCH 0846/1523] dm resume: don't return EINVAL when signalled If the dm_resume method is called on a device that is not suspended, the method will suspend the device briefly, before resuming it (so that the table will be swapped). However, there was a bug that the return value of dm_suspended_md was not checked. dm_suspended_md may return an error when it is interrupted by a signal. In this case, do_resume would call dm_swap_table, which would return -EINVAL. This commit fixes the logic, so that error returned by dm_suspend is checked and the resume operation is undone. Signed-off-by: Mikulas Patocka Signed-off-by: Khazhismel Kumykov Cc: stable@vger.kernel.org --- drivers/md/dm-ioctl.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c2c07bfa6471..f299ff393a6a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *param) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); + if (!dm_suspended_md(md)) { + r = dm_suspend(md, suspend_flags); + if (r) { + down_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (hc && !hc->new_map) { + hc->new_map = new_map; + new_map = NULL; + } else { + r = -ENXIO; + } + up_write(&_hash_lock); + if (new_map) { + dm_sync_table(md); + dm_table_destroy(new_map); + } + dm_put(md); + return r; + } + } old_size = dm_get_size(md); old_map = dm_swap_table(md, new_map); From 2a0629834cd82f05d424bbc193374f9a43d1f87d Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 9 Aug 2024 11:16:28 +0800 Subject: [PATCH 0847/1523] vfs: Don't evict inode under the inode lru traversing context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inode reclaiming process(See function prune_icache_sb) collects all reclaimable inodes and mark them with I_FREEING flag at first, at that time, other processes will be stuck if they try getting these inodes (See function find_inode_fast), then the reclaiming process destroy the inodes by function dispose_list(). Some filesystems(eg. ext4 with ea_inode feature, ubifs with xattr) may do inode lookup in the inode evicting callback function, if the inode lookup is operated under the inode lru traversing context, deadlock problems may happen. Case 1: In function ext4_evict_inode(), the ea inode lookup could happen if ea_inode feature is enabled, the lookup process will be stuck under the evicting context like this: 1. File A has inode i_reg and an ea inode i_ea 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea 3. Then, following three processes running like this: PA PB echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // i_reg is added into lru, lru->i_ea->i_reg prune_icache_sb list_lru_walk_one inode_lru_isolate i_ea->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(i_reg) spin_unlock(&i_reg->i_lock) spin_unlock(lru_lock) rm file A i_reg->nlink = 0 iput(i_reg) // i_reg->nlink is 0, do evict ext4_evict_inode ext4_xattr_delete_inode ext4_xattr_inode_dec_ref_all ext4_xattr_inode_iget ext4_iget(i_ea->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(i_ea) ----→ AA deadlock dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&i_ea->i_state) Case 2: In deleted inode writing function ubifs_jnl_write_inode(), file deleting process holds BASEHD's wbuf->io_mutex while getting the xattr inode, which could race with inode reclaiming process(The reclaiming process could try locking BASEHD's wbuf->io_mutex in inode evicting function), then an ABBA deadlock problem would happen as following: 1. File A has inode ia and a xattr(with inode ixa), regular file B has inode ib and a xattr. 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa 3. Then, following three processes running like this: PA PB PC echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // ib and ia are added into lru, lru->ixa->ib->ia prune_icache_sb list_lru_walk_one inode_lru_isolate ixa->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(ib) spin_unlock(&ib->i_lock) spin_unlock(lru_lock) rm file B ib->nlink = 0 rm file A iput(ia) ubifs_evict_inode(ia) ubifs_jnl_delete_inode(ia) ubifs_jnl_write_inode(ia) make_reservation(BASEHD) // Lock wbuf->io_mutex ubifs_iget(ixa->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(ixa) | iput(ib) // ib->nlink is 0, do evict | ubifs_evict_inode | ubifs_jnl_delete_inode(ib) ↓ ubifs_jnl_write_inode ABBA deadlock ←-----make_reservation(BASEHD) dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&ixa->i_state) Fix the possible deadlock by using new inode state flag I_LRU_ISOLATING to pin the inode in memory while inode_lru_isolate() reclaims its pages instead of using ordinary inode reference. This way inode deletion cannot be triggered from inode_lru_isolate() thus avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to be cleared before proceeding with inode cleanup. Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022 Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files") Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Suggested-by: Mateusz Guzik Signed-off-by: Christian Brauner --- fs/inode.c | 39 +++++++++++++++++++++++++++++++++++++-- include/linux/fs.h | 5 +++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 86670941884b..10c4619faeef 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode) this_cpu_dec(nr_unused); } +static void inode_pin_lru_isolating(struct inode *inode) +{ + lockdep_assert_held(&inode->i_lock); + WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode->i_state |= I_LRU_ISOLATING; +} + +static void inode_unpin_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); + inode->i_state &= ~I_LRU_ISOLATING; + smp_mb(); + wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); +} + +static void inode_wait_for_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_state & I_LRU_ISOLATING) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); + __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_LRU_ISOLATING); + } + spin_unlock(&inode->i_lock); +} + /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add @@ -657,6 +690,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_wait_for_lru_isolating(inode); + /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since @@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { - __iget(inode); + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { @@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } - iput(inode); + inode_unpin_lru_isolating(inode); spin_lock(lru_lock); return LRU_RETRY; } diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..fb0426f349fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) From e3786b29c54cdae3490b07180a54e2461f42144c Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 8 Aug 2024 14:29:38 +0100 Subject: [PATCH 0848/1523] 9p: Fix DIO read through netfs If a program is watching a file on a 9p mount, it won't see any change in size if the file being exported by the server is changed directly in the source filesystem, presumably because 9p doesn't have change notifications, and because netfs skips the reads if the file is empty. Fix this by attempting to read the full size specified when a DIO read is requested (such as when 9p is operating in unbuffered mode) and dealing with a short read if the EOF was less than the expected read. To make this work, filesystems using netfslib must not set NETFS_SREQ_CLEAR_TAIL if performing a DIO read where that read hit the EOF. I don't want to mandatorily clear this flag in netfslib for DIO because, say, ceph might make a read from an object that is not completely filled, but does not reside at the end of file - and so we need to clear the excess. This can be tested by watching an empty file over 9p within a VM (such as in the ktest framework): while true; do read content; if [ -n "$content" ]; then echo $content; break; fi; done < /host/tmp/foo then writing something into the empty file. The watcher should immediately display the file content and break out of the loop. Without this fix, it remains in the loop indefinitely. Fixes: 80105ed2fd27 ("9p: Use netfslib read/write_iter") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218916 Signed-off-by: David Howells Link: https://lore.kernel.org/r/1229195.1723211769@warthog.procyon.org.uk cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Christian Schoenebeck cc: Marc Dionne cc: Ilya Dryomov cc: Steve French cc: Paulo Alcantara cc: Trond Myklebust cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Christian Brauner --- fs/9p/vfs_addr.c | 3 ++- fs/afs/file.c | 3 ++- fs/ceph/addr.c | 6 ++++-- fs/netfs/io.c | 17 +++++++++++------ fs/nfs/fscache.c | 3 ++- fs/smb/client/file.c | 3 ++- 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a97ceb105cd8..24fdc74caeba 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, err ?: total, false); } diff --git a/fs/afs/file.c b/fs/afs/file.c index c3f0c45ae9a9..ec1be0091fdb 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op) req->error = error; if (subreq) { - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, error ?: req->actual_len, false); req->subreq = NULL; } else if (req->done) { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index cc0a2240de98..c4744a02db75 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (err >= 0) { if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); - if (err < subreq->len) + if (err < subreq->len && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { err = ceph_fscrypt_decrypt_extents(inode, @@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); if (subreq->start >= inode->i_size) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c179a1c73fa7..5367caf3fa28 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -530,7 +530,8 @@ incomplete: if (transferred_or_error == 0) { if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { - subreq->error = -ENODATA; + if (rreq->origin != NETFS_DIO_READ) + subreq->error = -ENODATA; goto failed; } } else { @@ -601,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, } if (subreq->len > ictx->zero_point - subreq->start) subreq->len = ictx->zero_point - subreq->start; + + /* We limit buffered reads to the EOF, but let the + * server deal with larger-than-EOF DIO/unbuffered + * reads. + */ + if (subreq->len > rreq->i_size - subreq->start) + subreq->len = rreq->i_size - subreq->start; } - if (subreq->len > rreq->i_size - subreq->start) - subreq->len = rreq->i_size - subreq->start; if (rreq->rsize && subreq->len > rreq->rsize) subreq->len = rreq->rsize; @@ -739,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) do { _debug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); - if (rreq->origin == NETFS_DIO_READ && - rreq->start + rreq->submitted >= rreq->i_size) - break; if (!netfs_rreq_submit_slice(rreq, &io_iter)) break; + if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags)) + break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index bf29a65c5027..7a558dea75c4 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -363,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4..3f3842e7b44a 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) goto out; } - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); rc = rdata->server->ops->async_readv(rdata); out: From 810ee43d9cd245d138a2733d87a24858a23f577d Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 12 Aug 2024 00:28:21 +0100 Subject: [PATCH 0849/1523] Squashfs: sanity check symbolic link size Syzkiller reports a "KMSAN: uninit-value in pick_link" bug. This is caused by an uninitialised page, which is ultimately caused by a corrupted symbolic link size read from disk. The reason why the corrupted symlink size causes an uninitialised page is due to the following sequence of events: 1. squashfs_read_inode() is called to read the symbolic link from disk. This assigns the corrupted value 3875536935 to inode->i_size. 2. Later squashfs_symlink_read_folio() is called, which assigns this corrupted value to the length variable, which being a signed int, overflows producing a negative number. 3. The following loop that fills in the page contents checks that the copied bytes is less than length, which being negative means the loop is skipped, producing an uninitialised page. This patch adds a sanity check which checks that the symbolic link size is not larger than expected. -- Signed-off-by: Phillip Lougher Link: https://lore.kernel.org/r/20240811232821.13903-1-phillip@squashfs.org.uk Reported-by: Lizhi Xu Reported-by: syzbot+24ac24ff58dc5b0d26b9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a90e8c061e86a76b@google.com/ V2: fix spelling mistake. Signed-off-by: Christian Brauner --- fs/squashfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 16bd693d0b3a..d5918eba27e3 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; From e4956dc7a84da074fd8dc10f7abd147f15b3ae58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Aug 2024 06:10:59 -0600 Subject: [PATCH 0850/1523] io_uring/sqpoll: annotate debug task == current with data_race() There's a debug check in io_sq_thread_park() checking if it's the SQPOLL thread itself calling park. KCSAN warns about this, as we should not be reading sqd->thread outside of sqd->lock. Just silence this with data_race(). The pointer isn't used for anything but this debug check. Reported-by: syzbot+2b946a3fd80caf971b21@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index b3722e5275e7..3b50dc9586d1 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -44,7 +44,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(data_race(sqd->thread) == current); atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); From bcc954c6caba01fca143162d5fbb90e46aa1ad80 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Mon, 12 Aug 2024 16:27:03 +0900 Subject: [PATCH 0851/1523] printk/panic: Allow cpu backtraces to be written into ringbuffer during panic commit 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") disabled non-panic CPUs to further write messages to ringbuffer after panicked. Since the commit, non-panicked CPU's are not allowed to write to ring buffer after panicked and CPU backtrace which is triggered after panicked to sample non-panicked CPUs' backtrace no longer serves its function as it has nothing to print. Fix the issue by allowing non-panicked CPUs to write into ringbuffer while CPU backtrace is in flight. Fixes: 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") Signed-off-by: Ryo Takakura Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240812072703.339690-1-takakura@valinux.co.jp Signed-off-by: Petr Mladek --- include/linux/panic.h | 1 + kernel/panic.c | 8 +++++++- kernel/printk/printk.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925..2a0449144f82 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fd..c22b07049c38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { From b098495e69491c2225681f43228312d32477217b Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 3 Aug 2024 19:32:33 +0800 Subject: [PATCH 0852/1523] KVM: x86: hyper-v: Remove unused inline function kvm_hv_free_pa_page() There is no caller in tree since introduction in commit b4f69df0f65e ("KVM: x86: Make Hyper-V emulation optional") Signed-off-by: Yue Haibing Message-ID: <20240803113233.128185-1-yuehaibing@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 923e64903da9..913bfc96959c 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return HV_STATUS_ACCESS_DENIED; } static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {} -static inline void kvm_hv_free_pa_page(struct kvm *kvm) {} static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector) { return false; From 6252690f7e1b173b86a4c27dfc046b351ab423e7 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 9 Aug 2024 16:54:22 +0900 Subject: [PATCH 0853/1523] btrfs: fix invalid mapping of extent xarray state In __extent_writepage_io(), we call btrfs_set_range_writeback() -> folio_start_writeback(), which clears PAGECACHE_TAG_DIRTY mark from the mapping xarray if the folio is not dirty. This worked fine before commit 97713b1a2ced ("btrfs: do not clear page dirty inside extent_write_locked_range()"). After the commit, however, the folio is still dirty at this point, so the mapping DIRTY tag is not cleared anymore. Then, __extent_writepage_io() calls btrfs_folio_clear_dirty() to clear the folio's dirty flag. That results in the page being unlocked with a "strange" state. The page is not PageDirty, but the mapping tag is set as PAGECACHE_TAG_DIRTY. This strange state looks like causing a hang with a call trace below when running fstests generic/091 on a null_blk device. It is waiting for a folio lock. While I don't have an exact relation between this hang and the strange state, fixing the state also fixes the hang. And, that state is worth fixing anyway. This commit reorders btrfs_folio_clear_dirty() and btrfs_set_range_writeback() in __extent_writepage_io(), so that the PAGECACHE_TAG_DIRTY tag is properly removed from the xarray. [464.274] task:fsx state:D stack:0 pid:3034 tgid:3034 ppid:2853 flags:0x00004002 [464.286] Call Trace: [464.291] [464.295] __schedule+0x10ed/0x6260 [464.301] ? __pfx___blk_flush_plug+0x10/0x10 [464.308] ? __submit_bio+0x37c/0x450 [464.314] ? __pfx___schedule+0x10/0x10 [464.321] ? lock_release+0x567/0x790 [464.327] ? __pfx_lock_acquire+0x10/0x10 [464.334] ? __pfx_lock_release+0x10/0x10 [464.340] ? __pfx_lock_acquire+0x10/0x10 [464.347] ? __pfx_lock_release+0x10/0x10 [464.353] ? do_raw_spin_lock+0x12e/0x270 [464.360] schedule+0xdf/0x3b0 [464.365] io_schedule+0x8f/0xf0 [464.371] folio_wait_bit_common+0x2ca/0x6d0 [464.378] ? folio_wait_bit_common+0x1cc/0x6d0 [464.385] ? __pfx_folio_wait_bit_common+0x10/0x10 [464.392] ? __pfx_filemap_get_folios_tag+0x10/0x10 [464.400] ? __pfx_wake_page_function+0x10/0x10 [464.407] ? __pfx___might_resched+0x10/0x10 [464.414] ? do_raw_spin_unlock+0x58/0x1f0 [464.420] extent_write_cache_pages+0xe49/0x1620 [btrfs] [464.428] ? lock_acquire+0x435/0x500 [464.435] ? __pfx_extent_write_cache_pages+0x10/0x10 [btrfs] [464.443] ? btrfs_do_write_iter+0x493/0x640 [btrfs] [464.451] ? orc_find.part.0+0x1d4/0x380 [464.457] ? __pfx_lock_release+0x10/0x10 [464.464] ? __pfx_lock_release+0x10/0x10 [464.471] ? btrfs_do_write_iter+0x493/0x640 [btrfs] [464.478] btrfs_writepages+0x1cc/0x460 [btrfs] [464.485] ? __pfx_btrfs_writepages+0x10/0x10 [btrfs] [464.493] ? is_bpf_text_address+0x6e/0x100 [464.500] ? kernel_text_address+0x145/0x160 [464.507] ? unwind_get_return_address+0x5e/0xa0 [464.514] ? arch_stack_walk+0xac/0x100 [464.521] do_writepages+0x176/0x780 [464.527] ? lock_release+0x567/0x790 [464.533] ? __pfx_do_writepages+0x10/0x10 [464.540] ? __pfx_lock_acquire+0x10/0x10 [464.546] ? __pfx_stack_trace_save+0x10/0x10 [464.553] ? do_raw_spin_lock+0x12e/0x270 [464.560] ? do_raw_spin_unlock+0x58/0x1f0 [464.566] ? _raw_spin_unlock+0x23/0x40 [464.573] ? wbc_attach_and_unlock_inode+0x3da/0x7d0 [464.580] filemap_fdatawrite_wbc+0x113/0x180 [464.587] ? prepare_pages.constprop.0+0x13c/0x5c0 [btrfs] [464.596] __filemap_fdatawrite_range+0xaf/0xf0 [464.603] ? __pfx___filemap_fdatawrite_range+0x10/0x10 [464.611] ? trace_irq_enable.constprop.0+0xce/0x110 [464.618] ? kasan_quarantine_put+0xd7/0x1e0 [464.625] btrfs_start_ordered_extent+0x46f/0x570 [btrfs] [464.633] ? __pfx_btrfs_start_ordered_extent+0x10/0x10 [btrfs] [464.642] ? __clear_extent_bit+0x2c0/0x9d0 [btrfs] [464.650] btrfs_lock_and_flush_ordered_range+0xc6/0x180 [btrfs] [464.659] ? __pfx_btrfs_lock_and_flush_ordered_range+0x10/0x10 [btrfs] [464.669] btrfs_read_folio+0x12a/0x1d0 [btrfs] [464.676] ? __pfx_btrfs_read_folio+0x10/0x10 [btrfs] [464.684] ? __pfx_filemap_add_folio+0x10/0x10 [464.691] ? __pfx___might_resched+0x10/0x10 [464.698] ? __filemap_get_folio+0x1c5/0x450 [464.705] prepare_uptodate_page+0x12e/0x4d0 [btrfs] [464.713] prepare_pages.constprop.0+0x13c/0x5c0 [btrfs] [464.721] ? fault_in_iov_iter_readable+0xd2/0x240 [464.729] btrfs_buffered_write+0x5bd/0x12f0 [btrfs] [464.737] ? __pfx_btrfs_buffered_write+0x10/0x10 [btrfs] [464.745] ? __pfx_lock_release+0x10/0x10 [464.752] ? generic_write_checks+0x275/0x400 [464.759] ? down_write+0x118/0x1f0 [464.765] ? up_write+0x19b/0x500 [464.770] btrfs_direct_write+0x731/0xba0 [btrfs] [464.778] ? __pfx_btrfs_direct_write+0x10/0x10 [btrfs] [464.785] ? __pfx___might_resched+0x10/0x10 [464.792] ? lock_acquire+0x435/0x500 [464.798] ? lock_acquire+0x435/0x500 [464.804] btrfs_do_write_iter+0x494/0x640 [btrfs] [464.811] ? __pfx_btrfs_do_write_iter+0x10/0x10 [btrfs] [464.819] ? __pfx___might_resched+0x10/0x10 [464.825] ? rw_verify_area+0x6d/0x590 [464.831] vfs_write+0x5d7/0xf50 [464.837] ? __might_fault+0x9d/0x120 [464.843] ? __pfx_vfs_write+0x10/0x10 [464.849] ? btrfs_file_llseek+0xb1/0xfb0 [btrfs] [464.856] ? lock_release+0x567/0x790 [464.862] ksys_write+0xfb/0x1d0 [464.867] ? __pfx_ksys_write+0x10/0x10 [464.873] ? _raw_spin_unlock+0x23/0x40 [464.879] ? btrfs_getattr+0x4af/0x670 [btrfs] [464.886] ? vfs_getattr_nosec+0x79/0x340 [464.892] do_syscall_64+0x95/0x180 [464.898] ? __do_sys_newfstat+0xde/0xf0 [464.904] ? __pfx___do_sys_newfstat+0x10/0x10 [464.911] ? trace_irq_enable.constprop.0+0xce/0x110 [464.918] ? syscall_exit_to_user_mode+0xac/0x2a0 [464.925] ? do_syscall_64+0xa1/0x180 [464.931] ? trace_irq_enable.constprop.0+0xce/0x110 [464.939] ? trace_irq_enable.constprop.0+0xce/0x110 [464.946] ? syscall_exit_to_user_mode+0xac/0x2a0 [464.953] ? btrfs_file_llseek+0xb1/0xfb0 [btrfs] [464.960] ? do_syscall_64+0xa1/0x180 [464.966] ? btrfs_file_llseek+0xb1/0xfb0 [btrfs] [464.973] ? trace_irq_enable.constprop.0+0xce/0x110 [464.980] ? syscall_exit_to_user_mode+0xac/0x2a0 [464.987] ? __pfx_btrfs_file_llseek+0x10/0x10 [btrfs] [464.995] ? trace_irq_enable.constprop.0+0xce/0x110 [465.002] ? __pfx_btrfs_file_llseek+0x10/0x10 [btrfs] [465.010] ? do_syscall_64+0xa1/0x180 [465.016] ? lock_release+0x567/0x790 [465.022] ? __pfx_lock_acquire+0x10/0x10 [465.028] ? __pfx_lock_release+0x10/0x10 [465.034] ? trace_irq_enable.constprop.0+0xce/0x110 [465.042] ? syscall_exit_to_user_mode+0xac/0x2a0 [465.049] ? do_syscall_64+0xa1/0x180 [465.055] ? syscall_exit_to_user_mode+0xac/0x2a0 [465.062] ? do_syscall_64+0xa1/0x180 [465.068] ? syscall_exit_to_user_mode+0xac/0x2a0 [465.075] ? do_syscall_64+0xa1/0x180 [465.081] ? clear_bhb_loop+0x25/0x80 [465.087] ? clear_bhb_loop+0x25/0x80 [465.093] ? clear_bhb_loop+0x25/0x80 [465.099] entry_SYSCALL_64_after_hwframe+0x76/0x7e [465.106] RIP: 0033:0x7f093b8ee784 [465.111] RSP: 002b:00007ffc29d31b28 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [465.122] RAX: ffffffffffffffda RBX: 0000000000006000 RCX: 00007f093b8ee784 [465.131] RDX: 000000000001de00 RSI: 00007f093b6ed200 RDI: 0000000000000003 [465.141] RBP: 000000000001de00 R08: 0000000000006000 R09: 0000000000000000 [465.150] R10: 0000000000023e00 R11: 0000000000000202 R12: 0000000000006000 [465.160] R13: 0000000000023e00 R14: 0000000000023e00 R15: 0000000000000001 [465.170] [465.174] INFO: lockdep is turned off. Reported-by: Shinichiro Kawasaki Fixes: 97713b1a2ced ("btrfs: do not clear page dirty inside extent_write_locked_range()") Reviewed-by: Qu Wenruo Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa7f8148cd0d..c73cd4f89015 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, free_extent_map(em); em = NULL; + /* + * Although the PageDirty bit might be cleared before entering + * this function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * page for range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); btrfs_set_range_writeback(inode, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(inode->root->fs_info, @@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, page->index, cur, end); } - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * page for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); submit_extent_page(bio_ctrl, disk_bytenr, page, iosize, cur - page_offset(page)); From df934abb185c71c9f2fa07a5013672d0cbd36560 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 9 Aug 2024 12:36:12 -0400 Subject: [PATCH 0854/1523] mlxbf_gige: disable RX filters until RX path initialized A recent change to the driver exposed a bug where the MAC RX filters (unicast MAC, broadcast MAC, and multicast MAC) are configured and enabled before the RX path is fully initialized. The result of this bug is that after the PHY is started packets that match these MAC RX filters start to flow into the RX FIFO. And then, after rx_init() is completed, these packets will go into the driver RX ring as well. If enough packets are received to fill the RX ring (default size is 128 packets) before the call to request_irq() completes, the driver RX function becomes stuck. This bug is intermittent but is most likely to be seen where the oob_net0 interface is connected to a busy network with lots of broadcast and multicast traffic. All the MAC RX filters must be disabled until the RX path is ready, i.e. all initialization is done and all the IRQs are installed. Fixes: f7442a634ac0 ("mlxbf_gige: call request_irq() after NAPI initialized") Reviewed-by: Asmaa Mnebhi Signed-off-by: David Thompson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240809163612.12852-1-davthompson@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 8 +++ .../mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++ .../mellanox/mlxbf_gige/mlxbf_gige_regs.h | 2 + .../mellanox/mlxbf_gige/mlxbf_gige_rx.c | 50 ++++++++++++++++--- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index bc94e75a7aeb..e7777700ee18 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -40,6 +40,7 @@ */ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 +#define MLXBF_GIGE_MAX_FILTER_IDX 3 /* Define for broadcast MAC literal */ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF @@ -175,6 +176,13 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); + +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index b157f0f1c5a8..385a56ac7348 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) if (err) goto napi_deinit; + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); + mlxbf_gige_enable_multicast_rx(priv); + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) void __iomem *plu_base; void __iomem *base; int addr, phy_irq; + unsigned int i; int err; base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); @@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) + mlxbf_gige_disable_mac_rx_filter(priv, i); + mlxbf_gige_disable_multicast_rx(priv); + mlxbf_gige_disable_promisc(priv); + /* Write initial MAC address to hardware */ mlxbf_gige_initial_mac(priv); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 98a8681c21b9..4d14cb13fd64 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -62,6 +62,8 @@ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 +#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 699984358493..eb62620b63c7 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -11,22 +11,60 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, - unsigned int index, u64 dmac) +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) { void __iomem *base = priv->base; u64 control; - /* Write destination MAC to specified MAC RX filter */ - writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + - (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); - /* Enable MAC receive filter mask for specified index */ control = readq(base + MLXBF_GIGE_CONTROL); control |= (MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); writeq(control, base + MLXBF_GIGE_CONTROL); } +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 *dmac) { From 34e087e8920e635c62e2ed6a758b0cd27f836d13 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Thu, 18 Jul 2024 16:38:50 +0800 Subject: [PATCH 0855/1523] drm/amdgpu/mes: fix mes ring buffer overflow wait memory room until enough before writing mes packets to avoid ring buffer overflow. v2: squash in sched_hw_submission fix Fixes: de3246254156 ("drm/amdgpu: cleanup MES11 command submission") Fixes: fffe347e1478 ("drm/amdgpu: cleanup MES12 command submission") Signed-off-by: Jack Xiao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18 ++++++++++++++---- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 18 ++++++++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8c39bf7e1fac..690976665cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -214,6 +214,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..1a5ad5be33bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -168,7 +168,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -196,6 +196,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,8 +215,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,6 +258,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..249e5a66205c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -154,7 +154,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -182,6 +182,13 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,8 +201,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -215,7 +221,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -238,6 +244,10 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); From 237193e21b29d4aa0617ffeea3d6f49e72999708 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 6 Aug 2024 09:55:55 -0400 Subject: [PATCH 0856/1523] drm/amd/display: fix s2idle entry for DCN3.5+ To be able to get to the lowest power state when suspending systems with DCN3.5+, we must be in IPS before the display hardware is put into D3cold. So, to ensure that the system always reaches the lowest power state while suspending, force systems that support IPS to enter idle optimizations before entering D3cold. Reviewed-by: Roman Li Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ec6064d40dbf..fe8a88a7fe59 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2904,6 +2904,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); From 15e1c3d65975524c5c792fcd59f7d89f00402261 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 2 Aug 2024 13:16:30 -0700 Subject: [PATCH 0857/1523] KVM: x86: Use this_cpu_ptr() instead of per_cpu_ptr(smp_processor_id()) Use this_cpu_ptr() instead of open coding the equivalent in various user return MSR helpers. Signed-off-by: Isaku Yamahata Reviewed-by: Chao Gao Reviewed-by: Yuan Yao [sean: massage changelog] Signed-off-by: Sean Christopherson Reviewed-by: Pankaj Gupta Message-ID: <20240802201630.339306-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef3d3511e4af..70219e406987 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void) int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); int err; value = (value & mask) | (msrs->values[slot].host & ~mask); @@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); static void drop_user_return_notifiers(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); if (msrs->registered) kvm_on_user_return(&msrs->urn); From 35c628774e50b3784c59e8ca7973f03bcb067132 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:00:33 -0400 Subject: [PATCH 0858/1523] drm/amdgpu/jpeg2: properly set atomics vmid field This needs to be set as well if the IB uses atomics. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From c6c2e8b6a427d4fecc7c36cffccb908185afcab2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:06:05 -0400 Subject: [PATCH 0859/1523] drm/amdgpu/jpeg4: properly set atomics vmid field This needs to be set as well if the IB uses atomics. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..f4662920c653 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -782,11 +782,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From 0cee47cde41e22712c034ae961076067d4ac13a0 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 31 Jul 2024 12:10:40 +0800 Subject: [PATCH 0860/1523] drm/amd/amdgpu: Properly tune the size of struct The struct assertion is failed because sparse cannot parse `#pragma pack(push, 1)` and `#pragma pack(pop)` correctly. GCC's output is still 1-byte-aligned. No harm to memory layout. The error can be filtered out by sparse-diff, but sometimes multiple lines queezed into one, making the sparse-diff thinks its a new error. I'm trying to aviod this by fixing errors. Link: https://lore.kernel.org/all/20230620045919.492128-1-suhui@nfschina.com/ Link: https://lore.kernel.org/all/93d10611-9fbb-4242-87b8-5860b2606042@suswa.mountain/ Fixes: 1721bc1b2afa ("drm/amdgpu: Update VF2PF interface") Cc: Dan Carpenter Cc: wenlunpeng Reported-by: Su Hui Signed-off-by: WangYuli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index fb2b394bb9c5..6e9eeaeb3de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -213,7 +213,7 @@ struct amd_sriov_msg_pf2vf_info { uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; -}; +} __packed; struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte */ @@ -273,7 +273,7 @@ struct amd_sriov_msg_vf2pf_info { uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; -}; +} __packed; /* mailbox message send from guest to host */ enum amd_sriov_mailbox_request_message { From 7b3a4e1d559e892ea563f46f45c504a60c82d70f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Aug 2024 12:42:52 +0100 Subject: [PATCH 0861/1523] drm/amd/display: remove extraneous ; after statements There are a several statements with two following semicolons, replace these with just one semicolon. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 006667aa961b..710a25dcfef0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1031,7 +1031,7 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) { struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;; + double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { /* invalid register set index */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index cbecdc9f253a..c3c4d8d9525c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7218,7 +7218,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(DV_BUILD) // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { - lb_buffer_size_bits_luma = 34620 * 57;; + lb_buffer_size_bits_luma = 34620 * 57; lb_buffer_size_bits_chroma = 13560 * 57; } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index c54c29711a65..8f3c1c0b1cc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -6464,8 +6464,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 7655501e75d4..9e8ff3a9718e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -421,7 +421,7 @@ unsigned int dml2_calc_max_scaled_time( void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx) { - int i, j = 0;; + int i, j = 0; struct mcif_arb_params *wb_arb_params = NULL; struct dcn_bw_writeback *bw_writeback = NULL; enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/ From 3834ce360067b4ee98fdef14571923500a0499a4 Mon Sep 17 00:00:00 2001 From: Remington Brasga Date: Wed, 31 Jul 2024 05:54:51 +0000 Subject: [PATCH 0862/1523] drm/amdgpu/uvd4: fix mask and shift definitions A few define's are listed twice with different, incorrect values. This fix sets them appropriately. Signed-off-by: Remington Brasga Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h index 8ee3149df5b7..2ef1273e65ab 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h @@ -340,8 +340,6 @@ #define UVD_LMI_CTRL__REQ_MODE_MASK 0x00000200L #define UVD_LMI_CTRL__REQ_MODE__SHIFT 0x00000009 #define UVD_LMI_CTRL__RFU_MASK 0xf8000000L -#define UVD_LMI_CTRL__RFU_MASK 0xfc000000L -#define UVD_LMI_CTRL__RFU__SHIFT 0x0000001a #define UVD_LMI_CTRL__RFU__SHIFT 0x0000001b #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK 0x00200000L #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN__SHIFT 0x00000015 From 9a12b1c7a0595736d398b24712dc1ce79072662e Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:27 +0200 Subject: [PATCH 0863/1523] drm/amd: Make amd_ip_funcs static for SDMA v5.0 The struct can be static, as it is only used in this translation unit. Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index d5f0dc132a47..3e48ea38385d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1820,7 +1820,7 @@ static void sdma_v5_0_dump_ip_state(void *handle) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_0_ip_funcs = { +static const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .name = "sdma_v5_0", .early_init = sdma_v5_0_early_init, .late_init = NULL, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h index d4e3c2e696f6..2ab71f21755a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_0_H__ #define __SDMA_V5_0_H__ -extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; #endif /* __SDMA_V5_0_H__ */ From 8641b817392bfb12fb1e71ebb68c31783297bfbd Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:28 +0200 Subject: [PATCH 0864/1523] drm/amd: Make amd_ip_funcs static for SDMA v5.2 The struct can be static, as it is only used in this translation unit. Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 93890f83e270..d740255edf5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1776,7 +1776,7 @@ static void sdma_v5_2_dump_ip_state(void *handle) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_2_ip_funcs = { +static const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, .late_init = NULL, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h index b70414fef2a1..863145b3a77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_2_H__ #define __SDMA_V5_2_H__ -extern const struct amd_ip_funcs sdma_v5_2_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block; #endif /* __SDMA_V5_2_H__ */ From 17d30ed33c8a9e7b866dd7c2ceb4a6858cfcaa81 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 4 Aug 2024 15:56:29 +0200 Subject: [PATCH 0865/1523] drm/amdgpu/swsmu: fix SMU11 typos (memlk -> memclk) No functional changes. Signed-off-by: Tobias Jakobi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 076620fa3ef5..16af1a329621 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1989,7 +1989,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor.Mem_FPS, activity_monitor.Mem_MinFreqStep, activity_monitor.Mem_MinActiveFreqType, @@ -2051,7 +2051,7 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u activity_monitor.Soc_PD_Data_error_coeff = input[8]; activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor.Mem_FPS = input[1]; activity_monitor.Mem_MinFreqStep = input[2]; activity_monitor.Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0d3e1a121b67..9c3c48297cba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1691,7 +1691,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor->Mem_FPS, activity_monitor->Mem_MinFreqStep, activity_monitor->Mem_MinActiveFreqType, @@ -1756,7 +1756,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * activity_monitor->Fclk_PD_Data_error_coeff = input[8]; activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor->Mem_FPS = input[1]; activity_monitor->Mem_MinFreqStep = input[2]; activity_monitor->Mem_MinActiveFreqType = input[3]; From 020620424b27bababf7f53d00692ab919c357a3f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:08:28 +0200 Subject: [PATCH 0866/1523] drm/amd: Use a constant format string for amdgpu_ucode_request Multiple files in amdgpu call amdgpu_ucode_request() with a fw_name variable that the compiler cannot check for being a valid format string, as seen by enabling the (default-disabled) -Wformat-security option: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c: In function 'amdgpu_mes_init_microcode': drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1517:61: error: format not a string literal and no format arguments [-Werror=format-security] 1517 | r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); | ^~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c: In function 'amdgpu_uvd_sw_init': drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:9: error: format not a string literal and no format arguments [-Werror=format-security] 263 | r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c: In function 'amdgpu_vce_sw_init': drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c:161:9: error: format not a string literal and no format arguments [-Werror=format-security] 161 | r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c: In function 'amdgpu_umsch_mm_init_microcode': drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c:590:9: error: format not a string literal and no format arguments [-Werror=format-security] 590 | r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c: In function 'amdgpu_cgs_get_firmware_info': drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:417:72: error: format not a string literal and no format arguments [-Werror=format-security] 417 | err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); | ^~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 'load_dmcu_fw': drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2221:9: error: format not a string literal and no format arguments [-Werror=format-security] 2221 | r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); | ^ drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 'dm_init_microcode': drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:5147:9: error: format not a string literal and no format arguments [-Werror=format-security] 5147 | r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); | ^ Change these all to use a "%s" format with the actual name as an argument, to let the compiler prove this to be correct. Fixes: e5a7d047f41b ("drm/amd: Use `amdgpu_ucode_*` helpers for CGS") Fixes: 52215e2a5d4a ("drm/amd: Use `amdgpu_ucode_*` helpers for VCE") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c3d89088123d..16153d275d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..1b1e94b5b977 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1514,7 +1514,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index fbc2852278e1..6162582d0aa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 07d930339b07..775c09d57222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 968ca2c84ef7..51b045de409d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fe8a88a7fe59..454d205e4501 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2220,7 +2220,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -5157,7 +5157,7 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. */ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); return r; } From c6dbab46324b1742b50dc2fb5c1fee2c28129439 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Jul 2024 17:58:12 +0200 Subject: [PATCH 0867/1523] drm/radeon/r100: Handle unknown family in r100_cp_init_microcode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With -Werror: In function ‘r100_cp_init_microcode’, inlined from ‘r100_cp_init’ at drivers/gpu/drm/radeon/r100.c:1136:7: include/linux/printk.h:465:44: error: ‘%s’ directive argument is null [-Werror=format-overflow=] 465 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) | ^ include/linux/printk.h:437:17: note: in definition of macro ‘printk_index_wrap’ 437 | _p_func(_fmt, ##__VA_ARGS__); \ | ^~~~~~~ include/linux/printk.h:508:9: note: in expansion of macro ‘printk’ 508 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~ drivers/gpu/drm/radeon/r100.c:1062:17: note: in expansion of macro ‘pr_err’ 1062 | pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name); | ^~~~~~ Fix this by converting the if/else if/... construct into a proper switch() statement with a default to handle the error case. As a bonus, the generated code is ca. 100 bytes smaller (with gcc 11.4.0 targeting arm32). Signed-off-by: Geert Uytterhoeven Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r100.c | 70 ++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index d7d7d23bf9a1..80703417d8a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1016,45 +1016,65 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) DRM_DEBUG_KMS("\n"); - if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || - (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) { + switch (rdev->family) { + case CHIP_R100: + case CHIP_RV100: + case CHIP_RV200: + case CHIP_RS100: + case CHIP_RS200: DRM_INFO("Loading R100 Microcode\n"); fw_name = FIRMWARE_R100; - } else if ((rdev->family == CHIP_R200) || - (rdev->family == CHIP_RV250) || - (rdev->family == CHIP_RV280) || - (rdev->family == CHIP_RS300)) { + break; + + case CHIP_R200: + case CHIP_RV250: + case CHIP_RV280: + case CHIP_RS300: DRM_INFO("Loading R200 Microcode\n"); fw_name = FIRMWARE_R200; - } else if ((rdev->family == CHIP_R300) || - (rdev->family == CHIP_R350) || - (rdev->family == CHIP_RV350) || - (rdev->family == CHIP_RV380) || - (rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { + break; + + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_RS400: + case CHIP_RS480: DRM_INFO("Loading R300 Microcode\n"); fw_name = FIRMWARE_R300; - } else if ((rdev->family == CHIP_R420) || - (rdev->family == CHIP_R423) || - (rdev->family == CHIP_RV410)) { + break; + + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: DRM_INFO("Loading R400 Microcode\n"); fw_name = FIRMWARE_R420; - } else if ((rdev->family == CHIP_RS690) || - (rdev->family == CHIP_RS740)) { + break; + + case CHIP_RS690: + case CHIP_RS740: DRM_INFO("Loading RS690/RS740 Microcode\n"); fw_name = FIRMWARE_RS690; - } else if (rdev->family == CHIP_RS600) { + break; + + case CHIP_RS600: DRM_INFO("Loading RS600 Microcode\n"); fw_name = FIRMWARE_RS600; - } else if ((rdev->family == CHIP_RV515) || - (rdev->family == CHIP_R520) || - (rdev->family == CHIP_RV530) || - (rdev->family == CHIP_R580) || - (rdev->family == CHIP_RV560) || - (rdev->family == CHIP_RV570)) { + break; + + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_R580: + case CHIP_RV560: + case CHIP_RV570: DRM_INFO("Loading R500 Microcode\n"); fw_name = FIRMWARE_R520; + break; + + default: + DRM_ERROR("Unsupported Radeon family %u\n", rdev->family); + return -EINVAL; } err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); From c6b86421f1f9ddf9d706f2453159813ee39d0cf9 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 6 Aug 2024 22:27:32 +0200 Subject: [PATCH 0868/1523] drm/amdgpu: Actually check flags for all context ops. Missing validation ... Checked libdrm and it clears all the structs, so we should be safe to just check everything. Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: From c30fb344a2f7fb5d553e98577185d9f8147598e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 10:49:33 -0400 Subject: [PATCH 0869/1523] drm/amdgpu/mes: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1b1e94b5b977..b2a9df202913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -819,6 +819,30 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct mes_reset_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + queue_input.vmid = vmid; + + r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset legacy queue\n"); + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..174283a0fc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -279,6 +279,16 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +struct mes_reset_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t vmid; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -347,6 +357,9 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); + + int (*reset_legacy_queue)(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -381,6 +394,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, From 45a2a4514320f9c835eccb661601357cb1fefd82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 11:07:57 -0400 Subject: [PATCH 0870/1523] drm/amdgpu/mes11: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1a5ad5be33bf..61b8cb39826d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -595,6 +595,38 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } +static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -603,6 +635,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, + .reset_legacy_queue = mes_v11_0_reset_legacy_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, From 947c0808693e267185c5471f87f83146e4300561 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 May 2024 11:44:31 -0400 Subject: [PATCH 0871/1523] drm/amdgpu/mes12: add API for legacy queue reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add API for resetting kernel queues. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 249e5a66205c..5e06a982eb54 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -646,6 +646,38 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } +static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -654,6 +686,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, + .reset_legacy_queue = mes_v12_0_reset_legacy_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, From a46a7bef7d41ee7787c246f47a656fbafe02f122 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:17:11 +0530 Subject: [PATCH 0872/1523] drm/amdgpu: add vcn_v5_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v5_0. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 170 +++++++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..6e6eaf2358d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,134 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC_VMIDS_MULTI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_CTRL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_LOW), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_VCPU_CACHE_OFFSET0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_VMID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_CLK_EN_VCPU_REPORT), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SCRATCH1) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +211,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +267,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +311,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1437,34 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,7 +1483,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, .print_ip_state = NULL, }; From 3df34334147e73b05480db6cf8353a405597d04a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:29:09 +0530 Subject: [PATCH 0873/1523] Revert "drm/amdgpu: add vcn_v5_0 ip dump support" This reverts commit a46a7bef7d41ee7787c246f47a656fbafe02f122. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 170 +----------------------- 1 file changed, 1 insertion(+), 169 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6e6eaf2358d2..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,134 +37,6 @@ #include -static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET1), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_CONFIG), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_IPX_DLDO_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_VCPU_CACHE_OFFSET0), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_VMID), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_CLK_EN_VCPU_REPORT), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL2), - SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SCRATCH1) -}; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -211,8 +83,6 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -267,14 +137,6 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } return 0; } @@ -311,8 +173,6 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); - kfree(adev->vcn.ip_dump); - return r; } @@ -1437,34 +1297,6 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v5_0_dump_ip_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1483,7 +1315,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = vcn_v5_0_dump_ip_state, + .dump_ip_state = NULL, .print_ip_state = NULL, }; From 2ab5dc59177419d8a49e89585e82ff41524270fc Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 11:43:45 +0800 Subject: [PATCH 0874/1523] drm/amdgpu/mes12: update mes_v12_api_def.h Update mes12 api definition. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/mes_v12_api_def.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; From c7d4355648ffa02a1551495b05c71ea6c884d29c Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 11:53:35 +0800 Subject: [PATCH 0875/1523] drm/amdgpu/mes: add multiple mes ring instances support Add multiple mes ring instances in mes structure to support multiple mes pipes. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 34 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 34 ++++++++++++------------ 9 files changed, 47 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 5c9f36f01db0..28bd2098a65e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -998,7 +998,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1071,7 +1071,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index b2a9df202913..be2156bf0252 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 174283a0fc07..d87d068952e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -82,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 26efce9aa410..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 61b8cb39826d..4c7899e527fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,7 +162,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -191,7 +191,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -221,7 +221,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -263,7 +263,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1058,7 +1058,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1072,7 +1072,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1114,7 +1114,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1167,7 +1167,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1259,12 +1259,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1375,9 +1375,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1395,7 +1395,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1440,7 +1440,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 5e06a982eb54..ac6209a0029c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -148,7 +148,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +177,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,7 +207,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); @@ -249,7 +249,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1128,7 +1128,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1147,7 +1147,7 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1193,7 +1193,7 @@ static int mes_v12_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v12_0_ring_funcs; @@ -1246,7 +1246,7 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1335,12 +1335,12 @@ static int mes_v12_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1384,7 +1384,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1448,9 +1448,9 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1463,7 +1463,7 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq || adev->enable_uni_mes) { @@ -1515,7 +1515,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; From 2f93ec07ab54cae66155d0a09182843f358da178 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:33:05 +0530 Subject: [PATCH 0876/1523] Revert "drm/amdgpu: add print support for vcn_v3_0 ip dump" This reverts commit cd162ae9bc3ba91eb630a1321afd3d1dde5f2000. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 34 +-------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c2278cc49dd5..0d871859690a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2324,38 +2324,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2403,7 +2371,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = vcn_v3_0_print_ip_state, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From 434b3554d6435dc4e19083a2214dee40a88e09e1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:34:58 +0530 Subject: [PATCH 0877/1523] Revert "drm/amdgpu: add vcn_v3_0 ip dump support" This reverts commit 58d283801d06d4434df6625ed6e6b8d2ba47fe65. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 151 +------------------------- 1 file changed, 1 insertion(+), 150 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 0d871859690a..24f947751c46 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,115 +60,6 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 -static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RBC_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_RBC_IB_VMID), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_NC_VMIDS_MULTI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SOFT_RESET2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_CTRL), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_STATUS2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SUVD_CGC_GATE2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_DBW_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_STATUS), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_LOW), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_LMI_MIF_REF_LUMA_64BIT_BAR_HIGH), - SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_SCRATCH1) -}; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -235,8 +126,6 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -357,15 +246,6 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - return 0; } @@ -404,7 +284,6 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); - kfree(adev->vcn.ip_dump); return r; } @@ -2324,34 +2203,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_dump_ip_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2370,7 +2221,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = vcn_v3_0_dump_ip_state, + .dump_ip_state = NULL, .print_ip_state = NULL, }; From 311f2b587461f86e3c30e7ac28df38be4a862ac7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 23:36:26 +0530 Subject: [PATCH 0878/1523] Revert "drm/amdgpu: add vcn ip dump ptr in vcn global struct" This reverts commit f3392e662efdc095f10109f588aa4f3be86f7eb5. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f127eccf59d7..1a5439abd1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,9 +330,6 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; - - /* IP reg dump */ - uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From 0fe20258b4989b9112b5e9470df33a0939403fd4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 2 Aug 2024 12:20:36 +0530 Subject: [PATCH 0879/1523] drm/amd/display: Add null check for 'afb' in amdgpu_dm_update_cursor (v2) This commit adds a null check for the 'afb' variable in the amdgpu_dm_update_cursor function. Previously, 'afb' was assumed to be null at line 8388, but was used later in the code without a null check. This could potentially lead to a null pointer dereference. Changes since v1: - Moved the null check for 'afb' to the line where 'afb' is used. (Alex) Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8433 amdgpu_dm_update_cursor() error: we previously assumed 'afb' could be null (see line 8388) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Co-developed-by: Alex Hung Signed-off-by: Alex Hung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 454d205e4501..7d999e352df3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8696,7 +8696,8 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, From 596a4ec72876f1061f0ef4be47076b093f03b4f9 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 27 May 2024 10:30:45 -0400 Subject: [PATCH 0880/1523] drm/amd/display: fix minor coding errors where dml21 phase 5 uses wrong variables [why & how] There is a coding error which causes incorrect variables to be assigned in DML21 phase 5. Reviewed-by: Austin Zheng Signed-off-by: Wenjing Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index e9b40a45ffdd..a9c3ed6d50ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -274,7 +274,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o /* * Phase 5: Optimize for Stutter */ - memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); l->stutter_phase.dml = dml; l->stutter_phase.display_config = &l->base_display_config_with_meta; l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; @@ -287,7 +287,7 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o if (stutter_success) { memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage5.success = true; } /* From 782cef7fc367542ccc851d93edbed166defdfc27 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 31 May 2024 11:37:15 -0400 Subject: [PATCH 0881/1523] drm/amd/display: apply vmin optimization even if it doesn't reach vmin level [why] Based on power measurement result, in most cases when display clock is higher than Vmin display clock, lowering display clock using dynamic ODM will improve overall power consumption by 0 to 4 watts even if we can't reach Vmin. [how] Allow vmin optimization applied even if dispclk can't reach Vmin. Reviewed-by: Austin Zheng Signed-off-by: Wenjing Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 14 +++++++++----- .../display/dc/dml2/dml21/src/dml2_top/dml_top.c | 13 +++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 06e786995390..68b333b68933 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -717,6 +717,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -737,28 +739,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index a9c3ed6d50ef..f9f8869cd8b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -266,9 +266,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* From c20da89edb69f05ab78fe804899d50be2596b0f0 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Mon, 29 Jul 2024 14:35:45 -0400 Subject: [PATCH 0882/1523] drm/amd/display: 3DLUT non-DMA refactor [Why] Currently the handling for 3DLUT is found in multiple different places, which causes issues when the different functions are not in sync with each other. Frequently bugs occur because the LUT handling is broken up, and what has already been handled isn't kept track of well, which can cause earlier changes to the LUT params to be overridden. [How] Remove DMA LUT handling from DCN401 and refactor legacy LUT handling in one place to make it easier to keep track of what has and needs to be done. Reviewed-by: Ilya Bakoulin Signed-off-by: Relja Vojvodic Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn32/dcn32_init.c | 1 - .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 46 ++++++++----------- .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 968b010971ea..58bed01fc20e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -162,7 +162,6 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 77489bbcda02..44c1184868e0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -670,46 +670,40 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct dpp *dpp_base = pipe_ctx->plane_res.dpp; int mpcc_id = pipe_ctx->plane_res.hubp->inst; struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - bool result = true; + bool result; const struct pwl_params *lut_params = NULL; bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; // 1D LUT - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->blend_tf.type == TF_TYPE_HWPWL) - lut_params = &plane_state->blend_tf.pwl; - else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, - &dpp_base->regamma_params, false); - lut_params = rval ? &dpp_base->regamma_params : NULL; - } - result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); - lut_params = NULL; + if (plane_state->blend_tf.type == TF_TYPE_HWPWL) + lut_params = &plane_state->blend_tf.pwl; + else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, + &dpp_base->regamma_params, false); + lut_params = rval ? &dpp_base->regamma_params : NULL; } + result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); + lut_params = NULL; // Shaper - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) - lut_params = &plane_state->in_shaper_func.pwl; - else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { - // TODO: dpp_base replace - ASSERT(false); - rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, - &dpp_base->shaper_params, true); - lut_params = rval ? &dpp_base->shaper_params : NULL; - } - - result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); + if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) + lut_params = &plane_state->in_shaper_func.pwl; + else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { + // TODO: dpp_base replace + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, + &dpp_base->shaper_params, true); + lut_params = rval ? &dpp_base->shaper_params : NULL; } + result &= mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); // 3D - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (mpc->funcs->program_3dlut) { if (plane_state->lut3d_func.state.bits.initialized == 1) - result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); else - result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); } return result; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 457f4167e848..f4eda4a55ea7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -136,7 +136,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, + .populate_mcm_luts = NULL, }; void dcn401_hw_sequencer_init_functions(struct dc *dc) From 95d9e0803e51d5a24276b7643b244c7477daf463 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 29 Jul 2024 15:29:09 -0600 Subject: [PATCH 0883/1523] drm/amd/display: Check null pointers before using dc->clk_mgr [WHY & HOW] dc->clk_mgr is null checked previously in the same function, indicating it might be null. Passing "dc" to "dc->hwss.apply_idle_power_optimizations", which dereferences null "dc->clk_mgr". (The function pointer resolves to "dcn35_apply_idle_power_optimizations".) This fixes 1 FORWARD_NULL issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b1253e4c81a8..3ba2acfdae2a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5425,7 +5425,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const if (allow == dc->idle_optimizations_allowed) return; - if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow)) + if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL && + dc->hwss.apply_idle_power_optimizations(dc, allow)) dc->idle_optimizations_allowed = allow; } From 4af0d8ebf74ccbb60d33fdd410891283dd6cb109 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 30 Jul 2024 11:55:23 -0400 Subject: [PATCH 0884/1523] drm/amd/display: Unlock Pipes Based On DET Allocation [Why] DML21 does not allocate DET evenly between pipes. May result in underflow when unlocking the pipes as DET could be overallocated. [How] 1. Unlock pipes that have a decreased amount of DET allocation 2. Wait for the double buffer to be updated. 3. Unlock the remaining pipes. Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 28 ++++++ .../display/dc/hubbub/dcn401/dcn401_hubbub.c | 23 +++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 91 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 2 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 1 + drivers/gpu/drm/amd/display/dc/inc/resource.h | 5 + 7 files changed, 151 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1c379a6b1b4c..b38340c690c6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5275,3 +5275,31 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes; dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes; } + +/* Returns number of DET segments allocated for a given OTG_MASTER pipe */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + + int dpp_count = 0; + int det_segments = 0; + + if (!otg_master->stream) + return 0; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &state->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &state->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) + det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size; + } + } + return det_segments; +} diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index d36f758971a8..37d26fa0b6fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1170,6 +1170,28 @@ static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned comp } } +static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + switch (hubp_inst) { + case 0: + REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100000); /* 1 vupdate at 10hz */ + break; + case 1: + REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100000); + break; + case 2: + REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100000); + break; + case 3: + REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100000); + break; + default: + break; + } +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1192,6 +1214,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .set_request_limit = hubbub32_set_request_limit, .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, + .wait_for_det_update = dcn401_wait_for_det_update, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 44c1184868e0..22c7afbcfc4e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1663,3 +1663,94 @@ void dcn401_hardware_release(struct dc *dc) } } +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + struct hubbub *hubbub = dc->res_pool->hubbub; + int dpp_count = 0; + + if (!otg_master->stream) + return; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &context->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &context->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) { + struct pipe_ctx *dpp_pipe = dpp_pipes[dpp_idx]; + if (dpp_pipe && hubbub && + dpp_pipe->plane_res.hubp && + hubbub->funcs->wait_for_det_update) + hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst); + } + } + } +} + +void dcn401_interdependent_update_lock(struct dc *dc, + struct dc_state *context, bool lock) +{ + unsigned int i = 0; + struct pipe_ctx *pipe = NULL; + struct timing_generator *tg = NULL; + bool pipe_unlocked[MAX_PIPES] = {0}; + + if (lock) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + dc->hwss.pipe_control_lock(dc, pipe, true); + } + } else { + /* Unlock pipes based on the change in DET allocation instead of pipe index + * Prevents over allocation of DET during unlock process + * e.g. 2 pipe config with different streams with a max of 20 DET segments + * Before: After: + * - Pipe0: 10 DET segments - Pipe0: 12 DET segments + * - Pipe1: 10 DET segments - Pipe1: 8 DET segments + * If Pipe0 gets updated first, 22 DET segments will be allocated + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + int current_pipe_idx = i; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + pipe_unlocked[i] = true; + continue; + } + + // If the same stream exists in old context, ensure the OTG_MASTER pipes for the same stream get compared + struct pipe_ctx *old_otg_master = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, pipe->stream); + + if (old_otg_master) + current_pipe_idx = old_otg_master->pipe_idx; + if (resource_calculate_det_for_stream(context, pipe) < + resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[current_pipe_idx])) { + dc->hwss.pipe_control_lock(dc, pipe, false); + pipe_unlocked[i] = true; + dcn401_wait_for_det_buffer_update(dc, context, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (pipe_unlocked[i]) + continue; + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwss.pipe_control_lock(dc, pipe, false); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 8e9c1c17aa66..3ecb1ebffcee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -81,4 +81,6 @@ void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index f4eda4a55ea7..b5f63675afcb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -38,7 +38,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn32_interdependent_update_lock, + .interdependent_update_lock = dcn401_interdependent_update_lock, .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dd2b2864876c..67c32401893e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -227,6 +227,7 @@ struct hubbub_funcs { void (*get_mall_en)(struct hubbub *hubbub, unsigned int *mall_in_use); void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); + void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 96d40d33a1f9..9cd80d3864c7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -639,4 +639,9 @@ struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx); * @dml2_options: struct to hold callbacks */ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options); + +/* + *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe + */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ From 680458d41aa46a009909482f58358205b5c4b438 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 30 Jul 2024 16:32:44 -0400 Subject: [PATCH 0885/1523] drm/amd/display: Update to using new dccg callbacks [Why and how] Update to using new dccg callbacks Reviewed-by: Chris Park Signed-off-by: Hansen Dsouza Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 7f91e48902e2..004c4fe3ddfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -2396,11 +2396,11 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk_be_new; (void)&dccg35_set_symclk32_le_root_clock_gating; (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_funcs_new; + (void)&dccg35_funcs; base = &dccg_dcn->base; base->ctx = ctx; - base->funcs = &dccg35_funcs; + base->funcs = &dccg35_funcs_new; dccg_dcn->regs = regs; dccg_dcn->dccg_shift = dccg_shift; From 8c4f9e466169b3e411947347fef09382c14e5733 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 30 Jul 2024 14:57:48 -0400 Subject: [PATCH 0886/1523] drm/amd/display: Add more logging for MALL static screen [why & how] print additional info for MALL related calculations and DMCUB messaging to aid debugging. Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 10 ++++++++-- drivers/gpu/drm/amd/display/include/logger_types.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 22c7afbcfc4e..b604c8886ef4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1309,8 +1309,10 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) for (i = 0; i < dc->current_state->stream_count; i++) { /* MALL SS messaging is not supported with PSR at this time */ if (dc->current_state->streams[i] != NULL && - dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) + dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) { + DC_LOG_MALL("MALL SS not supported with PSR at this time\n"); return false; + } } memset(&cmd, 0, sizeof(cmd)); @@ -1320,8 +1322,9 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (enable) { if (dcn401_check_no_memory_request_for_cab(dc)) { /* 1. Check no memory request case for CAB. - * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message + * If no memory request case, send CAB_ACTION NO_DCN_REQ DMUB message */ + DC_LOG_MALL("sending CAB action NO_DCN_REQ\n"); cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; } else { /* 2. Check if all surfaces can fit in CAB. @@ -1349,13 +1352,16 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; cmd.cab.cab_alloc_ways = ways; + DC_LOG_MALL("cab allocation: %d ways. CAB action: DCN_SS_FIT_IN_CAB\n", ways); } else { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB; + DC_LOG_MALL("frame does not fit in CAB: %d ways required. CAB action: DCN_SS_NOT_FIT_IN_CAB\n", ways); } } } else { /* Disable CAB */ cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION; + DC_LOG_MALL("idle optimization disabled\n"); } dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 83479951732a..a48d564d1660 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -61,6 +61,7 @@ #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) +#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) #define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__) From ce4f9f79ff8cfc78a064c533f0aab563a5613d81 Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Wed, 31 Jul 2024 18:55:57 -0400 Subject: [PATCH 0887/1523] drm/amd/display: guard otg disable w/a for test [why & how] HW removed this w/a, but we will still keep it to avoid regression. but return in test mode. Reviewed-by: Charlene Liu Signed-off-by: Muhammad Ahmed Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index e075b2720f96..e2d906327e2e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -126,6 +126,9 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * struct dc *dc = clk_mgr_base->ctx->dc; int i; + if (dc->ctx->dce_environment == DCE_ENV_DIAG) + return; + for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; From 1b686053c06ffb9f4524b288110cf2a831ff7a25 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 30 Jul 2024 20:02:45 -0600 Subject: [PATCH 0888/1523] drm/amd/display: Check null pointer before try to access it [why & how] Change the order of the pipe_ctx->plane_state check to ensure that plane_state is not null before accessing it. Reviewed-by: Alex Hung Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 425432ca497f..a68da1a7092d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1932,6 +1932,11 @@ static void dcn20_program_pipe( (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult) || + pipe_ctx->update_flags.bits.enable) + hws->funcs.set_hdr_multiplier(pipe_ctx); + + if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, @@ -1939,13 +1944,13 @@ static void dcn20_program_pipe( pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } - if (pipe_ctx->update_flags.bits.enable || - (pipe_ctx->plane_state && + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.gamma_change) || (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.lut_3d)) + pipe_ctx->plane_state->update_flags.bits.lut_3d) || + pipe_ctx->update_flags.bits.enable) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish From 66e2d2d9a59f896def82a1c8684368be45cf4c06 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 31 Jul 2024 17:04:44 +0800 Subject: [PATCH 0889/1523] drm/amd/display: Check null pointer before try to access it [why & how] Make sure plane_state is not null before calling a function that dereferences it. Besides, remove redundant codes. Reviewed-by: Alex Hung Signed-off-by: Wayne Lin Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a68da1a7092d..a80c08582932 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1928,15 +1928,10 @@ static void dcn20_program_pipe( pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult)) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); - if ((pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.bits.hdr_mult) || - pipe_ctx->update_flags.bits.enable) - hws->funcs.set_hdr_multiplier(pipe_ctx); - - if (hws->funcs.populate_mcm_luts) { if (pipe_ctx->plane_state) { hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, @@ -1944,13 +1939,12 @@ static void dcn20_program_pipe( pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; } } - if ((pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change) || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.gamma_change) || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.lut_3d) || - pipe_ctx->update_flags.bits.enable) + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish From 07f4f9c00ec545dfa6251a44a09d2c48a76e7ee5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:21:53 +0530 Subject: [PATCH 0890/1523] drm/amdgpu: fix ptr check warning in gfx9 ip_dump Change if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 991f7c2fc1a2..ab10a05c7885 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2129,7 +2129,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -2142,7 +2142,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { From 98df5a7732e3b78bf8824d2938a8865a45cfc113 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:25:24 +0530 Subject: [PATCH 0891/1523] drm/amdgpu: fix ptr check warning in gfx10 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1b88528b512b..75a6ca645964 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4648,7 +4648,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -4661,7 +4661,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -4674,7 +4674,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From bd15f805cdc503ac229a14f5fe21db12e6e7f84a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:27:10 +0530 Subject: [PATCH 0892/1523] drm/amdgpu: fix ptr check warning in gfx11 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e7c160b9d0fe..22bb35278691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1502,7 +1502,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1515,7 +1515,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1528,7 +1528,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From 9b7e697839c2745c10f63fe5fd54c9e328fa2e3b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 7 Aug 2024 17:28:02 +0530 Subject: [PATCH 0893/1523] drm/amdgpu: fix ptr check warning in gfx12 ip_dump Change condition, if (ptr == NULL) to if (!ptr) for a better format and fix the warning. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0a71e216a7f5..df72fa125fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1281,7 +1281,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1294,7 +1294,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1307,7 +1307,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { From 70f83e7706e57200edb8ffa36883b2f43d214142 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 7 Aug 2024 15:33:41 -0400 Subject: [PATCH 0894/1523] drm/amdkfd: fix partition query when setting up recommended sdma engines When users dynamically set the partition mode through sysfs writes, this can lead to a double lock situation where the KFD is trying to take the partition lock when updating the recommended SDMA engines. Have the KFD reference its saved socket device number count instead. Also ensure we have enough SDMA xGMI engines to report the recommended engines in the first place. Fixes: e06b71b2313a ("drm/amdkfd: allow users to target recommended SDMA engines") Signed-off-by: Jonathan Kim Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 40771f8752cb..27d452e50ca9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1286,9 +1286,8 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, struct amdgpu_device *adev = gpu->adev; int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && - adev->aid_mask && num_xgmi_nodes && - (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == - AMDGPU_SPX_PARTITION_MODE) && + adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && + kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); if (support_rec_eng) { From 86cfa9a85fb04fa61e7c6b5a8ecf812437cdad78 Mon Sep 17 00:00:00 2001 From: Daniel Yang Date: Wed, 7 Aug 2024 02:01:21 -0700 Subject: [PATCH 0895/1523] Documentation: dm-crypt.rst warning + error fix While building kernel documention using make htmldocs command, I was getting unexpected indentation error. Single description was given for two module parameters with wrong indentation. So, I corrected the indentation of both parameters and the description. Signed-off-by: Shibu kumar Signed-off-by: Daniel Yang Signed-off-by: Mikulas Patocka Fixes: 0d815e3400e6 ("dm-crypt: limit the size of encryption requests") --- .../admin-guide/device-mapper/dm-crypt.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index e625830d335e..552c9155165d 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -162,13 +162,14 @@ iv_large_sectors Module parameters:: -max_read_size -max_write_size - Maximum size of read or write requests. When a request larger than this size - is received, dm-crypt will split the request. The splitting improves - concurrency (the split requests could be encrypted in parallel by multiple - cores), but it also causes overhead. The user should tune these parameters to - fit the actual workload. + + max_read_size + max_write_size + Maximum size of read or write requests. When a request larger than this size + is received, dm-crypt will split the request. The splitting improves + concurrency (the split requests could be encrypted in parallel by multiple + cores), but it also causes overhead. The user should tune these parameters to + fit the actual workload. Example scripts From a1fc9f584c4aaf8bc1ebfa459fc57a3f26a290d8 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 2 Aug 2024 11:28:45 -0400 Subject: [PATCH 0896/1523] drm/amdkfd: Handle queue destroy buffer access race Add helper function kfd_queue_unreference_buffers to reduce queue buffer refcount, separate it from release queue buffers. Because it is circular locking to hold dqm_lock to take vm lock, kfd_ioctl_destroy_queue should take vm lock, unreference queue buffers first, but not release queue buffers, to handle error in case failed to hold vm lock. Then hold dqm_lock to remove queue from queue list and then release queue buffers. Restore process worker restore queue hold dqm_lock, will always find the queue with valid queue buffers. v2 (Felix): - renamed kfd_queue_unreference_buffer(s) to kfd_queue_unref_bo_va(s) - added two FIXME comments for follow up Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 8 ++- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 66 ++++++++++++------- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0622ebd7e8ef..00350eccd571 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -400,6 +400,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: + kfd_queue_unref_bo_vas(pdd, &q_properties); kfd_queue_release_buffers(pdd, &q_properties); err_acquire_queue_buf: err_sdma_engine_id: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 057d20446c31..f7c12d4f0abb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1298,9 +1298,12 @@ void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, u64 expected_size); -void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +void kfd_queue_buffer_put(struct amdgpu_bo **bo); int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties); void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f732ee35b531..20ea745729ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -217,6 +217,7 @@ void pqm_uninit(struct process_queue_manager *pqm) list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); } @@ -512,7 +513,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { - retval = kfd_queue_release_buffers(pdd, &pqn->q->properties); + retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); if (retval) goto err_destroy_queue; @@ -526,7 +527,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } - + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } @@ -579,7 +580,8 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, return -EFAULT; } - kfd_queue_buffer_put(vm, &pqn->q->properties.ring_bo); + kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); + kfd_queue_buffer_put(&pqn->q->properties.ring_bo); amdgpu_bo_unreserve(vm->root.bo); pqn->q->properties.ring_bo = p->ring_bo; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index e0a073ae4a49..ad29634f8b44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -224,16 +224,9 @@ out_err: return -EINVAL; } -void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +/* FIXME: remove this function, just call amdgpu_bo_unref directly */ +void kfd_queue_buffer_put(struct amdgpu_bo **bo) { - if (*bo) { - struct amdgpu_bo_va *bo_va; - - bo_va = amdgpu_vm_bo_find(vm, *bo); - if (bo_va) - bo_va->queue_refcount--; - } - amdgpu_bo_unref(bo); } @@ -327,6 +320,10 @@ out_unreserve: out_err_unreserve: amdgpu_bo_unreserve(vm->root.bo); out_err_release: + /* FIXME: make a _locked version of this that can be called before + * dropping the VM reservation. + */ + kfd_queue_unref_bo_vas(pdd, properties); kfd_queue_release_buffers(pdd, properties); return err; } @@ -334,22 +331,13 @@ out_err_release: int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct kfd_topology_device *topo_dev; - struct amdgpu_vm *vm; u32 total_cwsr_size; - int err; - vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(vm->root.bo, false); - if (err) - return err; - - kfd_queue_buffer_put(vm, &properties->wptr_bo); - kfd_queue_buffer_put(vm, &properties->rptr_bo); - kfd_queue_buffer_put(vm, &properties->ring_bo); - kfd_queue_buffer_put(vm, &properties->eop_buf_bo); - kfd_queue_buffer_put(vm, &properties->cwsr_bo); - - amdgpu_bo_unreserve(vm->root.bo); + kfd_queue_buffer_put(&properties->wptr_bo); + kfd_queue_buffer_put(&properties->rptr_bo); + kfd_queue_buffer_put(&properties->ring_bo); + kfd_queue_buffer_put(&properties->eop_buf_bo); + kfd_queue_buffer_put(&properties->cwsr_bo); topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) @@ -362,6 +350,38 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope return 0; } +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va && bo_va->queue_refcount) + bo_va->queue_refcount--; + } +} + +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_unref_bo_va(vm, &properties->wptr_bo); + kfd_queue_unref_bo_va(vm, &properties->rptr_bo); + kfd_queue_unref_bo_va(vm, &properties->ring_bo); + kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo); + kfd_queue_unref_bo_va(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); + return 0; +} + #define SGPR_SIZE_PER_CU 0x4000 #define LDS_SIZE_PER_CU 0x10000 #define HWREG_SIZE_PER_CU 0x1000 From 3bb41f2e9134906e0814766c07dfbdd9de0bfaf5 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 30 Jul 2024 17:32:35 -0400 Subject: [PATCH 0897/1523] drm/amd/display: Fix print format specifiers in DC_LOG_IPS [Why] %d specifier is used for printing unsigned values. It can result in negative values in logs for unsigned variables. [How] Replace %d with %u for unsigned. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Roman Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 41270fade5f2..b1265124608b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1285,7 +1285,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) union dmub_shared_state_ips_driver_signals new_signals; DC_LOG_IPS( - "%s wait idle (ips1_commit=%d ips2_commit=%d)", + "%s wait idle (ips1_commit=%u ips2_commit=%u)", __func__, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1331,7 +1331,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } DC_LOG_IPS( - "%s send allow_idle=%d (ips1_commit=%d ips2_commit=%d)", + "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)", __func__, allow_idle, ips_fw->signals.bits.ips1_commit, @@ -1374,7 +1374,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%d ips2=%d) (commit ips1=%d ips2=%d) (count rcg=%d ips1=%d ips2=%d)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", __func__, ips_driver->signals.bits.allow_ips1, ips_driver->signals.bits.allow_ips2, @@ -1393,7 +1393,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) (!dc->debug.optimize_ips_handshake || ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1402,7 +1402,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1410,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 entry delay (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1418,14 +1418,14 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); DC_LOG_IPS( - "exit IPS2 #2 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1433,7 +1433,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait hw_pwr_up (ips1_commit=%d ips2_commit=%d)", + "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1441,7 +1441,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); DC_LOG_IPS( - "resync inbox1 (ips1_commit=%d ips2_commit=%d)", + "resync inbox1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1452,7 +1452,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1460,7 +1460,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); } @@ -1469,7 +1469,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%d ips1=%d ips2=%d)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u)", __func__, rcg_exit_count, ips1_exit_count, From 027347d17a16562f3be272833243b835c21aa2a5 Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Thu, 18 Jul 2024 16:48:26 +0800 Subject: [PATCH 0898/1523] drm/amd/display: Optimize vstartup position for AS-SDP [Why] In current design, the vstartup position is adjusted to vblank start position when AS-SDP is enabled. However when the vblank length is too big, it may over vstartup boundary. [How] To adjust vstartup position to 1 line before vsync position. Reviewed-by: Anthony Koo Signed-off-by: Robin Chen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index efe337ebf7c8..e9fea9c2162e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1132,7 +1132,8 @@ static void dcn20_adjust_freesync_v_startup( patched_crtc_timing.v_addressable - patched_crtc_timing.v_border_top; - newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + /* The newVStartUp is 1 line before vsync point */ + newVstartup = asic_blank_end + 1; *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); } From cd9e9e0852d501f169aa3bb34e4b413d2eb48c37 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 2 Aug 2024 12:35:13 +0530 Subject: [PATCH 0899/1523] drm/amd/display: Add null check for 'afb' in amdgpu_dm_plane_handle_cursor_update (v2) This commit adds a null check for the 'afb' variable in the amdgpu_dm_plane_handle_cursor_update function. Previously, 'afb' was assumed to be null, but was used later in the code without a null check. This could potentially lead to a null pointer dereference. Changes since v1: - Moved the null check for 'afb' to the line where 'afb' is used. (Alex) Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1298 amdgpu_dm_plane_handle_cursor_update() error: we previously assumed 'afb' could be null (see line 1252) Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Co-developed-by: Alex Hung Signed-off-by: Alex Hung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..1ff469ef51af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1372,7 +1372,8 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { mutex_lock(&adev->dm.dc_lock); From cc2991203c9d4e23051dbe5bcb1fc700fea26992 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 29 Jul 2024 18:17:55 -0400 Subject: [PATCH 0900/1523] drm/amd/display: Reduce redundant minimal transitions due to SubVP [WHY] Stream ID's associated with phantom pipes can change often as they are reconstructed on full updates, however they can remain identical depending on the required update. [HOW] In the case phantom streams and pipe topologies remain the same between updates, mark the transition as seamless. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c3bbbfd1be94..d75a811c90d9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1719,6 +1719,28 @@ void dcn32_blank_phantom(struct dc *dc, hws->funcs.wait_for_blank_complete(opp); } +/* phantom stream id's can change often, but can be identical between contexts. +* This function checks for the condition the streams are identical to avoid +* redundant pipe transitions. +*/ +static bool is_subvp_phantom_topology_transition_seamless( + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx, + const struct pipe_ctx *cur_pipe, + const struct pipe_ctx *new_pipe) +{ + enum mall_stream_type cur_pipe_type = dc_state_get_pipe_subvp_type(cur_ctx, cur_pipe); + enum mall_stream_type new_pipe_type = dc_state_get_pipe_subvp_type(new_ctx, new_pipe); + + const struct dc_stream_state *cur_paired_stream = dc_state_get_paired_subvp_stream(cur_ctx, cur_pipe->stream); + const struct dc_stream_state *new_paired_stream = dc_state_get_paired_subvp_stream(new_ctx, new_pipe->stream); + + return cur_pipe_type == SUBVP_PHANTOM && + cur_pipe_type == new_pipe_type && + cur_paired_stream && new_paired_stream && + cur_paired_stream->stream_id == new_paired_stream->stream_id; +} + bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx) @@ -1737,7 +1759,8 @@ bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, continue; else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) { if (resource_is_pipe_type(new_pipe, OTG_MASTER)) - if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id) + if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id || + is_subvp_phantom_topology_transition_seamless(cur_ctx, new_ctx, cur_pipe, new_pipe)) /* OTG master with the same stream is seamless */ continue; } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) { From 67ea53a4bd9d03a85eecd99875a2a794c886f788 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 1 Aug 2024 15:35:51 -0400 Subject: [PATCH 0901/1523] drm/amd/display: Disable DCN401 UCLK P-State support on full updates [WHY&HOW] It is not guaranteed even for HW exclusive P-State methods (like VActive) that P-state will be supported properly until optimize bandwidth is called, so unconditionally disable it on full updates. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b604c8886ef4..ac0a21ac318f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1401,8 +1401,8 @@ void dcn401_prepare_bandwidth(struct dc *dc, bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; unsigned int compbuf_size_kb = 0; - /* Any transition into or out of a FAMS config should disable MCLK switching first to avoid hangs */ - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ + if (p_state_change_support) { dc->optimized_required = true; context->bw_ctx.bw.dcn.clk.p_state_change_support = false; } @@ -1441,7 +1441,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, dcn401_fams2_global_control_lock(dc, context, false); } - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) { /* After disabling P-State, restore the original value to ensure we get the correct P-State * on the next optimize. */ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; From 7a1eb66809390d06b744aa13123b925b64b54c4c Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 1 Aug 2024 15:38:34 -0400 Subject: [PATCH 0902/1523] drm/amd/display: Perform outstanding programming on full updates [WHY] In certain scenarios DC can internally trigger back to back full updates which will miss some required programming that is normally deferred until post update via optimize_bandwidth. [HOW] In back to back update scenarios, wait for pending updates to complete and perform any strictly required outstanding programming. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 121 +---------------- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 123 ++++++++++++++++++ .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 10 ++ .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h | 4 + .../amd/display/dc/hwss/dcn32/dcn32_init.c | 1 + .../amd/display/dc/hwss/dcn35/dcn35_init.c | 1 + .../amd/display/dc/hwss/dcn351/dcn351_init.c | 1 + .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 18 ++- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 1 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 1 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 17 +++ 11 files changed, 176 insertions(+), 122 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3ba2acfdae2a..c8dabb081b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1352,80 +1352,6 @@ static void disable_vbios_mode_if_required( } } -/** - * wait_for_blank_complete - wait for all active OPPs to finish pending blank - * pattern updates - * - * @dc: [in] dc reference - * @context: [in] hardware context in use - */ -static void wait_for_blank_complete(struct dc *dc, - struct dc_state *context) -{ - struct pipe_ctx *opp_head; - struct dce_hwseq *hws = dc->hwseq; - int i; - - if (!hws->funcs.wait_for_blank_complete) - return; - - for (i = 0; i < MAX_PIPES; i++) { - opp_head = &context->res_ctx.pipe_ctx[i]; - - if (!resource_is_pipe_type(opp_head, OPP_HEAD) || - dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) - continue; - - hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); - } -} - -static void wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *otg_master; - struct timing_generator *tg; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - otg_master = &context->res_ctx.pipe_ctx[i]; - if (!resource_is_pipe_type(otg_master, OTG_MASTER) || - dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) - continue; - tg = otg_master->stream_res.tg; - if (tg->funcs->wait_odm_doublebuffer_pending_clear) - tg->funcs->wait_odm_doublebuffer_pending_clear(tg); - } - - /* ODM update may require to reprogram blank pattern for each OPP */ - wait_for_blank_complete(dc, context); -} - -static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) -{ - int i; - PERF_TRACE(); - for (i = 0; i < MAX_PIPES; i++) { - int count = 0; - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) - continue; - - /* Timeout 100 ms */ - while (count < 100000) { - /* Must set to false to start with, due to OR in update function */ - pipe->plane_state->status.is_flip_pending = false; - dc->hwss.update_pending_status(pipe); - if (!pipe->plane_state->status.is_flip_pending) - break; - udelay(1); - count++; - } - ASSERT(!pipe->plane_state->status.is_flip_pending); - } - PERF_TRACE(); -} - /* Public functions */ struct dc *dc_create(const struct dc_init_data *init_params) @@ -2109,12 +2035,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ - wait_for_no_pipes_pending(dc, context); + hwss_wait_for_no_pipes_pending(dc, context); /* * optimized dispclk depends on ODM setup. Need to wait for ODM * update pending complete before optimizing bandwidth. */ - wait_for_odm_update_pending_complete(dc, context); + hwss_wait_for_odm_update_pending_complete(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); /* Need to do otg sync again as otg could be out of sync due to otg @@ -3786,47 +3712,6 @@ static void commit_planes_for_stream_fast(struct dc *dc, top_pipe_to_program->stream->update_flags.raw = 0; } -static void wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) -{ -/* - * This function calls HWSS to wait for any potentially double buffered - * operations to complete. It should be invoked as a pre-amble prior - * to full update programming before asserting any HW locks. - */ - int pipe_idx; - int opp_inst; - int opp_count = dc->res_pool->res_cap->num_opp; - struct hubp *hubp; - int mpcc_inst; - const struct pipe_ctx *pipe_ctx; - - for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { - pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; - - if (!pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) - pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); - - hubp = pipe_ctx->plane_res.hubp; - if (!hubp) - continue; - - mpcc_inst = hubp->inst; - // MPCC inst is equal to pipe index in practice - for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { - if ((dc->res_pool->opps[opp_inst] != NULL) && - (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; - break; - } - } - } - wait_for_odm_update_pending_complete(dc, dc_context); -} - static void commit_planes_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -3850,7 +3735,7 @@ static void commit_planes_for_stream(struct dc *dc, dc_z10_restore(dc); if (update_type == UPDATE_TYPE_FULL) - wait_for_outstanding_hw_updates(dc, context); + hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 5f9b6e8ef428..9a569aac3c00 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -978,3 +978,126 @@ void get_surface_tile_visual_confirm_color( break; } } + +/** + * hwss_wait_for_blank_complete - wait for all active OPPs to finish pending blank + * pattern updates + * + * @dc: [in] dc reference + * @context: [in] hardware context in use + */ +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context) +{ + struct pipe_ctx *opp_head; + struct dce_hwseq *hws = dc->hwseq; + int i; + + if (!hws->funcs.wait_for_blank_complete) + return; + + for (i = 0; i < MAX_PIPES; i++) { + opp_head = &context->res_ctx.pipe_ctx[i]; + + if (!resource_is_pipe_type(opp_head, OPP_HEAD) || + dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) + continue; + + hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); + } +} + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *otg_master; + struct timing_generator *tg; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + otg_master = &context->res_ctx.pipe_ctx[i]; + if (!resource_is_pipe_type(otg_master, OTG_MASTER) || + dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) + continue; + tg = otg_master->stream_res.tg; + if (tg->funcs->wait_odm_doublebuffer_pending_clear) + tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + } + + /* ODM update may require to reprogram blank pattern for each OPP */ + hwss_wait_for_all_blank_complete(dc, context); +} + +void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < MAX_PIPES; i++) { + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + + /* Timeout 100 ms */ + while (count < 100000) { + /* Must set to false to start with, due to OR in update function */ + pipe->plane_state->status.is_flip_pending = false; + dc->hwss.update_pending_status(pipe); + if (!pipe->plane_state->status.is_flip_pending) + break; + udelay(1); + count++; + } + ASSERT(!pipe->plane_state->status.is_flip_pending); + } +} + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ +/* + * This function calls HWSS to wait for any potentially double buffered + * operations to complete. It should be invoked as a pre-amble prior + * to full update programming before asserting any HW locks. + */ + int pipe_idx; + int opp_inst; + int opp_count = dc->res_pool->res_cap->num_opp; + struct hubp *hubp; + int mpcc_inst; + const struct pipe_ctx *pipe_ctx; + + for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { + pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; + + if (!pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) + pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); + + hubp = pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + + mpcc_inst = hubp->inst; + // MPCC inst is equal to pipe index in practice + for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { + if ((dc->res_pool->opps[opp_inst] != NULL) && + (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { + dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); + dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; + break; + } + } + } + hwss_wait_for_odm_update_pending_complete(dc, dc_context); +} + +void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ + /* wait for outstanding updates */ + hwss_wait_for_outstanding_hw_updates(dc, dc_context); + + /* perform outstanding post update programming */ + if (dc->hwss.program_outstanding_updates) + dc->hwss.program_outstanding_updates(dc, dc_context); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index d75a811c90d9..6a40b2e2beb5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1846,3 +1846,13 @@ void dcn32_interdependent_update_lock(struct dc *dc, dc->hwss.pipe_control_lock(dc, pipe, false); } } + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_size) + hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index b1563e2c0491..cac4a08b92a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -133,4 +133,8 @@ void dcn32_prepare_bandwidth(struct dc *dc, void dcn32_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 58bed01fc20e..3422b564ae98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -120,6 +120,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .blank_phantom = dcn32_blank_phantom, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 55dc5799e725..2bbf1fef94fd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -123,6 +123,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn35_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index b1b2a58684e7..5da3069fc1ab 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -122,6 +122,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn351_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index ac0a21ac318f..0b743669f23b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1399,7 +1399,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, { struct hubbub *hubbub = dc->res_pool->hubbub; bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; - unsigned int compbuf_size_kb = 0; + unsigned int compbuf_size = 0; /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ if (p_state_change_support) { @@ -1429,10 +1429,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { - compbuf_size_kb = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; + dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); - hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size_kb, false); + hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } if (dc->debug.fams2_config.bits.enable) { @@ -1760,3 +1760,13 @@ void dcn401_interdependent_update_lock(struct dc *dc, } } } + +void dcn401_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_segments) + hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 3ecb1ebffcee..a27e62081685 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -83,4 +83,5 @@ void dcn401_update_odm(struct dc *dc, struct dc_state *context, void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); +void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index b5f63675afcb..a2ca07235c83 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -99,6 +99,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, + .program_outstanding_updates = dcn401_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index f50b2955ce8c..326854489802 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -459,6 +459,8 @@ struct hw_sequencer_funcs { bool enable); void (*fams2_global_control_lock_fast)(union block_sequence_params *params); void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); + void (*program_outstanding_updates)(struct dc *dc, + struct dc_state *context); }; void color_space_to_black_color( @@ -519,6 +521,21 @@ void hwss_build_fast_sequence(struct dc *dc, struct dc_stream_status *stream_status, struct dc_state *context); +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_no_pipes_pending(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + +void hwss_process_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + void hwss_send_dmcub_cmd(union block_sequence_params *params); void hwss_program_manual_trigger(union block_sequence_params *params); From 24f483ba49c9acc1139a0b4cbfd1b122fbe1ed7f Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 2 Aug 2024 13:50:10 -0400 Subject: [PATCH 0903/1523] drm/amd/display: Set max VTotal cap for dcn401 [WHY&HOW] Set max VTotal cap for dcn401 because VTotal register is only 16 bits wide on dcn401. Reviewed-by: Chris Park Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index ec676d269d33..02e63b95c36d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1822,6 +1822,7 @@ static bool dcn401_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; From 5b7813bc6218100d7bbc6d3a1b582bfd64034bf4 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 1 Aug 2024 18:18:20 -0400 Subject: [PATCH 0904/1523] drm/amd/display: remove redundant msg to pmfw at boot/resume [why & how] this is to remove redundant msg to pmfw at boot/resume since bios already power up dcn. Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e2f5c4d34a55..217344ccf644 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1845,6 +1845,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + struct dc_bios *dcb = dc->ctx->dc_bios; DC_LOGGER_INIT(); @@ -1921,13 +1922,15 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) hws->funcs.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1); } From d6ed53712f583423db61fbb802606759e023bf7b Mon Sep 17 00:00:00 2001 From: Loan Chen Date: Fri, 2 Aug 2024 13:57:40 +0800 Subject: [PATCH 0905/1523] drm/amd/display: Enable otg synchronization logic for DCN321 [Why] Tiled display cannot synchronize properly after S3. The fix for commit 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") is not enable in DCN321, which causes the otg is excluded from synchronization. [How] Enable otg synchronization logic in dcn321. Fixes: 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Alvin Lee Signed-off-by: Loan Chen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index a414ed60a724..827a94f84f10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; dc->config.disable_hbr_audio_dp2 = true; From 12dbb3ed212fc7655fce421542a5add637f8af7a Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 29 Jul 2024 10:23:03 -0400 Subject: [PATCH 0906/1523] drm/amd/display: Fix MST BW calculation Regression [Why & How] Revert commit 8b2cb32cf0c6 ("drm/amd/display: FEC overhead should be checked once for mst slot nums") Because causes bw calculation regression Cc: mario.limonciello@amd.com Cc: alexander.deucher@amd.com Reported-by: jirislaby@kernel.org Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3495 Closes: https://bugzilla.suse.com/show_bug.cgi?id=1228093 Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 33 ++++++++++++++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 3 ++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 915eb2c08ece..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, From 5ac2557d75f94777076885b5119d3b6755e6761d Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 2 Aug 2024 12:31:42 -0600 Subject: [PATCH 0907/1523] drm/amd/display: Remove unused field [why & how] Remove force_backlight_start_level since it is never used. Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7873daf72608..91a351f8711e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -306,8 +306,6 @@ struct dc_bug_wa { uint8_t dcfclk_ds: 1; } clock_update_disable_mask; bool skip_psr_ips_crtc_disable; - //Customer Specific WAs - uint32_t force_backlight_start_level; }; struct dc_dcc_surface_param { struct dc_size surface_size; From 17b6527dcfb3249401e037734ed3fd0f4752572f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 29 Jul 2024 11:59:25 -0600 Subject: [PATCH 0908/1523] drm/amd/display: Improve FAM control for DCN401 [why & how] When the commit 5324e2b205a2 ("drm/amd/display: Add driver support for future FAMS versions") was introduced, it missed some of the FAM2 code. This commit introduces the code that control the FAM enable and disable. Fixes: 5324e2b205a2 ("drm/amd/display: Add driver support for future FAMS versions") Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6a40b2e2beb5..a36e11606f90 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -985,8 +985,19 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - if (dc->ctx->dmub_srv->dmub->fw_version < + /* for DCN401 testing only */ + dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + if (dc->caps.dmub_caps.fams_ver == 2) { + /* FAMS2 is enabled */ + dc->debug.fams2_config.bits.enable &= true; + } else if (dc->ctx->dmub_srv->dmub->fw_version < DMUB_FW_VERSION(7, 0, 35)) { + /* FAMS2 is disabled */ + dc->debug.fams2_config.bits.enable = false; + if (dc->debug.using_dml2 && dc->res_pool->funcs->update_bw_bounding_box) { + /* update bounding box if FAMS2 disabled */ + dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + } dc->debug.force_disable_subvp = true; dc->debug.disable_fpo_optimizations = true; } From 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:29:40 -0700 Subject: [PATCH 0909/1523] KVM: x86: Make x2APIC ID 100% readonly Ignore the userspace provided x2APIC ID when fixing up APIC state for KVM_SET_LAPIC, i.e. make the x2APIC fully readonly in KVM. Commit a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID register"), which added the fixup, didn't intend to allow userspace to modify the x2APIC ID. In fact, that commit is when KVM first started treating the x2APIC ID as readonly, apparently to fix some race: static inline u32 kvm_apic_id(struct kvm_lapic *apic) { - return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff; + /* To avoid a race between apic_base and following APIC_ID update when + * switching to x2apic_mode, the x2apic mode returns initial x2apic id. + */ + if (apic_x2apic_mode(apic)) + return apic->vcpu->vcpu_id; + + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; } Furthermore, KVM doesn't support delivering interrupts to vCPUs with a modified x2APIC ID, but KVM *does* return the modified value on a guest RDMSR and for KVM_GET_LAPIC. I.e. no remotely sane setup can actually work with a modified x2APIC ID. Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map calculation, which expects the LDR to align with the x2APIC ID. WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm] CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014 RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm] Call Trace: kvm_apic_set_state+0x1cf/0x5b0 [kvm] kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm] kvm_vcpu_ioctl+0x663/0x8a0 [kvm] __x64_sys_ioctl+0xb8/0xf0 do_syscall_64+0x56/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fade8b9dd6f Unfortunately, the WARN can still trigger for other CPUs than the current one by racing against KVM_SET_LAPIC, so remove it completely. Reported-by: Michal Luczaj Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co Reported-by: Haoyu Wu Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com Reported-by: syzbot+545f1326f405db4e1c3e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com Signed-off-by: Sean Christopherson Message-ID: <20240802202941.344889-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4915acdbfcd8..5bb481aefcbc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new, * reversing the LDR calculation to get cluster of APICs, i.e. no * additional work is required. */ - if (apic_x2apic_mode(apic)) { - WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); + if (apic_x2apic_mode(apic)) return; - } if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))) { @@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); u32 *id = (u32 *)(s->regs + APIC_ID); u32 *ldr = (u32 *)(s->regs + APIC_LDR); u64 icr; if (vcpu->kvm->arch.x2apic_format) { - if (*id != vcpu->vcpu_id) + if (*id != x2apic_id) return -EINVAL; } else { + /* + * Ignore the userspace value when setting APIC state. + * KVM's model is that the x2APIC ID is readonly, e.g. + * KVM only supports delivering interrupts to KVM's + * version of the x2APIC ID. However, for backwards + * compatibility, don't reject attempts to set a + * mismatched ID for userspace that hasn't opted into + * x2apic_format. + */ if (set) - *id >>= 24; + *id = x2apic_id; else - *id <<= 24; + *id = x2apic_id << 24; } /* @@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, * split to ICR+ICR2 in userspace for backwards compatibility. */ if (set) { - *ldr = kvm_apic_calc_x2apic_ldr(*id); + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; From 238d3d63d1e27c8d9733b48f7b682fc6aba86672 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 2 Aug 2024 13:29:41 -0700 Subject: [PATCH 0910/1523] KVM: selftests: Add a testcase to verify x2APIC is fully readonly Add a test to verify that userspace can't change a vCPU's x2APIC ID by abusing KVM_SET_LAPIC. KVM models the x2APIC ID (and x2APIC LDR) as readonly, and silently ignores userspace attempts to change the x2APIC ID for backwards compatibility. Signed-off-by: Michal Luczaj [sean: write changelog, add to existing test] Signed-off-by: Sean Christopherson Message-ID: <20240802202941.344889-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/xapic_state_test.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 69849acd95b0..618cd2442390 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -184,6 +184,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -211,4 +238,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } From c9b35a6f4edea698a5bb4dd8029e7104ee0a3726 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 11 Jul 2024 20:11:30 +0800 Subject: [PATCH 0911/1523] KVM: eventfd: Use synchronize_srcu_expedited() on shutdown When hot-unplug a device which has many queues, and guest CPU will has huge jitter, and unplugging is very slow. It turns out synchronize_srcu() in irqfd_shutdown() caused the guest jitter and unplugging latency, so replace synchronize_srcu() with synchronize_srcu_expedited(), to accelerate the unplugging, and reduce the guest OS jitter, this accelerates the VM reboot too. Signed-off-by: Li RongQing Message-ID: <20240711121130.38917-1-lirongqing@baidu.com> [Call it just once in irqfd_resampler_shutdown. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 229570059a1b..992f9beb3e7d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -97,18 +97,19 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); /* - * synchronize_srcu(&kvm->irq_srcu) already called + * synchronize_srcu_expedited(&kvm->irq_srcu) already called * in kvm_unregister_irq_ack_notifier(). */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); + } else { + synchronize_srcu_expedited(&kvm->irq_srcu); } mutex_unlock(&kvm->irqfds.resampler_lock); @@ -126,7 +127,7 @@ irqfd_shutdown(struct work_struct *work) u64 cnt; /* Make sure irqfd has been initialized in assign path. */ - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); /* * Synchronize with the wait-queue and unhook ourselves to prevent @@ -384,7 +385,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -523,7 +524,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); kvm_arch_post_irq_ack_notifier_list_update(kvm); } @@ -608,7 +609,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Take note of a change in irq routing. - * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. + * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm) { From 1fd2cf090096af8a25bf85564341cfc21cec659d Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 31 Jan 2023 15:05:46 -0100 Subject: [PATCH 0912/1523] drm/amd/display: fix cursor offset on rotation 180 [why & how] Cursor gets clipped off in the middle of the screen with hw rotation 180. Fix a miscalculation of cursor offset when it's placed near the edges in the pipe split case. Cursor bugs with hw rotation were reported on AMD issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 The issues on rotation 270 was fixed by: https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-Brian.Chang@amd.com/ that partially addressed the rotation 180 too. So, this patch is the final bits for rotation 180. Reported-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 Reviewed-by: Harry Wentland Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index a7b5b25e3f34..802902f54d09 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3594,7 +3594,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { From 8f9b23abbae5ffcd64856facd26a86b67195bc2f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Aug 2024 16:16:35 -0600 Subject: [PATCH 0913/1523] drm/amd/display: Adjust cursor position [why & how] When the commit 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") was introduced, it used the wrong calculation for the position copy for X. This commit uses the correct calculation for that based on the original patch. Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Cc: Mario Limonciello Cc: Alex Deucher Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 802902f54d09..01dffed4d30b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3687,7 +3687,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { From 267c5876c977d5f2d5a89f377d74adf42b5c38a5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 2 Aug 2024 12:33:20 -0600 Subject: [PATCH 0914/1523] drm/amd/display: Remove unnecessary call to REG_SEQ_SUBMIT|WAIT_DONE [why & how] Remove unnecessary call to REG_SEQ_SUBMIT and REG_SEQ_WAIT_DONE, since those macros are not necessary anymore at the dpp1 set degamma. Those are part of an old implementation. Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c index f2a2d53e9689..f8f6019d8304 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c @@ -684,9 +684,6 @@ void dpp1_set_degamma( BREAK_TO_DEBUGGER(); break; } - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); } void dpp1_degamma_ram_select( From bbec7cea6fa4a0463d4766ed0e6bb347773d3949 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Mon, 5 Aug 2024 11:00:14 -0400 Subject: [PATCH 0915/1523] drm/amd/display: Promote DAL to 3.2.296 This version brings along following fixes: - Fix some cursor issue - Fix print format specifiers in DC_LOG_IPS - Fix minor coding errors in dml21 phase 5 - Fix MST BW calculation Regression - Improve FAM control for DCN401 - Add null pointer checks for some code - Refactor 3DLUT for non-DMA - Optimize vstartup position for AS-SDP - Update to using new dccg callbacks - Enable otg synchronization logic for DCN321 - Disable DCN401 UCLK P-State support on full updates Acked-by: Wayne Lin Signed-off-by: Martin Leung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 91a351f8711e..6b036417a73a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.295" +#define DC_VER "3.2.296" #define MAX_SURFACES 3 #define MAX_PLANES 6 From ef6c2cb349c708676b7820c36a5beb75868ad544 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Wed, 7 Aug 2024 17:32:27 +0800 Subject: [PATCH 0916/1523] drm/amd/sriov: extend NV_MAILBOX_POLL_MSG_TIMEDOUT on MI300/MI308 UBB products, when doing mode1 reset, since 1 gpu need to wait all 8 gpus finish mode1 reset and then do re-init. As observed, sometimes the gpu which triggered the reset need to wait 15s for all gpus to finish. If poll msg timeout, guest driver will send the reset message again, and may mess up the following reinit sequence on other gpus. So extend the time to cover the maximum time needed to recover. Signed-off-by: Victor Zhao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index caf616a2c8a6..1d099ffb3a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -25,7 +25,7 @@ #define __MXGPU_NV_H__ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 -#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000 #define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define NV_MAILBOX_POLL_MSG_REP_MAX 11 From 61cffacb3a1c590b15c0e9ff987de02d293e0dd8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 8 Aug 2024 12:19:22 +0800 Subject: [PATCH 0917/1523] drm/amd/amdgpu: add HDP_SD support on gc 12.0.0/1 add HDP_SD support on gc 12.0.0/1 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc24.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | From 42b3a6f12aa56dcf789464d29fdf11f33bf0e793 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 7 Aug 2024 21:41:59 +0530 Subject: [PATCH 0918/1523] drm/amdkfd: Add node_id to location_id generically If there are multiple nodes per kfd device, add nodeid to location_id to differentiate. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 27d452e50ca9..3871591c9aec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2078,7 +2078,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + if (gpu->kfd->num_nodes > 1) dev->node_props.location_id |= dev->gpu->node_id; dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); From aa02486fb18cecbaca0c4fd393d1a03f1d4c3f9a Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Fri, 9 Aug 2024 17:20:26 -0400 Subject: [PATCH 0919/1523] drm/amdgpu: Update kmd_fw_shared for VCN5 kmd_fw_shared changed in VCN5 Signed-off-by: Yinjie Yao Reviewed-by: Ruijing Dong Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 From 35c7152202e111968b10140383f49da9159d2704 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 25 Jul 2024 09:51:56 -0400 Subject: [PATCH 0920/1523] Revert "drm/amdgpu: Extend KIQ reg polling wait for VF" KIQ timeouts no longer seen. This reverts commit 3a19a8af64eaff8a8b230796741a1a8277205344. Signed-off-by: Victor Skvortsov Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c54ddd3e68aa..f3980b40f2ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -347,9 +347,9 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ -#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 int amdgpu_device_ip_set_clockgating_state(void *dev, From f83cec3b3a7c968bbceb810b7acd1baf3fe8cd87 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 8 Aug 2024 13:22:34 -0400 Subject: [PATCH 0921/1523] drm/amdgpu: Disable dpm_enabled flag while VF is in reset VFs do not perform HW fini/suspend in FLR, so the dpm_enabled is incorrectly kept enabled. Add interface to disable it in virt_pre_reset call. v2: Made implementation generic for all asics v3: Re-order conditionals so PP_MP1_STATE_FLR is only evaluated on VF Signed-off-by: Victor Skvortsov Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 6 +++++- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 29a4adee9286..a6b8d0ba4758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (reset_context->reset_req_dev == adev) job = reset_context->job; - if (amdgpu_sriov_vf(adev)) { - /* stop the data exchange thread */ - amdgpu_virt_fini_data_exchange(adev); - } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_pre_reset(adev); amdgpu_fence_driver_isr_toggle(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b287a82e6177..b6397d3229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -33,6 +33,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "amdgpu_dpm.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +void amdgpu_virt_pre_reset(struct amdgpu_device *adev) +{ + /* stop the data exchange thread */ + amdgpu_virt_fini_data_exchange(adev); + amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR); +} + void amdgpu_virt_post_reset(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b42a8854dca0..b650a2032c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); +void amdgpu_virt_pre_reset(struct amdgpu_device *adev); void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 4b20e2274313..19a48d98830a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -218,6 +218,7 @@ enum pp_mp1_state { PP_MP1_STATE_SHUTDOWN, PP_MP1_STATE_UNLOAD, PP_MP1_STATE_RESET, + PP_MP1_STATE_FLR, }; enum pp_df_cstate { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 8b7d6ed7e2ed..9dc82f4d7c93 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -168,7 +168,11 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (pp_funcs && pp_funcs->set_mp1_state) { + if (mp1_state == PP_MP1_STATE_FLR) { + /* VF lost access to SMU */ + if (amdgpu_sriov_vf(adev)) + adev->pm.dpm_enabled = false; + } else if (pp_funcs && pp_funcs->set_mp1_state) { mutex_lock(&adev->pm.mutex); ret = pp_funcs->set_mp1_state( From e69c2dd7534f3fcabf7bb801db2a7ac71e7e5da6 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 13:19:59 +0800 Subject: [PATCH 0922/1523] drm/amdgpu/mes12: load unified mes fw on pipe0 and pipe1 Enable unified mes firmware to load on pipe0 and pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 27 +++---------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index be2156bf0252..8f6feb887a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1525,7 +1525,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index ac6209a0029c..1b6c8d9b6891 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -777,16 +777,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -800,8 +795,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -817,8 +811,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -833,10 +826,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1558,17 +1547,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; From 9c081c11c62112f1c30ff2426f755279a43fa1a1 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 12 Aug 2024 09:02:57 +0530 Subject: [PATCH 0923/1523] drm/amdgpu: Reorder to read EFI exported ROM first On EFI BIOSes, PCI ROM may be exported through EFI_PCI_IO_PROTOCOL and expansion ROM BARs may not be enabled. Choose to read from EFI exported ROM data before reading PCI Expansion ROM BAR. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 618e469e3622..42e64bce661e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -431,6 +431,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + if (amdgpu_read_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); goto success; @@ -446,11 +451,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } - dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); return false; From a85c3db6b3b088f63b5b8c4fd4352f56f0e4ce3d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 30 Jul 2024 12:52:20 -0400 Subject: [PATCH 0924/1523] drm/amdkfd: fallback to pipe reset on queue reset fail for gfx9 If queue reset fails, tell the CP to reset the pipe. Since queues multiplex context per pipe and we've issued a device wide preemption prior to the hang, we can assume the hung pipe only has one queue to reset on pipe reset. Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 32f28c12077b..c63528a4e894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1173,12 +1173,30 @@ unlock_out: return queue_addr; } +/* assume queue acquired */ +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst, + unsigned int utimeout) +{ + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + while (true) { + uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + return 0; + + if (time_after(jiffies, end_jiffies)) + return -ETIME; + + usleep_range(500, 1000); + } +} + uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t inst, unsigned int utimeout) { - uint32_t low, high, temp; - unsigned long end_jiffies; + uint32_t low, high, pipe_reset_data = 0; uint64_t queue_addr = 0; kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); @@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, /* assume previous dequeue request issued will take affect after reset */ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); - end_jiffies = (utimeout * HZ / 1000) + jiffies; - while (true) { - temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + goto unlock_out; - if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; + pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id); - if (time_after(jiffies, end_jiffies)) { - queue_addr = 0; - break; - } + pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1); + pipe_reset_data = pipe_reset_data << pipe_id; - usleep_range(500, 1000); - } + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); - pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", - inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); + if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + queue_addr = 0; unlock_out: + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); amdgpu_gfx_rlc_exit_safe_mode(adev, inst); kgd_gfx_v9_release_queue(adev, inst); From 9e823f307074c0f82b5f6044943b0086e3079bed Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 8 Aug 2024 13:40:23 -0400 Subject: [PATCH 0925/1523] drm/amdgpu: Block MMR_READ IOCTL in reset Register access from userspace should be blocked until reset is complete. Signed-off-by: Victor Skvortsov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 45 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 66782be5917b..96af9ff1acb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,6 +43,7 @@ #include "amdgpu_gem.h" #include "amdgpu_display.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amd_pcie.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) @@ -778,6 +779,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { + int ret = 0; unsigned int n, alloc_size; uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> @@ -787,24 +789,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; + if (!down_read_trylock(&adev->reset_domain->sem)) + return -ENOENT; + /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { se_num = 0xffffffff; - else if (se_num >= AMDGPU_GFX_MAX_SE) - return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) - sh_num = 0xffffffff; - else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) - return -EINVAL; + } else if (se_num >= AMDGPU_GFX_MAX_SE) { + ret = -EINVAL; + goto out; + } - if (info->read_mmr_reg.count > 128) - return -EINVAL; + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + sh_num = 0xffffffff; + } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { + ret = -EINVAL; + goto out; + } + + if (info->read_mmr_reg.count > 128) { + ret = -EINVAL; + goto out; + } regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) - return -ENOMEM; + if (!regs) { + ret = -ENOMEM; + goto out; + } + alloc_size = info->read_mmr_reg.count * sizeof(*regs); amdgpu_gfx_off_ctrl(adev, false); @@ -816,13 +831,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) info->read_mmr_reg.dword_offset + i); kfree(regs); amdgpu_gfx_off_ctrl(adev, true); - return -EFAULT; + ret = -EFAULT; + goto out; } } amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); - return n ? -EFAULT : 0; + ret = (n ? -EFAULT : 0); +out: + up_read(&adev->reset_domain->sem); + return ret; } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; From b2dee0837a4be63e8d3e00550a9f057644f962c4 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:15:48 +0800 Subject: [PATCH 0926/1523] drm/amdgpu/mes12: add mes pipe switch support Add mes pipe switch to let caller choose pipe to submit packet. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 60 +++++++++++++++----------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 1b6c8d9b6891..449f7cd8f490 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,13 +142,14 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring[0]; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +178,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock[0], flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,32 +208,33 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -249,7 +251,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -321,6 +323,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -340,6 +343,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -365,6 +369,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.map_legacy_kq = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -398,6 +403,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -414,7 +420,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -424,7 +430,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -486,11 +492,12 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -501,12 +508,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -566,7 +573,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -674,6 +681,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), offsetof(union MESAPI__RESET, api_status)); } @@ -1479,19 +1487,19 @@ static int mes_v12_0_hw_init(void *handle) if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); /* Enable the MES to handle doorbell ring on unmapped queue */ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; From aa539da8aff07ab08def6490e8c9b441439e70ba Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:44:07 +0800 Subject: [PATCH 0927/1523] drm/amdgpu/mes12: adjust mes12 sw/hw init for multiple pipes Adjust mes12 sw/hw initiailization for both pipe0 and pipe1 enablement. The two pipes are almost identical pipe. Pipe0 behaves like schq and pipe1 like kiq, pipe0 was mapped by pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 115 +++++++++++++++---------- 1 file changed, 69 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 449f7cd8f490..6e50a6233db5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -266,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -352,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -368,8 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -378,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -402,8 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -439,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -491,8 +504,12 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } @@ -657,6 +674,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, struct mes_reset_legacy_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; + int pipe; memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); @@ -680,8 +698,12 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), offsetof(union MESAPI__RESET, api_status)); } @@ -1141,14 +1163,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1160,13 +1180,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1186,25 +1205,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring[0]; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1218,7 +1241,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1240,12 +1263,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1286,9 +1307,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1296,18 +1314,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1402,10 +1417,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1432,6 +1447,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1463,7 +1486,7 @@ static int mes_v12_0_hw_init(void *handle) if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1483,6 +1506,9 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; @@ -1496,9 +1522,6 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); From ea5d6db17a8e3635ad91e8c53faa1fdc9570fbbb Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:49:30 +0800 Subject: [PATCH 0928/1523] drm/amdgpu/mes12: configure two pipes hardware resources Configure two pipes with different hardware resources. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 77 +++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 12 ++-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 7 +-- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 49 ++++++++-------- 4 files changed, 81 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 8f6feb887a56..c598c3edff7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -165,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -214,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -228,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index d87d068952e0..548e724e3a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -112,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 4c7899e527fe..c0340ee3dec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -522,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1243,9 +1243,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 6e50a6233db5..f5d681f69d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -542,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } + + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; - - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -1331,9 +1337,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); From 98cae695a8ae0e4291b1fa7feef9b54fabefe885 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 15:23:16 +0800 Subject: [PATCH 0929/1523] drm/amdgpu/mes12: sw/hw fini for unified mes Free memory for two pipes and unmap pipe0 via pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index f5d681f69d06..35cd6ad73912 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1344,18 +1344,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); + + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, - &adev->mes.ring[0].mqd_gpu_addr, - &adev->mes.ring[0].mqd_ptr); - - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring[0]); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1472,7 +1475,13 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { if (adev->mes.ring[0].sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + adev->mes.ring[0].sched.ready = false; } From f7fb9d677faf0460131bc2af15afd766d48a1f47 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 12:03:11 +0800 Subject: [PATCH 0930/1523] drm/amdgpu/mes12: fix suspend issue Use mes pipe to unmap kcq and kgq. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +------------------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 28bd2098a65e..9be8cafdcecc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index df72fa125fd2..f14e27f86e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3595,33 +3595,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3630,8 +3606,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } From a7f670d5d8e77b092404ca8a35bb0f8f89ed3117 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Thu, 8 Aug 2024 12:19:50 -0400 Subject: [PATCH 0931/1523] drm/amd/amdgpu: command submission parser for JPEG Add JPEG IB command parser to ensure registers in the command are within the JPEG IP block. Reviewed-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 61 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 7 ++- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15d.h | 6 +++ 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1e167d925b64..78b3c067fea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1061,6 +1061,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index f4662920c653..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,7 +783,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 From 6a28a072d90e4543b5e07a904e3a6afad0117e26 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Fri, 2 Aug 2024 14:29:41 -0400 Subject: [PATCH 0932/1523] drm/amd/amdgpu: cleanup parse_cs callbacks Because gpu_addr is updated in the calling routine (amdgpu_cs_patch_ibs()),it is removed in the callback. Use .patch_cs_in_place instead of .parse_cs for amdgpu_vce_ring_parse_cs_vm() as there is no need for keeping a temporary IB, therefore ib->sa_bo is NULL and amdgpu_ib_free() is removed. Reviewed-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 -- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 775c09d57222..31fd30dcd593 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1088,7 +1088,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, int r; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 51b045de409d..74fdbf71d95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -749,7 +749,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, int i, r = 0; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { uint32_t len = amdgpu_ib_get_value(ib, idx); @@ -1044,7 +1043,6 @@ out: if (!r) { /* No error, free all destroyed handle slots */ tmp = destroyed; - amdgpu_ib_free(p->adev, ib, NULL); } else { /* Error during parsing, free all allocated handle slots */ tmp = allocated; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 32517c364cf7..4bfba2931b08 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -950,7 +950,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 06d787385ad4..0748bf44c880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1102,7 +1102,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + From 1091796fb1d9d6888656f2416ad5c99cfc62a4bf Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 12:10:18 +0530 Subject: [PATCH 0933/1523] drm/amdgpu: add gfx9_4_3 register support in ipdump Add general registers of gfx9_4_3 in ipdump for devcoredump support. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 133 +++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 8455fda750a6..3bd84acba643 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -63,6 +63,94 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define NORMALIZE_XCC_REG_OFFSET(offset) \ (offset & 0xFFFF) +static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -885,6 +973,22 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + uint32_t *ptr, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } +} + static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; @@ -986,6 +1090,8 @@ static int gfx_v9_4_3_sw_init(void *handle) if (!amdgpu_sriov_vf(adev)) r = amdgpu_gfx_sysfs_init(adev); + gfx_v9_4_3_alloc_ip_dump(adev); + return r; } @@ -1010,6 +1116,8 @@ static int gfx_v9_4_3_sw_fini(void *handle) if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + kfree(adev->gfx.ip_dump_core); + return 0; } @@ -4196,6 +4304,29 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no amdgpu_ring_write(ring, ring->funcs->nop); } +static void gfx_v9_4_3_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + amdgpu_gfx_off_ctrl(adev, false); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[xcc_offset + i] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], + GET_INST(GC, xcc_id))); + } + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4212,7 +4343,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, - .dump_ip_state = NULL, + .dump_ip_state = gfx_v9_4_3_ip_dump, .print_ip_state = NULL, }; From b232c4a63a176ed837e3c6bb4a3ac79a1ca5ef1d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 12:28:59 +0530 Subject: [PATCH 0934/1523] drm/amdgpu: add print support for gfx9_4_3 ipdump Add support of gfx9_4_3 ipdump print so devcoredump could trigger it to dump the captured registers in devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 3bd84acba643..59417feac9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4304,6 +4304,28 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no amdgpu_ring_write(ring, ring->funcs->nop); } +static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + drm_printf(p, "Number of Instances:%d\n", num_xcc); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + drm_printf(p, "\nInstance id:%d\n", xcc_id); + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_9_4_3[i].reg_name, + adev->gfx.ip_dump_core[xcc_offset + i]); + } +} + static void gfx_v9_4_3_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4344,7 +4366,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, .dump_ip_state = gfx_v9_4_3_ip_dump, - .print_ip_state = NULL, + .print_ip_state = gfx_v9_4_3_ip_print, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { From 98aded657f506cd5d6d459d68ab4996d9dc0938c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 12 Aug 2024 15:53:36 +0530 Subject: [PATCH 0935/1523] drm/amd/display: Align hwss_wait_for_all_blank_complete descriptor with implementation The descriptor for `hwss_wait_for_all_blank_complete` was previously misaligned with the actual implementation. This commit refines the descriptor to reflect the implementation of `hwss_wait_for_all_blank_complete` Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_hw_sequencer.c:991: warning: expecting prototype for hwss_wait_for_blank_complete(). Prototype was for hwss_wait_for_all_blank_complete() instead Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 9a569aac3c00..7ee2be8f82c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -980,7 +980,7 @@ void get_surface_tile_visual_confirm_color( } /** - * hwss_wait_for_blank_complete - wait for all active OPPs to finish pending blank + * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank * pattern updates * * @dc: [in] dc reference From 11752c013f562a1124088a35bd314aa0e9f0e88f Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Thu, 18 Jul 2024 16:38:50 +0800 Subject: [PATCH 0936/1523] drm/amdgpu/mes: fix mes ring buffer overflow wait memory room until enough before writing mes packets to avoid ring buffer overflow. v2: squash in sched_hw_submission fix Fixes: de3246254156 ("drm/amdgpu: cleanup MES11 command submission") Fixes: fffe347e1478 ("drm/amdgpu: cleanup MES12 command submission") Signed-off-by: Jack Xiao Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 34e087e8920e635c62e2ed6a758b0cd27f836d13) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18 ++++++++++++++---- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 18 ++++++++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..e6344a6b0a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..1a5ad5be33bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -168,7 +168,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -196,6 +196,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,8 +215,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,6 +258,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..249e5a66205c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -154,7 +154,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -182,6 +182,13 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,8 +201,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); @@ -215,7 +221,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -238,6 +244,10 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: spin_unlock_irqrestore(&mes->ring_lock, flags); From f6098641d3e1e4d4052ff9378857c831f9675f6b Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 6 Aug 2024 09:55:55 -0400 Subject: [PATCH 0937/1523] drm/amd/display: fix s2idle entry for DCN3.5+ To be able to get to the lowest power state when suspending systems with DCN3.5+, we must be in IPS before the display hardware is put into D3cold. So, to ensure that the system always reaches the lowest power state while suspending, force systems that support IPS to enter idle optimizations before entering D3cold. Reviewed-by: Roman Li Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher (cherry picked from commit 237193e21b29d4aa0617ffeea3d6f49e72999708) Cc: stable@vger.kernel.org # 6.10+ --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae4..983a977632ff 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); From 0dbb81d44108a2a1004e5b485ef3fca5bc078424 Mon Sep 17 00:00:00 2001 From: Loan Chen Date: Fri, 2 Aug 2024 13:57:40 +0800 Subject: [PATCH 0938/1523] drm/amd/display: Enable otg synchronization logic for DCN321 [Why] Tiled display cannot synchronize properly after S3. The fix for commit 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") is not enable in DCN321, which causes the otg is excluded from synchronization. [How] Enable otg synchronization logic in dcn321. Fixes: 5f0c74915815 ("drm/amd/display: Fix for otg synchronization logic") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Alvin Lee Signed-off-by: Loan Chen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d6ed53712f583423db61fbb802606759e023bf7b) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..8e0588b1cf30 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ From 338567d17627064dba63cf063459605e782f71d2 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 29 Jul 2024 10:23:03 -0400 Subject: [PATCH 0939/1523] drm/amd/display: Fix MST BW calculation Regression [Why & How] Revert commit 8b2cb32cf0c6 ("drm/amd/display: FEC overhead should be checked once for mst slot nums") Because causes bw calculation regression Cc: mario.limonciello@amd.com Cc: alexander.deucher@amd.com Reported-by: jirislaby@kernel.org Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3495 Closes: https://bugzilla.suse.com/show_bug.cgi?id=1228093 Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 12dbb3ed212fc7655fce421542a5add637f8af7a) Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 33 ++++++++++++++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 3 ++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 915eb2c08ece..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, From 737222cebecbdbcdde2b69475c52bcb9ecfeb830 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 31 Jan 2023 15:05:46 -0100 Subject: [PATCH 0940/1523] drm/amd/display: fix cursor offset on rotation 180 [why & how] Cursor gets clipped off in the middle of the screen with hw rotation 180. Fix a miscalculation of cursor offset when it's placed near the edges in the pipe split case. Cursor bugs with hw rotation were reported on AMD issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 The issues on rotation 270 was fixed by: https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-Brian.Chang@amd.com/ that partially addressed the rotation 180 too. So, this patch is the final bits for rotation 180. Reported-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 Reviewed-by: Harry Wentland Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1fd2cf090096af8a25bf85564341cfc21cec659d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index ff03b1d98aa7..1b9ac8812f5b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3589,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { From 56fb276d0244d430496f249335a44ae114dd5f54 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Aug 2024 16:16:35 -0600 Subject: [PATCH 0941/1523] drm/amd/display: Adjust cursor position [why & how] When the commit 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") was introduced, it used the wrong calculation for the position copy for X. This commit uses the correct calculation for that based on the original patch. Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Cc: Mario Limonciello Cc: Alex Deucher Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 8f9b23abbae5ffcd64856facd26a86b67195bc2f) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 1b9ac8812f5b..14a902ff3b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3682,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { From e414a304f2c5368a84f03ad34d29b89f965a33c9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:00:33 -0400 Subject: [PATCH 0942/1523] drm/amdgpu/jpeg2: properly set atomics vmid field This needs to be set as well if the IB uses atomics. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 35c628774e50b3784c59e8ca7973f03bcb067132) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From e6c6bd6253e792cee6c5c065e106e87b9f0d9ae9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 10:06:05 -0400 Subject: [PATCH 0943/1523] drm/amdgpu/jpeg4: properly set atomics vmid field This needs to be set as well if the IB uses atomics. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit c6c2e8b6a427d4fecc7c36cffccb908185afcab2) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..f4662920c653 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -782,11 +782,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); From 0573a1e2ea7e35bff08944a40f1adf2bb35cea61 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 6 Aug 2024 22:27:32 +0200 Subject: [PATCH 0944/1523] drm/amdgpu: Actually check flags for all context ops. Missing validation ... Checked libdrm and it clears all the structs, so we should be safe to just check everything. Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher (cherry picked from commit c6b86421f1f9ddf9d706f2453159813ee39d0cf9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: From 278e1865b7a2124ea783b75ea8b3ee0bc2da5d85 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 11:43:45 +0800 Subject: [PATCH 0945/1523] drm/amdgpu/mes12: update mes_v12_api_def.h Update mes12 api definition. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 2ab5dc59177419d8a49e89585e82ff41524270fc) --- drivers/gpu/drm/amd/include/mes_v12_api_def.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; From 2029b3d7e1358bcca30f74978543ba35b4bbc43d Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 11:53:35 +0800 Subject: [PATCH 0946/1523] drm/amdgpu/mes: add multiple mes ring instances support Add multiple mes ring instances in mes structure to support multiple mes pipes. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit c7d4355648ffa02a1551495b05c71ea6c884d29c) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 34 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 34 ++++++++++++------------ 9 files changed, 47 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..f165b9d49e29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -995,7 +995,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1065,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..8ef53f48ce15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..f89e3f61fe46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -82,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 26efce9aa410..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1a5ad5be33bf..44bdfa0b263a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,7 +162,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -191,7 +191,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -221,7 +221,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -263,7 +263,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1025,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1039,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1081,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1134,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1226,12 +1226,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1342,9 +1342,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1362,7 +1362,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1407,7 +1407,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 249e5a66205c..515de65d8aa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -148,7 +148,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +177,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,7 +207,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); @@ -249,7 +249,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -1095,7 +1095,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1114,7 +1114,7 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1160,7 +1160,7 @@ static int mes_v12_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v12_0_ring_funcs; @@ -1213,7 +1213,7 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1302,12 +1302,12 @@ static int mes_v12_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1351,7 +1351,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1415,9 +1415,9 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1430,7 +1430,7 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq || adev->enable_uni_mes) { @@ -1482,7 +1482,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; From 1d734a3e5d6bb266f52eaf2b1400c5d3f1875a54 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 12 Aug 2024 11:10:43 -0700 Subject: [PATCH 0947/1523] drm/xe: Name and document Wa_14019789679 Early in the development of Xe we identified an issue with SVG state handling on DG2 and MTL (and later on Xe2 as well). In commit 72ac304769dd ("drm/xe: Emit SVG state on RCS during driver load on DG2 and MTL") and commit fb24b858a20d ("drm/xe/xe2: Update SVG state handling") we implemented our own workaround to prevent SVG state from leaking from context A to context B in cases where context B never issues a specific state setting. The hardware teams have now created official workaround Wa_14019789679 to cover this issue. The workaround description only requires emitting 3DSTATE_MESH_CONTROL, since they believe that's the only SVG instruction that would potentially remain unset by a context B, but still cause notable issues if unwanted values were inherited from context A. However since we already have a more extensive implementation that emits the entire SVG state and prevents _any_ SVG state from unintentionally leaking, we'll stick with our existing implementation just to be safe. Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240812181042.2013508-2-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 35 +++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 58121821f081..974a9cd8c379 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include + #include #include "instructions/xe_mi_commands.h" @@ -24,6 +26,7 @@ #include "xe_memirq.h" #include "xe_sriov.h" #include "xe_vm.h" +#include "xe_wa.h" #define LRC_VALID BIT_ULL(0) #define LRC_PRIVILEGE BIT_ULL(8) @@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b int state_table_size = 0; /* - * At the moment we only need to emit non-register state for the RCS - * engine. + * Wa_14019789679 + * + * If the driver doesn't explicitly emit the SVG instructions while + * setting up the default LRC, the context switch will write 0's + * (noops) into the LRC memory rather than the expected instruction + * headers. Application contexts start out as a copy of the default + * LRC, and if they also do not emit specific settings for some SVG + * state, then on context restore they'll unintentionally inherit + * whatever state setting the previous context had programmed into the + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will + * prevent the hardware from resetting that state back to any specific + * value). + * + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL + * since that's a specific state setting that can easily cause GPU + * hangs if unintentionally inherited. However to be safe we'll + * continue to emit all of the SVG state since it's best not to leak + * any of the state between contexts, even if that leakage is harmless. */ - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) - return; - - switch (GRAPHICS_VERx100(xe)) { - case 1255: - case 1270 ... 2004: + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - break; - default: + } + + if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5cf27ff27ce6..920ca5060146 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -35,3 +35,5 @@ GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) +14019789679 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 2004) From a13d91bf3c1910212e45a69d04ad40d99878f8da Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 13:19:59 +0800 Subject: [PATCH 0948/1523] drm/amdgpu/mes12: load unified mes fw on pipe0 and pipe1 Enable unified mes firmware to load on pipe0 and pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit e69c2dd7534f3fcabf7bb801db2a7ac71e7e5da6) --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 27 +++---------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 8ef53f48ce15..81bed8e8478d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1501,7 +1501,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 515de65d8aa0..28bb72d2cffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -744,16 +744,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -767,8 +762,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -784,8 +778,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -800,10 +793,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1525,17 +1514,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; From 3738a7f0ddb920bde538d3f78a02edbc6ad1307e Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:15:48 +0800 Subject: [PATCH 0949/1523] drm/amdgpu/mes12: add mes pipe switch support Add mes pipe switch to let caller choose pipe to submit packet. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit b2dee0837a4be63e8d3e00550a9f057644f962c4) --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 59 ++++++++++++++------------ 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 28bb72d2cffd..1213f35e2900 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,13 +142,14 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring[0]; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; @@ -177,7 +178,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock[0], flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; @@ -207,32 +208,33 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -249,7 +251,7 @@ error_undo: amdgpu_ring_undo(ring); error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock[0], flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -321,6 +323,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -340,6 +343,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -365,6 +369,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.map_legacy_kq = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -398,6 +403,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -414,7 +420,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -424,7 +430,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -486,11 +492,12 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, } return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -501,12 +508,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -566,7 +573,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -1446,19 +1453,19 @@ static int mes_v12_0_hw_init(void *handle) if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); /* Enable the MES to handle doorbell ring on unmapped queue */ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; From 1097727d6d0c13eca25321fff46714fc5047d6e8 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:44:07 +0800 Subject: [PATCH 0950/1523] drm/amdgpu/mes12: adjust mes12 sw/hw init for multiple pipes Adjust mes12 sw/hw initiailization for both pipe0 and pipe1 enablement. The two pipes are almost identical pipe. Pipe0 behaves like schq and pipe1 like kiq, pipe0 was mapped by pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit aa539da8aff07ab08def6490e8c9b441439e70ba) --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 106 +++++++++++++++---------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 1213f35e2900..d18ec5855193 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -266,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -352,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -368,8 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -378,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -402,8 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -439,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -491,8 +504,12 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, - AMDGPU_MES_SCHED_PIPE, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } @@ -1107,14 +1124,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1126,13 +1141,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1152,25 +1166,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring[0]; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1184,7 +1202,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1206,12 +1224,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring[0]; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1252,9 +1268,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1262,18 +1275,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1368,10 +1378,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1398,6 +1408,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1429,7 +1447,7 @@ static int mes_v12_0_hw_init(void *handle) if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1449,6 +1467,9 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; @@ -1462,9 +1483,6 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); From 7254027e1e6edbff54f5930a5f13f14ac6f1694c Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 14:49:30 +0800 Subject: [PATCH 0951/1523] drm/amdgpu/mes12: configure two pipes hardware resources Configure two pipes with different hardware resources. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit ea5d6db17a8e3635ad91e8c53faa1fdc9570fbbb) --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 77 +++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 12 ++-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 7 +-- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 49 ++++++++-------- 4 files changed, 81 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 81bed8e8478d..1cb1ec7beefe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -165,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -214,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -228,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index f89e3f61fe46..0bc837dab578 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -112,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 44bdfa0b263a..2ea8223eb969 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -522,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1210,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index d18ec5855193..ed77d70441cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -542,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } + + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; - - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -1292,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); From af401543df510a73f7beb13f80cf4c541be94786 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 15:23:16 +0800 Subject: [PATCH 0952/1523] drm/amdgpu/mes12: sw/hw fini for unified mes Free memory for two pipes and unmap pipe0 via pipe1. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 98cae695a8ae0e4291b1fa7feef9b54fabefe885) --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index ed77d70441cf..e39a58d262c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1305,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); + + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, - &adev->mes.ring[0].mqd_gpu_addr, - &adev->mes.ring[0].mqd_ptr); - - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring[0]); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1433,7 +1436,13 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { if (adev->mes.ring[0].sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + adev->mes.ring[0].sched.ready = false; } From 4246b1077ffcc37926868581bb818fdb49d0d065 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 7 Aug 2024 12:03:11 +0800 Subject: [PATCH 0953/1523] drm/amdgpu/mes12: fix suspend issue Use mes pipe to unmap kcq and kgq. Signed-off-by: Jack Xiao Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit f7fb9d677faf0460131bc2af15afd766d48a1f47) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +------------------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f165b9d49e29..c770cb201e64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 506fa8003388..2c611b8577a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3546,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3580,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } From 470516c2925493594a690bc4d05b1f4471d9f996 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Thu, 8 Aug 2024 12:19:50 -0400 Subject: [PATCH 0954/1523] drm/amd/amdgpu: command submission parser for JPEG Add JPEG IB command parser to ensure registers in the command are within the JPEG IP block. Reviewed-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit a7f670d5d8e77b092404ca8a35bb0f8f89ed3117) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 61 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 7 ++- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15d.h | 6 +++ 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9aa952f258cf..6dfdff58bffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index f4662920c653..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,7 +783,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 From 507a2286c052919fe416b3daa0f0061d0fc702b9 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Fri, 9 Aug 2024 17:20:26 -0400 Subject: [PATCH 0955/1523] drm/amdgpu: Update kmd_fw_shared for VCN5 kmd_fw_shared changed in VCN5 Signed-off-by: Yinjie Yao Reviewed-by: Ruijing Dong Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit aa02486fb18cecbaca0c4fd393d1a03f1d4c3f9a) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 From 23acd1f344e8102f803119d0c8fc4df4628d694f Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 8 Aug 2024 12:19:22 +0800 Subject: [PATCH 0956/1523] drm/amd/amdgpu: add HDP_SD support on gc 12.0.0/1 add HDP_SD support on gc 12.0.0/1 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 61cffacb3a1c590b15c0e9ff987de02d293e0dd8) --- drivers/gpu/drm/amd/amdgpu/soc24.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | From 100bff23818eb61751ed05d64a7df36ce9728a4d Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 13 Aug 2024 15:17:27 +0000 Subject: [PATCH 0957/1523] perf/bpf: Don't call bpf_overflow_handler() for tracing events The regressing commit is new in 6.10. It assumed that anytime event->prog is set bpf_overflow_handler() should be invoked to execute the attached bpf program. This assumption is false for tracing events, and as a result the regressing commit broke bpftrace by invoking the bpf handler with garbage inputs on overflow. Prior to the regression the overflow handlers formed a chain (of length 0, 1, or 2) and perf_event_set_bpf_handler() (the !tracing case) added bpf_overflow_handler() to that chain, while perf_event_attach_bpf_prog() (the tracing case) did not. Both set event->prog. The chain of overflow handlers was replaced by a single overflow handler slot and a fixed call to bpf_overflow_handler() when appropriate. This modifies the condition there to check event->prog->type == BPF_PROG_TYPE_PERF_EVENT, restoring the previous behavior and fixing bpftrace. Signed-off-by: Kyle Huey Suggested-by: Andrii Nakryiko Reported-by: Joe Damato Closes: https://lore.kernel.org/lkml/ZpFfocvyF3KHaSzF@LQ3V64L9R2/ Fixes: f11f10bfa1ca ("perf/bpf: Call BPF handler directly, not through overflow machinery") Cc: stable@vger.kernel.org Tested-by: Joe Damato # bpftrace Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240813151727.28797-1-jdamato@fastly.com Signed-off-by: Alexei Starovoitov --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index aa3450bdc227..c973e3c11e03 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9706,7 +9706,8 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); - if (event->prog && !bpf_overflow_handler(event, data, regs)) + if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT && + !bpf_overflow_handler(event, data, regs)) return ret; /* From 4b498d19610c6acd36a8ddf622afdefe4ab093fe Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 13 Aug 2024 16:14:19 +0530 Subject: [PATCH 0958/1523] drm/xe: Remove unused xe parameter Remove the xe parameter from the pde_encode_pat_index and pte_encode_pat_index functions, as it is no longer used. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Tejas Upadhyay Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240813104419.2958046-1-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f225107bdd65..89d86b7cdb02 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1191,7 +1191,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) +static u64 pde_encode_pat_index(u16 pat_index) { u64 pte = 0; @@ -1204,8 +1204,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) return pte; } -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) +static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) { u64 pte = 0; @@ -1246,12 +1245,11 @@ static u64 pte_encode_ps(u32 pt_level) static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, const u16 pat_index) { - struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); + pde |= pde_encode_pat_index(pat_index); return pde; } @@ -1259,12 +1257,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1276,14 +1273,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_vma_vm(vma)->xe; - pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1303,7 +1298,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (devmem) From faada2174c08662ae98b439c69efe3e79382c538 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 13 Aug 2024 16:35:14 +0200 Subject: [PATCH 0959/1523] dm persistent data: fix memory allocation failure kmalloc is unreliable when allocating more than 8 pages of memory. It may fail when there is plenty of free memory but the memory is fragmented. Zdenek Kabelac observed such failure in his tests. This commit changes kmalloc to kvmalloc - kvmalloc will fall back to vmalloc if the large allocation fails. Signed-off-by: Mikulas Patocka Reported-by: Zdenek Kabelac Reviewed-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-space-map-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 04698fd03e60..d48c4fafc779 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - kfree(smm); + kvfree(smm); } static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) @@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; - smm = kmalloc(sizeof(*smm), GFP_KERNEL); + smm = kvmalloc(sizeof(*smm), GFP_KERNEL); if (!smm) return ERR_PTR(-ENOMEM); From 3e30296b374af33cb4c12ff93df0b1e5b2d0f80b Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 8 Aug 2024 16:52:27 -0700 Subject: [PATCH 0960/1523] drm/msm: fix the highest_bank_bit for sc7180 sc7180 programs the ubwc settings as 0x1e as that would mean a highest bank bit of 14 which matches what the GPU sets as well. However, the highest_bank_bit field of the msm_mdss_data which is being used to program the SSPP's fetch configuration is programmed to a highest bank bit of 16 as 0x3 translates to 16 and not 14. Fix the highest bank bit field used for the SSPP to match the mdss and gpu settings. Fixes: 6f410b246209 ("drm/msm/mdss: populate missing data") Reviewed-by: Rob Clark Tested-by: Stephen Boyd # Trogdor.Lazor Patchwork: https://patchwork.freedesktop.org/patch/607625/ Link: https://lore.kernel.org/r/20240808235227.2701479-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index d90b9471ba6f..faa88fd6eb4d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, - .highest_bank_bit = 0x3, + .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; From f50733b45d865f91db90919f8311e2127ce5a0cb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 8 Aug 2024 11:39:08 -0700 Subject: [PATCH 0961/1523] exec: Fix ToCToU between perm check and set-uid/gid usage When opening a file for exec via do_filp_open(), permission checking is done against the file's metadata at that moment, and on success, a file pointer is passed back. Much later in the execve() code path, the file metadata (specifically mode, uid, and gid) is used to determine if/how to set the uid and gid. However, those values may have changed since the permissions check, meaning the execution may gain unintended privileges. For example, if a file could change permissions from executable and not set-id: ---------x 1 root root 16048 Aug 7 13:16 target to set-id and non-executable: ---S------ 1 root root 16048 Aug 7 13:16 target it is possible to gain root privileges when execution should have been disallowed. While this race condition is rare in real-world scenarios, it has been observed (and proven exploitable) when package managers are updating the setuid bits of installed programs. Such files start with being world-executable but then are adjusted to be group-exec with a set-uid bit. For example, "chmod o-x,u+s target" makes "target" executable only by uid "root" and gid "cdrom", while also becoming setuid-root: -rwxr-xr-x 1 root cdrom 16048 Aug 7 13:16 target becomes: -rwsr-xr-- 1 root cdrom 16048 Aug 7 13:16 target But racing the chmod means users without group "cdrom" membership can get the permission to execute "target" just before the chmod, and when the chmod finishes, the exec reaches brpm_fill_uid(), and performs the setuid to root, violating the expressed authorization of "only cdrom group members can setuid to root". Re-check that we still have execute permissions in case the metadata has changed. It would be better to keep a copy from the perm-check time, but until we can do that refactoring, the least-bad option is to do a full inode_permission() call (under inode lock). It is understood that this is safe against dead-locks, but hardly optimal. Reported-by: Marco Vanotti Tested-by: Marco Vanotti Suggested-by: Linus Torvalds Cc: stable@vger.kernel.org Cc: Eric Biederman Cc: Alexander Viro Cc: Christian Brauner Signed-off-by: Kees Cook --- fs/exec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index a126e3d1cacb..50e76cc633c4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1692,6 +1692,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; + int err; if (!mnt_may_suid(file->f_path.mnt)) return; @@ -1708,12 +1709,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) /* Be careful if suid/sgid is set */ inode_lock(inode); - /* reload atomically mode/uid/gid now that lock held */ + /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); + /* Did the exec bit vanish out from under us? Give up. */ + if (err) + return; + /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) From 1eda95cba9df35f1fb0fb4b89511beb6603c614e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 13 Aug 2024 07:19:31 -0700 Subject: [PATCH 0962/1523] drm/xe: Rename enable_display module param The different approach used by xe regarding the initialization of display HW has been proved a great addition for early driver bring up: core xe can be tested without having all the bits sorted out on the display side. On the other hand, the approach exposed by i915-display is to *actively* disable the display by programming it if needed, i.e. if it was left enabled by firmware. It also has its use to make sure the HW is actually disabled and not wasting power. However having both the way it is in xe doesn't expose a good interface wrt module params. From modinfo: disable_display:Disable display (default: false) (bool) enable_display:Enable display (bool) Rename enable_display to probe_display to try to convey the message that the HW is being touched and improve the module param description. To avoid confusion, the enable_display is renamed everywhere, not only in the module param. New description for the parameters: disable_display:Disable display (default: false) (bool) probe_display:Probe display HW, otherwise it's left untouched (default: true) (bool) Reviewed-by: Matt Roper Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240813141931.3141395-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_display.c | 46 ++++++++++++------------- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 11 ++++-- drivers/gpu/drm/xe/xe_module.c | 6 ++-- drivers/gpu/drm/xe/xe_module.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 8 ++--- 6 files changed, 41 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 78cccbe28947..56a940b39412 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -45,7 +45,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -61,7 +61,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; @@ -103,7 +103,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_cleanup(xe); @@ -111,7 +111,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; /* Fake uncore lock */ @@ -127,7 +127,7 @@ static void xe_display_fini_noirq(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_noirq(xe); @@ -137,7 +137,7 @@ int xe_display_init_noirq(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; intel_display_driver_early_probe(xe); @@ -166,7 +166,7 @@ static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_nogem(xe); @@ -176,7 +176,7 @@ int xe_display_init_noaccel(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; err = intel_display_driver_probe_nogem(xe); @@ -188,7 +188,7 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; return intel_display_driver_probe(xe); @@ -196,7 +196,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_fini(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_hpd_poll_fini(xe); @@ -207,7 +207,7 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_register(xe); @@ -217,7 +217,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); @@ -227,7 +227,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_driver_remove(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove(xe); @@ -237,7 +237,7 @@ void xe_display_driver_remove(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) @@ -246,7 +246,7 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gu_misc_iir & GU_MISC_GSE) @@ -255,7 +255,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; gen11_display_irq_reset(xe); @@ -263,7 +263,7 @@ void xe_display_irq_reset(struct xe_device *xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) @@ -297,7 +297,7 @@ static bool suspend_to_idle(void) void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; /* @@ -327,7 +327,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_suspend(xe, s2idle); @@ -337,7 +337,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_power_resume_early(xe); @@ -347,7 +347,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe, bool runtime) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_dmc_resume(xe); @@ -385,7 +385,7 @@ int xe_display_probe(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) goto no_display; intel_display_device_probe(xe); @@ -398,7 +398,7 @@ int xe_display_probe(struct xe_device *xe) return 0; no_display: - xe->info.enable_display = false; + xe->info.probe_display = false; unset_display_features(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1aba6f9eaa19..206328387150 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -543,7 +543,7 @@ static void update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { - xe->info.enable_display = 0; + xe->info.probe_display = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5b7292a9a66d..16a24eadd94b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -282,8 +282,15 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @info.enable_display: display enabled */ - u8 enable_display:1; + /** + * @info.probe_display: Probe display hardware. If set to + * false, the driver will behave as if there is no display + * hardware present and will not try to read/write to it in any + * way. The display hardware, if it exists, will not be + * exposed to userspace and will be left untouched in whatever + * state the firmware or bootloader left it in. + */ + u8 probe_display:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 499540add465..e7e4b1ebab50 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -15,7 +15,7 @@ #include "xe_sched_job.h" struct xe_modparam xe_modparam = { - .enable_display = true, + .probe_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, @@ -25,8 +25,8 @@ struct xe_modparam xe_modparam = { module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); -MODULE_PARM_DESC(enable_display, "Enable display"); +module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 61a0d28a28c8..161a5e6f717f 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -11,7 +11,7 @@ /* Module modprobe variables */ struct xe_modparam { bool force_execlist; - bool enable_display; + bool probe_display; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index bc575bbee42d..0f66cf067e2a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -616,9 +616,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.probe_display && + desc->has_display; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -829,7 +829,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - str_yes_no(xe->info.enable_display), + str_yes_no(xe->info.probe_display), xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); From b739dffa5d570b411d4bdf4bb9b8dfd6b7d72305 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Mon, 12 Aug 2024 12:06:51 +0200 Subject: [PATCH 0963/1523] of/irq: Prevent device address out-of-bounds read in interrupt map walk When of_irq_parse_raw() is invoked with a device address smaller than the interrupt parent node (from #address-cells property), KASAN detects the following out-of-bounds read when populating the initial match table (dyndbg="func of_irq_parse_* +p"): OF: of_irq_parse_one: dev=/soc@0/picasso/watchdog, index=0 OF: parent=/soc@0/pci@878000000000/gpio0@17,0, intsize=2 OF: intspec=4 OF: of_irq_parse_raw: ipar=/soc@0/pci@878000000000/gpio0@17,0, size=2 OF: -> addrsize=3 ================================================================== BUG: KASAN: slab-out-of-bounds in of_irq_parse_raw+0x2b8/0x8d0 Read of size 4 at addr ffffff81beca5608 by task bash/764 CPU: 1 PID: 764 Comm: bash Tainted: G O 6.1.67-484c613561-nokia_sm_arm64 #1 Hardware name: Unknown Unknown Product/Unknown Product, BIOS 2023.01-12.24.03-dirty 01/01/2023 Call trace: dump_backtrace+0xdc/0x130 show_stack+0x1c/0x30 dump_stack_lvl+0x6c/0x84 print_report+0x150/0x448 kasan_report+0x98/0x140 __asan_load4+0x78/0xa0 of_irq_parse_raw+0x2b8/0x8d0 of_irq_parse_one+0x24c/0x270 parse_interrupts+0xc0/0x120 of_fwnode_add_links+0x100/0x2d0 fw_devlink_parse_fwtree+0x64/0xc0 device_add+0xb38/0xc30 of_device_add+0x64/0x90 of_platform_device_create_pdata+0xd0/0x170 of_platform_bus_create+0x244/0x600 of_platform_notify+0x1b0/0x254 blocking_notifier_call_chain+0x9c/0xd0 __of_changeset_entry_notify+0x1b8/0x230 __of_changeset_apply_notify+0x54/0xe4 of_overlay_fdt_apply+0xc04/0xd94 ... The buggy address belongs to the object at ffffff81beca5600 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 8 bytes inside of 128-byte region [ffffff81beca5600, ffffff81beca5680) The buggy address belongs to the physical page: page:00000000230d3d03 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1beca4 head:00000000230d3d03 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x8000000000010200(slab|head|zone=2) raw: 8000000000010200 0000000000000000 dead000000000122 ffffff810000c300 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffff81beca5500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffff81beca5580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffff81beca5600: 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffff81beca5680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffff81beca5700: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc ================================================================== OF: -> got it ! Prevent the out-of-bounds read by copying the device address into a buffer of sufficient size. Signed-off-by: Stefan Wiehler Link: https://lore.kernel.org/r/20240812100652.3800963-1-stefan.wiehler@nokia.com Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb..8fd63100ba8f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; From 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Mon, 12 Aug 2024 21:40:28 +0200 Subject: [PATCH 0964/1523] i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume Add the missing geni_icc_disable() call before returning in the geni_i2c_runtime_resume() function. Commit 9ba48db9f77c ("i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume") by Gaosheng missed disabling the interconnect in one case. Fixes: bf225ed357c6 ("i2c: i2c-qcom-geni: Add interconnect support") Cc: Gaosheng Cui Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 365e37bba0f3..06e836e3e877 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,8 +986,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); if (ret) { From 655111b838cdabdb604f3625a9ff08c5eedb11da Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 12 Aug 2024 08:51:23 +0200 Subject: [PATCH 0965/1523] mptcp: correct MPTCP_SUBFLOW_ATTR_SSN_OFFSET reserved size ssn_offset field is u32 and is placed into the netlink response with nla_put_u32(), but only 2 bytes are reserved for the attribute payload in subflow_get_info_size() (even though it makes no difference in the end, as it is aligned up to 4 bytes). Supply the correct argument to the relevant nla_total_size() call to make it less confusing. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Signed-off-by: Eugene Syromiatnikov Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240812065024.GA19719@asgard.redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 3ae46b545d2c..2d3efb405437 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk) nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ From a24e6e7146e361aa0855cf8ee3b2e80b8eb692e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:40:39 -0400 Subject: [PATCH 0966/1523] bcachefs: delete faulty fastpath in bch2_btree_path_traverse_cached() bch2_btree_path_traverse_cached() was previously checking if it could just relock the path, which is a common idiom in path traversal. However, it was using btree_node_relock(), not btree_path_relock(); btree_path_relock() only succeeds if the path was in state BTREE_ITER_NEED_RELOCK. If the path was in state BTREE_ITER_NEED_TRAVERSE a full traversal is needed; this led to a null ptr deref in bch2_btree_path_traverse_cached(). And the short circuit check here isn't needed, since it was already done in the main bch2_btree_path_traverse_one(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b..79954490627c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); From bd864bc2d90790e00b02b17c75fb951cb4b0bb8b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:24:03 -0400 Subject: [PATCH 0967/1523] bcachefs: Fix bch2_trigger_alloc when upgrading from old versions bch2_trigger_alloc was assuming that the new key would always be newly created and thus always an alloc_v4 key, but - not when called from btree_gc. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708..0a8a1bc9a4ac 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); From d9e615762bf2eb7459fb0f270525f8b186bce6b7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 04:53:12 -0400 Subject: [PATCH 0968/1523] bcachefs: bch2_accounting_invalid() fixup Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 046ac92b6639..212f53927111 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -154,7 +154,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) - bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], c, err, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); From 486d920735325e507d965c2639ba2775b81fd329 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:47:55 -0400 Subject: [PATCH 0969/1523] bcachefs: disk accounting: ignore unknown types forward compat fix Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 212f53927111..03a9de6c2e0a 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -528,6 +528,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -760,6 +763,12 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -775,9 +784,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; From 48d6cc1b4895ada0781da11a0a483332a236ec14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 01:01:35 -0400 Subject: [PATCH 0970/1523] bcachefs: Add missing downgrade table entry Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 6c4469f53313..9f82d497d9e0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -104,6 +104,7 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { From 968feb854a86b59cc4bc72af3105989706ca2c7d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 16:34:28 -0400 Subject: [PATCH 0971/1523] bcachefs: Convert for_each_btree_node() to lockrestart_do() for_each_btree_node() now works similarly to for_each_btree_key(), where the loop body is passed as an argument to be passed to lockrestart_do(). This now calls trans_begin() on every loop iteration - which fixes an SRCU warning in backpointers fsck. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 15 +++++--------- fs/bcachefs/btree_iter.c | 1 + fs/bcachefs/btree_iter.h | 42 ++++++++++++++++++++++++-------------- fs/bcachefs/debug.c | 38 ++++++++-------------------------- 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a982..9edc4c5f735c 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -763,27 +763,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index aa8a049071f4..2e84d22e17bd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c..dca62375d7d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5..45aec1afdb0e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); - - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { From b2f11c6f3e1fc60742673b8675c95b78447f3dae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 21:04:35 -0400 Subject: [PATCH 0972/1523] lib/generic-radix-tree.c: Fix rare race in __genradix_ptr_alloc() If we need to increase the tree depth, allocate a new node, and then race with another thread that increased the tree depth before us, we'll still have a preallocated node that might be used later. If we then use that node for a new non-root node, it'll still have a pointer to the old root instead of being zeroed - fix this by zeroing it in the cmpxchg failure path. Signed-off-by: Kent Overstreet --- lib/generic-radix-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8..fa692c86f069 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } From 7254555c440ff6b136aa97fb3c33fd5e0bb4fb9f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:40:09 -0400 Subject: [PATCH 0973/1523] bcachefs: Add hysteresis to waiting on btree key cache flush This helps ensure key cache reclaim isn't contending with threads waiting for the key cache to be helped, and fixes a severe performance bug. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++++ fs/bcachefs/btree_trans_commit.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1..113309f62918 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -20,6 +20,15 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) return nr_dirty > max_dirty; } +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; +} + int bch2_btree_key_cache_journal_flush(struct journal *, struct journal_entry_pin *, u64); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9..f567bfb82850 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -927,7 +927,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); From 790666c8ac6427ddaa00f502dc44073c1e039355 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:31:17 -0400 Subject: [PATCH 0974/1523] bcachefs: Improve trans_blocked_journal_reclaim tracepoint include information about the state of the btree key cache Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++-- fs/bcachefs/trace.c | 1 + fs/bcachefs/trace.h | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index 113309f62918..51d6289b8dee 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,18 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; } static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b4..dfad1d06633d 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb..c62f00322d1e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, From 06a8693b890c0cf7d94bf7c6f0e2adf3a3aaa346 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 15:48:18 -0400 Subject: [PATCH 0975/1523] bcachefs: Add a time_stat for blocked on key cache flush Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_trans_commit.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eedf2d6045e7..0c7086e00d18 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index f567bfb82850..ac0c92683aad 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -973,9 +973,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; From c99471024f24b3cbafc02bf5b112ecf34b0dbd40 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:29:46 -0400 Subject: [PATCH 0976/1523] bcachefs: Fix warning in __bch2_fsck_err() for trans not passed in Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4a..70ebcca08ba2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1767,6 +1767,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { From d97de0d017cde0d442c3d144b4f969f43064cc0f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 21:31:25 -0400 Subject: [PATCH 0977/1523] bcachefs: Make bkey_fsck_err() a wrapper around fsck_err() bkey_fsck_err() was added as an interface that looks like fsck_err(), but previously all it did was ensure that the appropriate error counter was incremented in the superblock. This is a cleanup and bugfix patch that converts it to a wrapper around fsck_err(). This is needed to fix an issue with the upgrade path to disk_accounting_v3, where the "silent fix" error list now includes bkey_fsck errors; fsck_err() handles this in a unified way, and since we need to change printing of bkey fsck errors from the caller to the inner bkey_fsck_err() calls, this ends up being a pretty big change. Als,, rename .invalid() methods to .validate(), for clarity, while we're changing the function signature anyways (to drop the printbuf argument). Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 63 ++++++------ fs/bcachefs/alloc_background.h | 26 +++-- fs/bcachefs/backpointers.c | 8 +- fs/bcachefs/backpointers.h | 5 +- fs/bcachefs/bkey.h | 7 +- fs/bcachefs/bkey_methods.c | 109 ++++++++++----------- fs/bcachefs/bkey_methods.h | 21 ++-- fs/bcachefs/btree_io.c | 67 ++++--------- fs/bcachefs/btree_node_scan.c | 2 +- fs/bcachefs/btree_trans_commit.c | 72 +++----------- fs/bcachefs/btree_update_interior.c | 16 +--- fs/bcachefs/data_update.c | 6 +- fs/bcachefs/dirent.c | 33 ++++--- fs/bcachefs/dirent.h | 5 +- fs/bcachefs/disk_accounting.c | 13 ++- fs/bcachefs/disk_accounting.h | 5 +- fs/bcachefs/ec.c | 15 ++- fs/bcachefs/ec.h | 5 +- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 22 +++++ fs/bcachefs/error.h | 39 ++++---- fs/bcachefs/extents.c | 144 ++++++++++++++-------------- fs/bcachefs/extents.h | 24 ++--- fs/bcachefs/inode.c | 77 +++++++-------- fs/bcachefs/inode.h | 24 ++--- fs/bcachefs/journal_io.c | 24 +---- fs/bcachefs/lru.c | 9 +- fs/bcachefs/lru.h | 5 +- fs/bcachefs/quota.c | 8 +- fs/bcachefs/quota.h | 5 +- fs/bcachefs/reflink.c | 19 ++-- fs/bcachefs/reflink.h | 22 ++--- fs/bcachefs/snapshot.c | 42 ++++---- fs/bcachefs/snapshot.h | 11 +-- fs/bcachefs/subvolume.c | 16 ++-- fs/bcachefs/subvolume.h | 5 +- fs/bcachefs/xattr.c | 21 ++-- fs/bcachefs/xattr.h | 5 +- 38 files changed, 448 insertions(+), 553 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 0a8a1bc9a4ac..fd3a2522bc3e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 96a0444ea78f..260e7fa83d05 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -240,52 +240,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 9edc4c5f735c..d4da6343efa9 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98..7daecadb764e 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0..e34cb2bf329c 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c..88d8958281e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb..3df3dd2723a1 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 70ebcca08ba2..56ea9a77cd4a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1954,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 001107226377..b28c649c6838 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index ac0c92683aad..1a1e9d2036da 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -1064,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1088,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e61f9695771e..b3454d4619e8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead..6a854c918496 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308e..32bfdf19289a 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a09..8945145865c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 03a9de6c2e0a..f059cbffdf23 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -126,9 +126,8 @@ static inline bool is_zero(char *start, char *end) #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, k.k->p); @@ -144,18 +143,18 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, break; case BCH_DISK_ACCOUNTING_replicas: bkey_fsck_err_on(!acc_k.replicas.nr_devs, - c, err, accounting_key_replicas_nr_devs_0, + c, accounting_key_replicas_nr_devs_0, "accounting key replicas entry with nr_devs=0"); bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || (acc_k.replicas.nr_required > 1 && acc_k.replicas.nr_required == acc_k.replicas.nr_devs), - c, err, accounting_key_replicas_nr_required_bad, + c, accounting_key_replicas_nr_required_bad, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], - c, err, accounting_key_replicas_devs_unsorted, + c, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); @@ -178,7 +177,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, } bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), - c, err, accounting_key_junk_at_end, + c, accounting_key_junk_at_end, "junk at end of accounting key"); fsck_err: return ret; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69..b92f8c2e3054 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 84f1cbf6497f..141a4c63142f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb6249..90962b3c0130 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bf..ab5a7adece10 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820..95afa7bf2020 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad..2f1b86978f36 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35f..4419ad3e454e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec..1a6ddee48041 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1..2be6be33afa3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a745099..f1fcb4c58039 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63..7664b68e6a15 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb371..96f2f4f8c397 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11..e6a7d8241bb8 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0a..c32a05e252e2 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218..a62abcc5332a 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525..e59c0abb4772 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67a..51afe11d8ed6 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5..8b18a9b483a4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e962..eb5ef64221d6 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b55862..dbe834cb349f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb2..a8299ba2cab2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c..f2b4c17a0307 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c85..c188a5ad64ce 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) From 5132b99bb62664c02bd6c0dd62ad3fedc75294d8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:35:10 -0400 Subject: [PATCH 0978/1523] bcachefs: Kill __bch2_accounting_mem_mod() The next patch will be adding a disk accounting counter type which is not kept in the in-memory eytzinger tree. As prep, fold __bch2_accounting_mem_mod() into bch2_accounting_mem_mod_locked() so that we can check for that counter type and bail out without calling bpos_to_disk_accounting_pos() twice. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 +- fs/bcachefs/disk_accounting.c | 4 +- fs/bcachefs/disk_accounting.h | 68 +++++++++++++++----------------- 3 files changed, 35 insertions(+), 41 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a1e9d2036da..a0101d9c5d83 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f059cbffdf23..e57b40623cd9 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -566,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -595,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index b92f8c2e3054..653090667aaa 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -106,8 +106,37 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) +/* + * Update in memory counters so they match the btree update we're doing; called + * from transaction commit path + */ +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { + struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); + + if (!gc && !read) { + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); + if (ca) { + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]); + } + rcu_read_unlock(); + break; + } + } + struct bch_accounting_mem *acc = &c->accounting; unsigned idx; @@ -129,45 +158,10 @@ static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_ac return 0; } -/* - * Update in memory counters so they match the btree update we're doing; called - * from transaction commit path - */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_fs *c = trans->c; - - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); - if (ca) { - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]); - } - rcu_read_unlock(); - break; - } - } - - return __bch2_accounting_mem_mod(c, a, gc); -} - static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } From 58474f76a770bcc79d4b2d7232e4d6650e732b50 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:27:36 -0400 Subject: [PATCH 0979/1523] bcachefs: bcachefs_metadata_version_disk_accounting_inum This adds another disk accounting counter to track usage per inode number (any snapshot ID). This will be used for a couple things: - It'll give us a way to tell the user how much space a given file ista consuming in all snapshots; i.e. how much extra space it's consuming due to snapshot versioning. - It counts number of extents and total size of extents (both in btree keyspace sectors and actual disk usage), meaning it gives us average extent size: that is, it'll let us cheaply find fragmented files that should be defragmented. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/buckets.c | 14 ++++++++++++++ fs/bcachefs/disk_accounting.c | 3 +++ fs/bcachefs/disk_accounting.h | 3 +++ fs/bcachefs/disk_accounting_format.h | 8 +++++++- fs/bcachefs/sb-downgrade.c | 5 ++++- 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b25f86356728..c75f2e0f32bb 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -676,7 +676,8 @@ struct bch_sb_field_ext { x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ - x(disk_accounting_v3, BCH_VERSION(1, 10)) + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 9f7004e941ce..b69ef4b3de6e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -810,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e57b40623cd9..e972e2bca546 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -768,6 +768,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) continue; + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 653090667aaa..f29fd0dd9581 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -116,6 +116,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index a93cf26ff4a9..7b6e6c97e6aa 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -136,6 +137,10 @@ struct bch_acct_btree { __u32 id; } __packed; +struct bch_acct_inum { + __u64 inum; +} __packed; + struct bch_acct_rebalance_work { }; @@ -152,6 +157,7 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; } __packed; } __packed; struct bpos _pad; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 9f82d497d9e0..650a1f77ca40 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -72,7 +72,10 @@ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ From 63de936b513f7a9ce559194d3269ac291f4f4662 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 21 Jul 2024 17:38:40 +0200 Subject: [PATCH 0980/1523] media: atomisp: Fix streaming no longer working on BYT / ISP2400 devices Commit a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG) support") broke BYT support because it removed a seemingly unused field from struct sh_css_sp_config and a seemingly unused value from enum ia_css_input_mode. But these are part of the ABI between the kernel and firmware on ISP2400 and this part of the TPG support removal changes broke ISP2400 support. ISP2401 support was not affected because on ISP2401 only a part of struct sh_css_sp_config is used. Restore the removed field and enum value to fix this. Fixes: a0821ca14bb8 ("media: atomisp: Remove test pattern generator (TPG) support") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- .../media/atomisp/pci/ia_css_stream_public.h | 8 ++++++-- .../media/atomisp/pci/sh_css_internal.h | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h index 961c61288083..aad860e54d3a 100644 --- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h +++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h @@ -27,12 +27,16 @@ #include "ia_css_prbs.h" #include "ia_css_input_port.h" -/* Input modes, these enumerate all supported input modes. - * Note that not all ISP modes support all input modes. +/* + * Input modes, these enumerate all supported input modes. + * This enum is part of the atomisp firmware ABI and must + * NOT be changed! + * Note that not all ISP modes support all input modes. */ enum ia_css_input_mode { IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */ + IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */ diff --git a/drivers/staging/media/atomisp/pci/sh_css_internal.h b/drivers/staging/media/atomisp/pci/sh_css_internal.h index a2d972ea3fa0..959e7f549641 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_internal.h +++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h @@ -344,7 +344,14 @@ struct sh_css_sp_input_formatter_set { #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3) -/* SP configuration information */ +/* + * SP configuration information + * + * This struct is part of the atomisp firmware ABI and is directly copied + * to ISP DRAM by sh_css_store_sp_group_to_ddr() + * + * Do NOT change this struct's layout or remove seemingly unused fields! + */ struct sh_css_sp_config { u8 no_isp_sync; /* Signal host immediately after start */ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */ @@ -354,6 +361,10 @@ struct sh_css_sp_config { host (true) or when they are passed to the preview/video pipe (false). */ + /* + * Note the fields below are only used on the ISP2400 not on the ISP2401, + * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401. + */ struct { u8 a_changed; u8 b_changed; @@ -363,11 +374,13 @@ struct sh_css_sp_config { } input_formatter; sync_generator_cfg_t sync_gen; + tpg_cfg_t tpg; prbs_cfg_t prbs; input_system_cfg_t input_circuit; u8 input_circuit_cfg_changed; - u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; - u8 enable_isys_event_queue; + u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; + /* These last 2 fields are used on both the ISP2400 and the ISP2401 */ + u8 enable_isys_event_queue; u8 disable_cont_vf; }; From a2cbb1603943281a604f5adc48079a148db5cb0d Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 8 Aug 2024 16:06:40 -0700 Subject: [PATCH 0981/1523] tcp: Update window clamping condition This patch is based on the discussions between Neal Cardwell and Eric Dumazet in the link https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/ It was correctly pointed out that tp->window_clamp would not be updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if (copied <= tp->rcvq_space.space). While it is expected for most setups to leave the sysctl enabled, the latter condition may not end up hitting depending on the TCP receive queue size and the pattern of arriving data. The updated check should be hit only on initial MSS update from TCP_MIN_MSS to measured MSS value and subsequently if there was an update to a larger value. Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e2b9583ed96a..e37488d3453f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); @@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { u64 rcvwin, grow; int rcvbuf; @@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - if (rcvbuf > sk->sk_rcvbuf) { - WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + if (rcvbuf > sk->sk_rcvbuf) { + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); - /* Make the window clamp follow along. */ - WRITE_ONCE(tp->window_clamp, - tcp_win_from_space(sk, rcvbuf)); - } - } else { - /* Make the window clamp follow along while being bounded - * by SO_RCVBUF. - */ - int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf)); - - if (clamp > tp->window_clamp) - WRITE_ONCE(tp->window_clamp, clamp); + /* Make the window clamp follow along. */ + WRITE_ONCE(tp->window_clamp, + tcp_win_from_space(sk, rcvbuf)); } } tp->rcvq_space.space = copied; From c286f204ce6ba7b48e3dcba53eda7df8eaa64dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Arboleda?= Date: Tue, 13 Aug 2024 11:10:53 -0500 Subject: [PATCH 0982/1523] ALSA: usb-audio: Support Yamaha P-125 quirk entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a USB quirk for the Yamaha P-125 digital piano. Signed-off-by: Juan José Arboleda Cc: Link: https://patch.msgid.link/20240813161053.70256-1-soyjuanarbol@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index f13a8d63a019..aaa6a515d0f8 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL), YAMAHA_DEVICE(0x105b, NULL), YAMAHA_DEVICE(0x105c, NULL), YAMAHA_DEVICE(0x105d, NULL), +YAMAHA_DEVICE(0x1718, "P-125"), { USB_DEVICE(0x0499, 0x1503), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { From 202b85da0a2fb6f034f0290af841e0f29352af9f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 18:12:16 +0300 Subject: [PATCH 0983/1523] drm/i915: make __intel_display_power_is_enabled() static The function isn't used outside of intel_display_power.c. Make it static. Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240813151216.2573845-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_display_power.c | 16 ++-------------- .../gpu/drm/i915/display/intel_display_power.h | 2 -- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0af1e34ef2a7..cf8b38f2ebf5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -198,20 +198,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) } } -/** - * __intel_display_power_is_enabled - unlocked check for a power domain - * @dev_priv: i915 device instance - * @domain: power domain to check - * - * This is the unlocked version of intel_display_power_is_enabled() and should - * only be used from error capture and recovery code where deadlocks are - * possible. - * - * Returns: - * True when the power domain is enabled, false otherwise. - */ -bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_well *power_well; bool is_enabled; diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index d6c2a5846bdc..0962f6aaeee6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -188,8 +188,6 @@ intel_display_power_domain_str(enum intel_display_power_domain domain); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); intel_wakeref_t From fa0db8e568787c665384430eaf2221b299b85367 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 13 Aug 2024 15:19:01 +0200 Subject: [PATCH 0984/1523] Revert "ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error" This reverts commit 28ab9769117ca944cb6eb537af5599aa436287a4. Sense data can be in either fixed format or descriptor format. SAT-6 revision 1, "10.4.6 Control mode page", defines the D_SENSE bit: "The SATL shall support this bit as defined in SPC-5 with the following exception: if the D_ SENSE bit is set to zero (i.e., fixed format sense data), then the SATL should return fixed format sense data for ATA PASS-THROUGH commands." The libata SATL has always kept D_SENSE set to zero by default. (It is however possible to change the value using a MODE SELECT SG_IO command.) Failed ATA PASS-THROUGH commands correctly respected the D_SENSE bit, however, successful ATA PASS-THROUGH commands incorrectly returned the sense data in descriptor format (regardless of the D_SENSE bit). Commit 28ab9769117c ("ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error") fixed this bug for successful ATA PASS-THROUGH commands. However, after commit 28ab9769117c ("ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error"), there were bug reports that hdparm, hddtemp, and udisks were no longer working as expected. These applications incorrectly assume the returned sense data is in descriptor format, without even looking at the RESPONSE CODE field in the returned sense data (to see which format the returned sense data is in). Considering that there will be broken versions of these applications around roughly forever, we are stuck with being bug compatible with older kernels. Cc: stable@vger.kernel.org # 4.19+ Reported-by: Stephan Eisvogel Reported-by: Christian Heusel Closes: https://lore.kernel.org/linux-ide/0bf3f2f0-0fc6-4ba5-a420-c0874ef82d64@heusel.eu/ Fixes: 28ab9769117c ("ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error") Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240813131900.1285842-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-scsi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d6f5e25e1ed8..473e00a58a8b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -951,8 +951,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) &sense_key, &asc, &ascq); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { - /* ATA PASS-THROUGH INFORMATION AVAILABLE */ - ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D); + /* + * ATA PASS-THROUGH INFORMATION AVAILABLE + * + * Note: we are supposed to call ata_scsi_set_sense(), which + * respects the D_SENSE bit, instead of unconditionally + * generating the sense data in descriptor format. However, + * because hdparm, hddtemp, and udisks incorrectly assume sense + * data in descriptor format, without even looking at the + * RESPONSE CODE field in the returned sense data (to see which + * format the returned sense data is in), we are stuck with + * being bug compatible with older kernels. + */ + scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D); } } From 829e2a23121fb36ee30ea5145c2a85199f68e2c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Aug 2024 12:04:59 +0200 Subject: [PATCH 0985/1523] ALSA: hda/tas2781: Use correct endian conversion The data conversion is done rather by a wrong function. We convert to BE32, not from BE32. Although the end result must be same, this was complained by the compiler. Fix the code again and align with another similar function tas2563_apply_calib() that does already right. Fixes: 3beddef84d90 ("ALSA: hda/tas2781: fix wrong calibrated data order") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408141630.DiDUB8Z4-lkp@intel.com/ Link: https://patch.msgid.link/20240814100500.1944-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 7dbfc92d9d55..89d8235537cd 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -527,8 +527,8 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) for (i = 0; i < tas_priv->ndev; i++) { for (j = 0; j < CALIB_MAX; j++) { - data = get_unaligned_be32( - &tas_priv->cali_data.data[offset]); + data = cpu_to_be32( + *(uint32_t *)&tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), (unsigned char *)&data, 4); From 73c34b0b85d46bf9c2c0b367aeaffa1e2481b136 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 29 Jul 2024 13:44:33 -0700 Subject: [PATCH 0986/1523] xfs: attr forks require attr, not attr2 It turns out that I misunderstood the difference between the attr and attr2 feature bits. "attr" means that at some point an attr fork was created somewhere in the filesystem. "attr2" means that inodes have variable-sized forks, but says nothing about whether or not there actually /are/ attr forks in the system. If we have an attr fork, we only need to check that attr is set. Fixes: 99d9d8d05da26 ("xfs: scrub inode block mappings") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/scrub/bmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f1..5ab2ac53c920 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: From 04d6dbb55301a29aeb9a197e6b0012cdc265f1e4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 4 Aug 2024 14:39:34 -0700 Subject: [PATCH 0987/1523] xfs: revert AIL TASK_KILLABLE threshold In commit 9adf40249e6c, we changed the behavior of the AIL thread to set its own task state to KILLABLE whenever the timeout value is nonzero. Unfortunately, this missed the fact that xfsaild_push will return 50ms (aka a longish sleep) when we reach the push target or the AIL becomes empty, so xfsaild goes to sleep for a long period of time in uninterruptible D state. This results in artificially high load averages because KILLABLE processes are UNINTERRUPTIBLE, which contributes to load average even though the AIL is asleep waiting for someone to interrupt it. It's not blocked on IOs or anything, but people scrap ps for processes that look like they're stuck in D state, so restore the previous threshold. Fixes: 9adf40249e6c ("xfs: AIL doesn't need manual pushing") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_trans_ail.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe..8ede9d099d1f 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); From 8d16762047c627073955b7ed171a36addaf7b1ff Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 4 Aug 2024 14:39:57 -0700 Subject: [PATCH 0988/1523] xfs: conditionally allow FS_XFLAG_REALTIME changes if S_DAX is set If a file has the S_DAX flag (aka fsdax access mode) set, we cannot allow users to change the realtime flag unless the datadev and rtdev both support fsdax access modes. Even if there are no extents allocated to the file, the setattr thread could be racing with another thread that has already started down the write code paths. Fixes: ba23cba9b3bdc ("fs: allow per-device dax status checking for filesystems") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_ioctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12..6b13666d4e96 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { From 66155de93bcf4f2967e602a4b3bf7ebe58f34b11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:02:58 -0700 Subject: [PATCH 0989/1523] KVM: x86: Disallow read-only memslots for SEV-ES and SEV-SNP (and TDX) Disallow read-only memslots for SEV-{ES,SNP} VM types, as KVM can't directly emulate instructions for ES/SNP, and instead the guest must explicitly request emulation. Unless the guest explicitly requests emulation without accessing memory, ES/SNP relies on KVM creating an MMIO SPTE, with the subsequent #NPF being reflected into the guest as a #VC. But for read-only memslots, KVM deliberately doesn't create MMIO SPTEs, because except for ES/SNP, doing so requires setting reserved bits in the SPTE, i.e. the SPTE can't be readable while also generating a #VC on writes. Because KVM never creates MMIO SPTEs and jumps directly to emulation, the guest never gets a #VC. And since KVM simply resumes the guest if ES/SNP guests trigger emulation, KVM effectively puts the vCPU into an infinite #NPF loop if the vCPU attempts to write read-only memory. Disallow read-only memory for all VMs with protected state, i.e. for upcoming TDX VMs as well as ES/SNP VMs. For TDX, it's actually possible to support read-only memory, as TDX uses EPT Violation #VE to reflect the fault into the guest, e.g. KVM could configure read-only SPTEs with RX protections and SUPPRESS_VE=0. But there is no strong use case for supporting read-only memslots on TDX, e.g. the main historical usage is to emulate option ROMs, but TDX disallows executing from shared memory. And if someone comes along with a legitimate, strong use case, the restriction can always be lifted for TDX. Don't bother trying to retroactively apply the restriction to SEV-ES VMs that are created as type KVM_X86_DEFAULT_VM. Read-only memslots can't possibly work for SEV-ES, i.e. disallowing such memslots is really just means reporting an error to userspace instead of silently hanging vCPUs. Trying to deal with the ordering between KVM_SEV_INIT and memslot creation isn't worth the marginal benefit it would provide userspace. Fixes: 26c44aa9e076 ("KVM: SEV: define VM types for SEV and SEV-ES") Fixes: 1dfe571c12cf ("KVM: SEV: Add initial SEV-SNP support") Cc: Peter Gonda Cc: Michael Roth Cc: Vishal Annapurve Cc: Ackerly Tng Signed-off-by: Sean Christopherson Message-ID: <20240809190319.1710470-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ include/linux/kvm_host.h | 7 +++++++ virt/kvm/kvm_main.c | 5 ++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 94e7b5a4fafe..4a68cb3eba78 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2192,6 +2192,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, #define kvm_arch_has_private_mem(kvm) false #endif +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) + static inline u16 kvm_read_ldt(void) { u16 ldt; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 79a6b1a63027..b23c6d48392f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 92901656a0d4..cb2b78e92910 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1578,15 +1578,14 @@ static int check_memory_region_flags(struct kvm *kvm, if (mem->flags & KVM_MEM_GUEST_MEMFD) valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES; -#ifdef CONFIG_HAVE_KVM_READONLY_MEM /* * GUEST_MEMFD is incompatible with read-only memslots, as writes to * read-only memslots have emulated MMIO, not page fault, semantics, * and KVM doesn't allow emulated MMIO for private memory. */ - if (!(mem->flags & KVM_MEM_GUEST_MEMFD)) + if (kvm_arch_has_readonly_mem(kvm) && + !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; -#endif if (mem->flags & ~valid_flags) return -EINVAL; From f94511df53bb792e505c98662971434c7995388a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Aug 2024 11:37:31 +0100 Subject: [PATCH 0990/1523] arm64: uaccess: correct thinko in __get_mem_asm() In the CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y version of __get_mem_asm(), we incorrectly use _ASM_EXTABLE_##type##ACCESS_ERR() such that upon a fault the extable fixup handler writes -EFAULT into "%w0", which is the register containing 'x' (the result of the load). This was a thinko in commit: 86a6a68febfcf57b ("arm64: start using 'asm goto' for get_user() when available") Prior to that commit _ASM_EXTABLE_##type##ACCESS_ERR_ZERO() was used such that the extable fixup handler wrote -EFAULT into "%w0" (the register containing 'err'), and zero into "%w1" (the register containing 'x'). When the 'err' variable was removed, the extable entry was updated incorrectly. Writing -EFAULT to the value register is unnecessary but benign: * We never want -EFAULT in the value register, and previously this would have been zeroed in the extable fixup handler. * In __get_user_error() the value is overwritten with zero explicitly in the error path. * The asm goto outputs cannot be used when the goto label is taken, as older compilers (e.g. clang < 16.0.0) do not guarantee that asm goto outputs are usable in this path and may use a stale value rather than the value in an output register. Consequently, zeroing in the extable fixup handler is insufficient to ensure callers see zero in the error path. * The expected usage of unsafe_get_user() and get_kernel_nofault() requires that the value is not consumed in the error path. Some versions of GCC would mis-compile asm goto with outputs, and erroneously omit subsequent assignments, breaking the error path handling in __get_user_error(). This was discussed at: https://lore.kernel.org/lkml/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/ ... and was fixed by removing support for asm goto with outputs on those broken compilers in commit: f2f6a8e887172503 ("init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND") With that out of the way, we can safely replace the usage of _ASM_EXTABLE_##type##ACCESS_ERR() with _ASM_EXTABLE_##type##ACCESS(), leaving the value register unchanged in the case a fault is taken, as was originally intended. This matches other architectures and matches our __put_mem_asm(). Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20240807103731.2498893-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a..1aa4ecb73429 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else From a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 5 Aug 2024 11:30:24 +0800 Subject: [PATCH 0991/1523] arm64: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE Currently, only acpi_early_node_map[0] was initialized to NUMA_NO_NODE. To ensure all the values were properly initialized, switch to initialize all of them to NUMA_NO_NODE. Fixes: e18962491696 ("arm64: numa: rework ACPI NUMA initialization") Cc: # 4.19.x Reported-by: Andrew Jones Suggested-by: Andrew Jones Signed-off-by: Haibo Xu Reviewed-by: Anshuman Khandual Reviewed-by: Sunil V L Reviewed-by: Andrew Jones Acked-by: Catalin Marinas Acked-by: Lorenzo Pieralisi Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/853d7f74aa243f6f5999e203246f0d1ae92d2b61.1722828421.git.haibo1.xu@intel.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi_numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c33..2465f291c7e1 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { From 1c0e5881691a787a9399a99bff4d56ead6e75e91 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 14 Aug 2024 10:31:13 +0200 Subject: [PATCH 0992/1523] KVM: SEV: uapi: fix typo in SEV_RET_INVALID_CONFIG "INVALID" is misspelt in "SEV_RET_INAVLID_CONFIG". Since this is part of the UAPI, keep the current definition and add a new one with the fix. Fix-suggested-by: Marc Zyngier Signed-off-by: Amit Shah Message-ID: <20240814083113.21622-1-amit@kernel.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59..832c15d9155b 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, From 57d5af2660e9443b081eeaf1c373b3ce48477828 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Wed, 14 Aug 2024 20:42:37 +0530 Subject: [PATCH 0993/1523] spi: spi-cadence-quadspi: Fix OSPI NOR failures during system resume Its necessary to call pm_runtime_force_*() hooks as part of system suspend/resume calls so that the runtime_pm hooks get called. This ensures latest state of the IP is cached and restored during system sleep. This is especially true if runtime autosuspend is enabled as runtime suspend hooks may not be called at all before system sleeps. Without this patch, OSPI NOR enumeration (READ_ID) fails during resume as context saved during suspend path is inconsistent. Fixes: 078d62de433b ("spi: cadence-qspi: add system-wide suspend and resume callbacks") Signed-off-by: Vignesh Raghavendra Link: https://patch.msgid.link/20240814151237.3856184-1-vigneshr@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 05ebb03d319f..d4607cb89c48 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2000,13 +2000,25 @@ static int cqspi_runtime_resume(struct device *dev) static int cqspi_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; - return spi_controller_suspend(cqspi->host); + ret = spi_controller_suspend(cqspi->host); + if (ret) + return ret; + + return pm_runtime_force_suspend(dev); } static int cqspi_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } return spi_controller_resume(cqspi->host); } From 71833e79a42178d8a50b5081c98c78ace9325628 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 14 Aug 2024 13:16:49 +0100 Subject: [PATCH 0994/1523] i2c: Use IS_REACHABLE() for substituting empty ACPI functions Replace IS_ENABLED() with IS_REACHABLE() to substitute empty stubs for: i2c_acpi_get_i2c_resource() i2c_acpi_client_count() i2c_acpi_find_bus_speed() i2c_acpi_new_device_by_fwnode() i2c_adapter *i2c_acpi_find_adapter_by_handle() i2c_acpi_waive_d0_probe() commit f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") partially fixed this conditional to depend on CONFIG_I2C, but used IS_ENABLED(), which is wrong since CONFIG_I2C is tristate. CONFIG_ACPI is boolean but let's also change it to use IS_REACHABLE() to future-proof it against becoming tristate. Somehow despite testing various combinations of CONFIG_I2C and CONFIG_ACPI we missed the combination CONFIG_I2C=m, CONFIG_ACPI=y. Signed-off-by: Richard Fitzgerald Fixes: f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408141333.gYnaitcV-lkp@intel.com/ Reviewed-by: Takashi Iwai Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); From 3cd740b985963f874a1a094f1969e998b9d05554 Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Tue, 6 Aug 2024 12:40:52 +0100 Subject: [PATCH 0995/1523] netfilter: allow ipv6 fragments to arrive on different devices Commit 264640fc2c5f4 ("ipv6: distinguish frag queues by device for multicast and link-local packets") modified the ipv6 fragment reassembly logic to distinguish frag queues by device for multicast and link-local packets but in fact only the main reassembly code limits the use of the device to those address types and the netfilter reassembly code uses the device for all packets. This means that if fragments of a packet arrive on different interfaces then netfilter will fail to reassemble them and the fragments will be expired without going any further through the filters. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Tom Hughes Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f0844c9315d..4120e67a8ce6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; + q = inet_frag_find(nf_frag->fqdir, &key); if (!q) return NULL; From ab0d6ef864c5fa820e894ee1a07f861e63851664 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:12 +0530 Subject: [PATCH 0996/1523] drm/xe: Move enable host l2 VRAM post MCR init xe_gt_enable_host_l2_vram() is reading the XE2_GAMREQSTRM_CTRL register that is currently missing the MCR annotation. However, just adding the annotation doesn't work as this function is called before MCR handling is initialized in xe_gt_mcr_init(). xe_gt_enable_host_l2_vram() is used to implement WA 16023588340 that needs to be done as early as possible during initialization in order to be effective since the MMIO writes impact it. In the failure scenario, driver would simply not be able to bind successfully. Moving xe_gt_enable_host_l2_vram() later, after MCR initialization is done, only incurs a few additional HW accesses, particularly when loading GuC for hwconfig. Binding/unbinding the driver 100 times in BMG still works so it should be ok to start handling the WA a little bit later. This is sufficient to allow adding the MCR annotation to XE2_GAMREQSTRM_CTRL. Cc: Lucas De Marchi Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-2-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 58895ed22f6e..238c7d1053f0 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -557,7 +557,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); - xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -569,6 +568,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); From f0ffa657e9f3913c7921cbd4d876343401f15f52 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:13 +0530 Subject: [PATCH 0997/1523] drm/xe: Write all slices if its mcr register Register GAMREQSTRM_CTRL should be considered mcr register which should write to all slices as per documentation. Bspec: 71185 Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-3-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5f33dee35f8a..aeb17fcb27ac 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -83,7 +83,7 @@ #define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) -#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) #define CCS_AUX_INV XE_REG(0x4208) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 238c7d1053f0..224c137967c3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -110,9 +110,9 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); @@ -134,9 +134,9 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (WARN_ON(err)) return; - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } From 4551d60299b5ddc2655b6b365a4b92634e14e04f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:14 +0530 Subject: [PATCH 0998/1523] drm/xe: Define STATELESS_COMPRESSION_CTRL as mcr register Register STATELESS_COMPRESSION_CTRL should be considered mcr register which should write to all slices as per documentation. Bspec: 71185 Fixes: ecabb5e6ce54 ("drm/xe/xe2: Add performance turning changes") Signed-off-by: Tejas Upadhyay Reviewed-by: Shekhar Chauhan Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-4-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index aeb17fcb27ac..0d1a4a9f4e11 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,7 +80,7 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) -#define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) +#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) #define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) From 61119394631f219e23ce98bcc3eb993a64a8ea64 Mon Sep 17 00:00:00 2001 From: Celeste Liu Date: Thu, 27 Jun 2024 22:23:39 +0800 Subject: [PATCH 0999/1523] riscv: entry: always initialize regs->a0 to -ENOSYS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise when the tracer changes syscall number to -1, the kernel fails to initialize a0 with -ENOSYS and subsequently fails to return the error code of the failed syscall to userspace. For example, it will break strace syscall tampering. Fixes: 52449c17bdd1 ("riscv: entry: set a0 = -ENOSYS only when syscall != -1") Reported-by: "Dmitry V. Levin" Reviewed-by: Björn Töpel Cc: stable@vger.kernel.org Signed-off-by: Celeste Liu Link: https://lore.kernel.org/r/20240627142338.5114-2-CoelacanthusHex@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee..51ebfd23e007 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). From 57d76bc51fd80824bcc0c84a5b5ec944f1b51edd Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 8 May 2024 21:19:17 +0200 Subject: [PATCH 1000/1523] riscv: change XIP's kernel_map.size to be size of the entire kernel With XIP kernel, kernel_map.size is set to be only the size of data part of the kernel. This is inconsistent with "normal" kernel, who sets it to be the size of the entire kernel. More importantly, XIP kernel fails to boot if CONFIG_DEBUG_VIRTUAL is enabled, because there are checks on virtual addresses with the assumption that kernel_map.size is the size of the entire kernel (these checks are in arch/riscv/mm/physaddr.c). Change XIP's kernel_map.size to be the size of the entire kernel. Signed-off-by: Nam Cao Cc: # v6.1+ Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240508191917.2892064-1-namcao@linutronix.de Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8b698d9609e7..eb0649a61b4c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -927,7 +927,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; + end_va = kernel_map.virt_addr + kernel_map.size; for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), @@ -1096,7 +1096,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else From a445699879f989f6700df81f497b70bf94cc6163 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 5 Aug 2024 11:30:23 +0800 Subject: [PATCH 1001/1523] RISC-V: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE Currently, only acpi_early_node_map[0] was initialized to NUMA_NO_NODE. To ensure all the values were properly initialized, switch to initialize all of them to NUMA_NO_NODE. Fixes: eabd9db64ea8 ("ACPI: RISCV: Add NUMA support based on SRAT and SLIT") Reported-by: Andrew Jones Suggested-by: Andrew Jones Signed-off-by: Haibo Xu Reviewed-by: Sunil V L Reviewed-by: Andrew Jones Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/0d362a8ae50558b95685da4c821b2ae9e8cf78be.1722828421.git.haibo1.xu@intel.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/acpi_numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c index 0231482d6946..ff95aeebee3e 100644 --- a/arch/riscv/kernel/acpi_numa.c +++ b/arch/riscv/kernel/acpi_numa.c @@ -28,7 +28,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { From c42e2f076769c9c1bc5f3f0aa1c2032558e76647 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 9 Aug 2024 14:44:43 -0700 Subject: [PATCH 1002/1523] RISC-V: hwprobe: Add MISALIGNED_PERF key RISCV_HWPROBE_KEY_CPUPERF_0 was mistakenly flagged as a bitmask in hwprobe_key_is_bitmask(), when in reality it was an enum value. This causes problems when used in conjunction with RISCV_HWPROBE_WHICH_CPUS, since SLOW, FAST, and EMULATED have values whose bits overlap with each other. If the caller asked for the set of CPUs that was SLOW or EMULATED, the returned set would also include CPUs that were FAST. Introduce a new hwprobe key, RISCV_HWPROBE_KEY_MISALIGNED_PERF, which returns the same values in response to a direct query (with no flags), but is properly handled as an enumerated value. As a result, SLOW, FAST, and EMULATED are all correctly treated as distinct values under the new key when queried with the WHICH_CPUS flag. Leave the old key in place to avoid disturbing applications which may have already come to rely on the key, with or without its broken behavior with respect to the WHICH_CPUS flag. Fixes: e178bf146e4b ("RISC-V: hwprobe: Introduce which-cpus flag") Signed-off-by: Evan Green Reviewed-by: Charlie Jenkins Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20240809214444.3257596-2-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 20 +++++++++++++------- arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/kernel/sys_hwprobe.c | 1 + 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 3db60a0911df..a994eed75bde 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -239,8 +239,13 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was + mistakenly classified as a bitmask rather than a value. + +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing + the performance of misaligned scalar native word accesses on the selected set + of processors. * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned accesses is unknown. @@ -249,12 +254,13 @@ The following keys are defined: emulated via software, either in or below the kernel. These accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned native word + sized accesses are slower than the equivalent quantity of byte accesses. + Misaligned accesses may be supported directly in hardware, or trapped and + emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned native word + sized accesses are faster than the equivalent quantity of byte accesses. * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are not supported at all and will generate a misaligned address fault. diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index ef01c182af2b..ffb9484531af 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 8 +#define RISCV_HWPROBE_MAX_KEY 9 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index b706c8e47b02..635753084275 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -82,6 +82,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 #define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 +#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 8d1b5c35d2a7..2d0f4f6a32c3 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -225,6 +225,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; From 1f5288874de776412041022607513ffac74ae1a6 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 9 Aug 2024 14:44:44 -0700 Subject: [PATCH 1003/1523] RISC-V: hwprobe: Add SCALAR to misaligned perf defines In preparation for misaligned vector performance hwprobe keys, rename the hwprobe key values associated with misaligned scalar accesses to include the term SCALAR. Leave the old defines in place to maintain source compatibility. This change is intended to be a functional no-op. Signed-off-by: Evan Green Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240809214444.3257596-3-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 28 ++++++++++++---------- arch/riscv/include/uapi/asm/hwprobe.h | 5 ++++ arch/riscv/kernel/sys_hwprobe.c | 10 ++++---- arch/riscv/kernel/traps_misaligned.c | 6 ++--- arch/riscv/kernel/unaligned_access_speed.c | 12 +++++----- 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index a994eed75bde..85b709257918 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -247,23 +247,25 @@ The following keys are defined: the performance of misaligned scalar native word accesses on the selected set of processors. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of + misaligned scalar accesses is unknown. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. These accesses are - always extremely slow. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar + accesses are emulated via software, either in or below the kernel. These + accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned native word - sized accesses are slower than the equivalent quantity of byte accesses. - Misaligned accesses may be supported directly in hardware, or trapped and - emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native + word sized accesses are slower than the equivalent quantity of byte + accesses. Misaligned accesses may be supported directly in hardware, or + trapped and emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned native word - sized accesses are faster than the equivalent quantity of byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native + word sized accesses are faster than the equivalent quantity of byte + accesses. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar + accesses are not supported at all and will generate a misaligned address + fault. * :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which represents the size of the Zicboz block in bytes. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 635753084275..1e153cda57db 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -83,6 +83,11 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 #define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 #define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 2d0f4f6a32c3..cea0ca2bf2a2 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -178,13 +178,13 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } @@ -192,12 +192,12 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) static u64 hwprobe_misaligned(const struct cpumask *cpus) { if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) - return RISCV_HWPROBE_MISALIGNED_FAST; + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) - return RISCV_HWPROBE_MISALIGNED_EMULATED; + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; - return RISCV_HWPROBE_MISALIGNED_SLOW; + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; } #endif diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b62d5a2f4541..192cd5603e95 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -338,7 +338,7 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; #endif if (!unaligned_enabled) @@ -532,13 +532,13 @@ static bool check_unaligned_access_emulated(int cpu) unsigned long tmp_var, tmp_val; bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index a9a6bcb02acf..160628a2116d 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -34,9 +34,9 @@ static int check_unaligned_access(void *param) struct page *page = param; void *dst; void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) return 0; /* Make an unaligned destination buffer. */ @@ -95,14 +95,14 @@ static int check_unaligned_access(void *param) } if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; ratio = div_u64((byte_cycles * 100), word_cycles); pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", cpu, ratio / 100, ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; @@ -110,7 +110,7 @@ static int check_unaligned_access(void *param) * Set the value of fast_misaligned_access of a CPU. These operations * are atomic to avoid race conditions. */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) cpumask_set_cpu(cpu, &fast_misaligned_access); else cpumask_clear_cpu(cpu, &fast_misaligned_access); @@ -188,7 +188,7 @@ static int riscv_online_cpu(unsigned int cpu) static struct page *buf; /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); From d1a7b382a9d3f0f3e5a80e0be2991c075fa4f618 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 6 Aug 2024 16:43:24 +0100 Subject: [PATCH 1004/1523] netfilter: nfnetlink: Initialise extack before use in ACKs Add missing extack initialisation when ACKing BATCH_BEGIN and BATCH_END. Fixes: bf2ac490d28c ("netfilter: nfnetlink: Handle ACK flags for batch messages") Signed-off-by: Donald Hunter Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abf660c7baf..932b3ddb34f1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -427,8 +427,10 @@ replay_abort: nfnl_unlock(subsys_id); - if (nlh->nlmsg_flags & NLM_F_ACK) + if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); + } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -577,6 +579,7 @@ done: ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { From e9767137308daf906496613fd879808a07f006a2 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 6 Aug 2024 17:16:37 +0100 Subject: [PATCH 1005/1523] netfilter: flowtable: initialise extack before use Fix missing initialisation of extack in flow offload. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Donald Hunter Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index ff1a4e36c2b5..e06bc36f49fe 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; + struct netlink_ext_ack extack = {}; struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; From 7d8dc1c7be8d3509e8f5164dd5df64c8e34d7eeb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Aug 2024 21:28:41 +0200 Subject: [PATCH 1006/1523] netfilter: nf_queue: drop packets with cloned unconfirmed conntracks Conntrack assumes an unconfirmed entry (not yet committed to global hash table) has a refcount of 1 and is not visible to other cores. With multicast forwarding this assumption breaks down because such skbs get cloned after being picked up, i.e. ct->use refcount is > 1. Likewise, bridge netfilter will clone broad/mutlicast frames and all frames in case they need to be flood-forwarded during learning phase. For ip multicast forwarding or plain bridge flood-forward this will "work" because packets don't leave softirq and are implicitly serialized. With nfqueue this no longer holds true, the packets get queued and can be reinjected in arbitrary ways. Disable this feature, I see no other solution. After this patch, nfqueue cannot queue packets except the last multicast/broadcast packet. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 6 +++++- net/netfilter/nfnetlink_queue.c | 35 +++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 09f6a773a708..8f9c19d992ac 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv, if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; + if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + WARN_ON_ONCE(skb_shared(skb)); - WARN_ON_ONCE(refcount_read(&nfct->use) != 1); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 55e28e1da66e..e0716da256bf 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; - const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + struct nf_conn *ct = (void *)skb_nfct(entry->skb); + unsigned long status; + unsigned int use; - if (ct && ((ct->status & flags) == IPS_DYING)) + if (!ct) + return false; + + status = READ_ONCE(ct->status); + if ((status & flags) == IPS_DYING) return true; + + if (status & IPS_CONFIRMED) + return false; + + /* in some cases skb_clone() can occur after initial conntrack + * pickup, but conntrack assumes exclusive skb->_nfct ownership for + * unconfirmed entries. + * + * This happens for br_netfilter and with ip multicast routing. + * We can't be solved with serialization here because one clone could + * have been queued for local delivery. + */ + use = refcount_read(&ct->ct_general.use); + if (likely(use == 1)) + return false; + + /* Can't decrement further? Exclusive ownership. */ + if (!refcount_dec_not_one(&ct->ct_general.use)) + return false; + + skb_set_nfct(entry->skb, 0); + /* No nf_ct_put(): we already decremented .use and it cannot + * drop down to 0. + */ + return true; #endif return false; } From ea2306f0330c59ac8cd6ba13193497f0a6a02684 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Aug 2024 23:14:43 +0200 Subject: [PATCH 1007/1523] selftests: netfilter: add test for br_netfilter+conntrack+queue combination Trigger cloned skbs leaving softirq protection. This triggers splat without the preceeding change ("netfilter: nf_queue: drop packets with cloned unconfirmed conntracks"): WARNING: at net/netfilter/nf_conntrack_core.c:1198 __nf_conntrack_confirm.. because local delivery and forwarding will race for confirmation. Based on a reproducer script from Yi Chen. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../testing/selftests/net/netfilter/Makefile | 1 + .../net/netfilter/br_netfilter_queue.sh | 78 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100755 tools/testing/selftests/net/netfilter/br_netfilter_queue.sh diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f92..d13fb5ea3e89 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 000000000000..6a764d70ab06 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin < /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + exit 1 +fi + +exit 0 From e0b6648b0446e59522819c75ba1dcb09e68d3e94 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 9 Aug 2024 15:07:30 +0200 Subject: [PATCH 1008/1523] netfilter: nf_tables: Audit log dump reset after the fact In theory, dumpreset may fail and invalidate the preceeding log message. Fix this and use the occasion to prepare for object reset locking, which benefits from a few unrelated changes: * Add an early call to nfnetlink_unicast if not resetting which effectively skips the audit logging but also unindents it. * Extract the table's name from the netlink attribute (which is verified via earlier table lookup) to not rely upon validity of the looked up table pointer. * Do not use local variable family, it will vanish. Fixes: 8e6cf365e1d5 ("audit: log nftables configuration change events") Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 481ee78e77bc..4fa132715fcc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8055,6 +8055,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { + const struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; @@ -8064,6 +8065,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct sk_buff *skb2; bool reset = false; u32 objtype; + char *buf; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { @@ -8102,27 +8104,23 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) reset = true; - if (reset) { - const struct nftables_pernet *nft_net; - char *buf; - - nft_net = nft_pernet(net); - buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); - - audit_log_nfcfg(buf, - family, - 1, - AUDIT_NFT_OP_OBJ_RESET, - GFP_ATOMIC); - kfree(buf); - } - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); if (err < 0) goto err_fill_obj_info; + if (!reset) + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_obj_info: From 69fc3e9e90f1afc11f4015e6b75d18ab9acee348 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 9 Aug 2024 15:07:31 +0200 Subject: [PATCH 1009/1523] netfilter: nf_tables: Introduce nf_tables_getobj_single Outsource the reply skb preparation for non-dump getrule requests into a distinct function. Prep work for object reset locking. Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 85 ++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4fa132715fcc..c12c9cae784d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8052,10 +8052,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { - const struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; @@ -8063,11 +8063,51 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; - bool reset = false; u32 objtype; - char *buf; int err; + if (!nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_TYPE]) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); + return ERR_CAST(table); + } + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); + return ERR_CAST(obj); + } + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb2) + return ERR_PTR(-ENOMEM); + + err = nf_tables_fill_obj_info(skb2, net, portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } + + return skb2; +} + +static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + bool reset = false; + char *buf; + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_obj_start, @@ -8080,35 +8120,12 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - if (!nla[NFTA_OBJ_NAME] || - !nla[NFTA_OBJ_TYPE]) - return -EINVAL; - - table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); - if (IS_ERR(table)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); - return PTR_ERR(table); - } - - objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); - if (IS_ERR(obj)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); - return PTR_ERR(obj); - } - - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) reset = true; - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, - info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj, reset); - if (err < 0) - goto err_fill_obj_info; + skb2 = nf_tables_getobj_single(portid, info, nla, reset); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); if (!reset) return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); @@ -8121,11 +8138,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); kfree(buf); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); - -err_fill_obj_info: - kfree_skb(skb2); - return err; + return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) From bd662c4218f9648e888bebde9468146965f3f8a0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 9 Aug 2024 15:07:32 +0200 Subject: [PATCH 1010/1523] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests Objects' dump callbacks are not concurrency-safe per-se with reset bit set. If two CPUs perform a reset at the same time, at least counter and quota objects suffer from value underrun. Prevent this by introducing dedicated locking callbacks for nfnetlink and the asynchronous dump handling to serialize access. Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects") Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 74 ++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c12c9cae784d..0a2f79346958 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8020,6 +8020,19 @@ cont: return skb->len; } +static int nf_tables_dumpreset_obj(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_obj(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) +{ + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_obj_start(cb); +} + static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8101,12 +8120,8 @@ nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - struct nftables_pernet *nft_net = nft_pernet(info->net); u32 portid = NETLINK_CB(skb).portid; - struct net *net = info->net; struct sk_buff *skb2; - bool reset = false; - char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -8120,15 +8135,46 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; - - skb2 = nf_tables_getobj_single(portid, info, nla, reset); + skb2 = nf_tables_getobj_single(portid, info, nla, false); if (IS_ERR(skb2)) return PTR_ERR(skb2); - if (!reset) - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, info->net, portid); +} + +static int nf_tables_getobj_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dumpreset_obj_start, + .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getobj_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_OBJ_TABLE]), @@ -9421,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, From 14d069d92951a3e150c0a81f2ca3b93e54da913b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 13 Aug 2024 09:12:53 -0700 Subject: [PATCH 1011/1523] i2c: tegra: Do not mark ACPI devices as irq safe On ACPI machines, the tegra i2c module encounters an issue due to a mutex being called inside a spinlock. This leads to the following bug: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585 ... Call trace: __might_sleep __mutex_lock_common mutex_lock_nested acpi_subsys_runtime_resume rpm_resume tegra_i2c_xfer The problem arises because during __pm_runtime_resume(), the spinlock &dev->power.lock is acquired before rpm_resume() is called. Later, rpm_resume() invokes acpi_subsys_runtime_resume(), which relies on mutexes, triggering the error. To address this issue, devices on ACPI are now marked as not IRQ-safe, considering the dependency of acpi_subsys_runtime_resume() on mutexes. Fixes: bd2fdedbf2ba ("i2c: tegra: Add the ACPI support") Cc: # v5.17+ Co-developed-by: Michael van der Westhuizen Signed-off-by: Michael van der Westhuizen Signed-off-by: Breno Leitao Reviewed-by: Dmitry Osipenko Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558d..1df5b4204142 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); From 6c569b77f0300f8a9960277c7094fa0f128eb811 Mon Sep 17 00:00:00 2001 From: Abhinav Jain Date: Wed, 14 Aug 2024 13:37:43 +0530 Subject: [PATCH 1012/1523] selftest: af_unix: Fix kselftest compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change expected_buf from (const void *) to (const char *) in function __recvpair(). This change fixes the below warnings during test compilation: ``` In file included from msg_oob.c:14: msg_oob.c: In function ‘__recvpair’: ../../kselftest_harness.h:106:40: warning: format ‘%s’ expects argument of type ‘char *’,but argument 6 has type ‘const void *’ [-Wformat=] ../../kselftest_harness.h:101:17: note: in expansion of macro ‘__TH_LOG’ msg_oob.c:235:17: note: in expansion of macro ‘TH_LOG’ ../../kselftest_harness.h:106:40: warning: format ‘%s’ expects argument of type ‘char *’,but argument 6 has type ‘const void *’ [-Wformat=] ../../kselftest_harness.h:101:17: note: in expansion of macro ‘__TH_LOG’ msg_oob.c:259:25: note: in expansion of macro ‘TH_LOG’ ``` Fixes: d098d77232c3 ("selftest: af_unix: Add msg_oob.c.") Signed-off-by: Abhinav Jain Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240814080743.1156166-1-jain.abhinav177@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/msg_oob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaeb..535eb2c3d7d1 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, + const char *expected_buf, int expected_len, int buf_len, int flags) { int i, ret[2], recv_errno[2], expected_errno = 0; From 0863bffda1131fd2fa9c05b653ad9ee3d8db127e Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Wed, 14 Aug 2024 13:17:47 +0200 Subject: [PATCH 1013/1523] Revert "serial: 8250_omap: Set the console genpd always on if no console suspend" This reverts commit 68e6939ea9ec3d6579eadeab16060339cdeaf940. Kevin reported that this causes a crash during suspend on platforms that dont use PM domains. Link: https://lore.kernel.org/r/7ha5hgpchq.fsf@baylibre.com Cc: Thomas Richard Fixes: 68e6939ea9ec ("serial: 8250_omap: Set the console genpd always on if no console suspend") Cc: stable Reported-by: Kevin Hilman Signed-off-by: Griffin Kroah-Hartman Link: https://lore.kernel.org/r/20240814111747.82371-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 33 +++++------------------------ 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c65..afef1dd4ddf4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); From f75c235565f90c4a17b125e47f1c68ef6b8c2bce Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 14 Aug 2024 02:09:53 -0700 Subject: [PATCH 1014/1523] arm64: Fix KASAN random tag seed initialization Currently, kasan_init_sw_tags() is called before setup_per_cpu_areas(), so per_cpu(prng_state, cpu) accesses the same address regardless of the value of "cpu", and the same seed value gets copied to the percpu area for every CPU. Fix this by moving the call to smp_prepare_boot_cpu(), which is the first architecture hook after setup_per_cpu_areas(). Fixes: 3c9e3aa11094 ("kasan: add tag related helper functions") Fixes: 3f41b6093823 ("kasan: fix random seed generation for tag-based mode") Signed-off-by: Samuel Holland Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20240814091005.969756-1-samuel.holland@sifive.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 3 --- arch/arm64/kernel/smp.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044..b22d28ec8028 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a2..f01f0fd7b7fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* From 69139d2919dd4aa9a553c8245e7c63e82613e3fc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 11 Aug 2024 19:21:53 -0700 Subject: [PATCH 1015/1523] vsock: fix recursive ->recvmsg calls After a vsock socket has been added to a BPF sockmap, its prot->recvmsg has been replaced with vsock_bpf_recvmsg(). Thus the following recursiion could happen: vsock_bpf_recvmsg() -> __vsock_recvmsg() -> vsock_connectible_recvmsg() -> prot->recvmsg() -> vsock_bpf_recvmsg() again We need to fix it by calling the original ->recvmsg() without any BPF sockmap logic in __vsock_recvmsg(). Fixes: 634f1a7110b4 ("vsock: support sockmap") Reported-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Tested-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Cc: Bobby Eshleman Cc: Michael S. Tsirkin Cc: Stefano Garzarella Signed-off-by: Cong Wang Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20240812022153.86512-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 4 ++++ net/vmw_vsock/af_vsock.c | 50 +++++++++++++++++++++++---------------- net/vmw_vsock/vsock_bpf.c | 4 ++-- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..24d970f7a4fa 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4b040285aa78..0ff9b2dd86ba 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1270,25 +1270,28 @@ out: return err; } +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + return vsk->transport->dgram_dequeue(vsk, msg, len, flags); +} + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; const struct proto *prot; -#endif - struct vsock_sock *vsk; - struct sock *sk; - sk = sock->sk; - vsk = vsock_sk(sk); - -#ifdef CONFIG_BPF_SYSCALL prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif - return vsk->transport->dgram_dequeue(vsk, msg, len, flags); + return __vsock_dgram_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); @@ -2174,15 +2177,12 @@ out: } int -vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; -#ifdef CONFIG_BPF_SYSCALL - const struct proto *prot; -#endif int err; sk = sock->sk; @@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto out; } -#ifdef CONFIG_BPF_SYSCALL - prot = READ_ONCE(sk->sk_prot); - if (prot != &vsock_proto) { - release_sock(sk); - return prot->recvmsg(sk, msg, len, flags, NULL); - } -#endif - if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else @@ -2250,6 +2242,22 @@ out: release_sock(sk); return err; } + +int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ +#ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; + const struct proto *prot; + + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); +#endif + + return __vsock_connectible_recvmsg(sock, msg, len, flags); +} EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index a3c97546ab84..c42c5cc18f32 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int int err; if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) - err = vsock_connectible_recvmsg(sock, msg, len, flags); + err = __vsock_connectible_recvmsg(sock, msg, len, flags); else if (sk->sk_type == SOCK_DGRAM) - err = vsock_dgram_recvmsg(sock, msg, len, flags); + err = __vsock_dgram_recvmsg(sock, msg, len, flags); else err = -EPROTOTYPE; From fde25c20f51807db340b875953cfd1cedaa392fc Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Mon, 12 Aug 2024 17:08:24 +0300 Subject: [PATCH 1016/1523] net: ethtool: Allow write mechanism of LPL and both LPL and EPL CMIS 5.2 standard section 9.4.2 defines four types of firmware update supported mechanism: None, only LPL, only EPL, both LPL and EPL. Currently, only LPL (Local Payload) type of write firmware block is supported. However, if the module supports both LPL and EPL the flashing process wrongly fails for no supporting LPL. Fix that, by allowing the write mechanism to be LPL or both LPL and EPL. Fixes: c4f78134d45c ("ethtool: cmis_fw_update: add a layer for supporting firmware update using CDB") Reported-by: Vladyslav Mykhaliuk Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Link: https://patch.msgid.link/20240812140824.3718826-1-danieller@nvidia.com Signed-off-by: Paolo Abeni --- net/ethtool/cmis_fw_update.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index ae4b4b28a601..655ff5224ffa 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -35,7 +35,10 @@ struct cmis_cdb_fw_mng_features_rpl { __be16 resv7; }; -#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 +enum cmis_cdb_fw_write_mechanism { + CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01, + CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, +}; static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, @@ -64,7 +67,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, } rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; - if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL || + rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) { ethnl_module_fw_flash_ntf_err(dev, ntf_params, "Write LPL is not supported", NULL); From 1f1b194284093d619c9fbc7e9e38b2c68d0408e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 12 Aug 2024 15:13:22 +0100 Subject: [PATCH 1017/1523] net: thunder_bgx: Fix netdev structure allocation Commit 94833addfaba ("net: thunderx: Unembed netdev structure") had a go at dynamically allocating the netdev structures for the thunderx_bgx driver. This change results in my ThunderX box catching fire (to be fair, it is what it does best). The issues with this change are that: - bgx_lmac_enable() is called *after* bgx_acpi_register_phy() and bgx_init_of_phy(), both expecting netdev to be a valid pointer. - bgx_init_of_phy() populates the MAC addresses for *all* LMACs attached to a given BGX instance, and thus needs netdev for each of them to have been allocated. There is a few things to be said about how the driver mixes LMAC and BGX states which leads to this sorry state, but that's beside the point. To address this, go back to a situation where all netdev structures are allocated before the driver starts relying on them, and move the freeing of these structures to driver removal. Someone brave enough can always go and restructure the driver if they want. Fixes: 94833addfaba ("net: thunderx: Unembed netdev structure") Signed-off-by: Marc Zyngier Cc: Breno Leitao Cc: Sunil Goutham Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Reviewed-by: Simon Horman Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20240812141322.1742918-1-maz@kernel.org Signed-off-by: Paolo Abeni --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index a40c266c37f2..608cc6af5af1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1054,18 +1054,12 @@ static int phy_interface_mode(u8 lmac_type) static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { - struct lmac *lmac, **priv; + struct lmac *lmac; u64 cfg; lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - lmac->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); - if (!lmac->netdev) - return -ENOMEM; - priv = netdev_priv(lmac->netdev); - *priv = lmac; - if ((lmac->lmac_type == BGX_MODE_SGMII) || (lmac->lmac_type == BGX_MODE_QSGMII) || (lmac->lmac_type == BGX_MODE_RGMII)) { @@ -1191,7 +1185,6 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); - free_netdev(lmac->netdev); lmac->phydev = NULL; } @@ -1653,6 +1646,23 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_get_qlm_mode(bgx); + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + struct lmac *lmacp, **priv; + + lmacp = &bgx->lmac[lmac]; + lmacp->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); + + if (!lmacp->netdev) { + for (int i = 0; i < lmac; i++) + free_netdev(bgx->lmac[i].netdev); + err = -ENOMEM; + goto err_enable; + } + + priv = netdev_priv(lmacp->netdev); + *priv = lmacp; + } + err = bgx_init_phy(bgx); if (err) goto err_enable; @@ -1692,8 +1702,10 @@ static void bgx_remove(struct pci_dev *pdev) u8 lmac; /* Disable all LMACs */ - for (lmac = 0; lmac < bgx->lmac_count; lmac++) + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { bgx_lmac_disable(bgx, lmac); + free_netdev(bgx->lmac[lmac].netdev); + } pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); From fd45cc614b8acca5bb435ba37fe9b3f9a17fab84 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Mon, 5 Aug 2024 13:08:56 +0200 Subject: [PATCH 1018/1523] drm/rockchip: inno-hdmi: Fix infoframe upload HDMI analyser shows that the AVI infoframe is no being longer send. The switch to the HDMI connector api should have used the frame content which is now given in the buffer parameter, but instead still uses the (now) empty and superfluous packed_frame variable. Fix it. Fixes: 65548c8ff0ab ("drm/rockchip: inno_hdmi: Switch to HDMI connector") Signed-off-by: Alex Bee Acked-by: Maxime Ripard Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240805110855.274140-2-knaerzche@gmail.com --- drivers/gpu/drm/rockchip/inno_hdmi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a2946..dec6913cec5b 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } From 52dd070c62e4ae2b5e7411b920e3f7a64235ecfb Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 14 Aug 2024 20:47:40 +0800 Subject: [PATCH 1019/1523] pmdomain: imx: wait SSAR when i.MX93 power domain on With "quiet" set in bootargs, there is power domain failure: "imx93_power_domain 44462400.power-domain: pd_off timeout: name: 44462400.power-domain, stat: 4" The current power on opertation takes ISO state as power on finished flag, but it is wrong. Before powering on operation really finishes, powering off comes and powering off will never finish because the last powering on still not finishes, so the following powering off actually not trigger hardware state machine to run. SSAR is the last step when powering on a domain, so need to wait SSAR done when powering on. Since EdgeLock Enclave(ELE) handshake is involved in the flow, enlarge the waiting time to 10ms for both on and off to avoid timeout. Cc: stable@vger.kernel.org Fixes: 0a0f7cc25d4a ("soc: imx: add i.MX93 SRC power domain driver") Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20240814124740.2778952-1-peng.fan@oss.nxp.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/imx93-pd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pmdomain/imx/imx93-pd.c b/drivers/pmdomain/imx/imx93-pd.c index 1e94b499c19b..d750a7dc58d2 100644 --- a/drivers/pmdomain/imx/imx93-pd.c +++ b/drivers/pmdomain/imx/imx93-pd.c @@ -20,6 +20,7 @@ #define FUNC_STAT_PSW_STAT_MASK BIT(0) #define FUNC_STAT_RST_STAT_MASK BIT(2) #define FUNC_STAT_ISO_STAT_MASK BIT(4) +#define FUNC_STAT_SSAR_STAT_MASK BIT(8) struct imx93_power_domain { struct generic_pm_domain genpd; @@ -50,7 +51,7 @@ static int imx93_pd_on(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - !(val & FUNC_STAT_ISO_STAT_MASK), 1, 10000); + !(val & FUNC_STAT_SSAR_STAT_MASK), 1, 10000); if (ret) { dev_err(domain->dev, "pd_on timeout: name: %s, stat: %x\n", genpd->name, val); return ret; @@ -72,7 +73,7 @@ static int imx93_pd_off(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - val & FUNC_STAT_PSW_STAT_MASK, 1, 1000); + val & FUNC_STAT_PSW_STAT_MASK, 1, 10000); if (ret) { dev_err(domain->dev, "pd_off timeout: name: %s, stat: %x\n", genpd->name, val); return ret; From cdc90f75387c42d64a0ed1ba03550ea9447249d4 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 13 Aug 2024 09:37:19 +0200 Subject: [PATCH 1020/1523] pse-core: Conditionally set current limit during PI regulator registration Fix an issue where `devm_regulator_register()` would fail for PSE controllers that do not support current limit control, such as simple GPIO-based controllers like the podl-pse-regulator. The `REGULATOR_CHANGE_CURRENT` flag and `max_uA` constraint are now conditionally set only if the `pi_set_current_limit` operation is supported. This change prevents the regulator registration routine from attempting to call `pse_pi_set_current_limit()`, which would return `-EOPNOTSUPP` and cause the registration to fail. Fixes: 4a83abcef5f4f ("net: pse-pd: Add new power limit get and set c33 features") Signed-off-by: Oleksij Rempel Reviewed-by: Kory Maincent Tested-by: Kyle Swenson Link: https://patch.msgid.link/20240813073719.2304633-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- drivers/net/pse-pd/pse_core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index ec20953e0f82..4f032b16a8a0 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -401,9 +401,14 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, rdesc->ops = &pse_pi_ops; rdesc->owner = pcdev->owner; - rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS | - REGULATOR_CHANGE_CURRENT; - rinit_data->constraints.max_uA = MAX_PI_CURRENT; + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + + if (pcdev->ops->pi_set_current_limit) { + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; + rinit_data->constraints.max_uA = MAX_PI_CURRENT; + } + rinit_data->supply_regulator = "vpwr"; rconfig.dev = pcdev->dev; From 7965a7f32a53d9ad807ce2c53bdda69ba104974f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 13 Aug 2024 15:39:34 +0200 Subject: [PATCH 1021/1523] selftests: net: lib: kill PIDs before del netns When deleting netns, it is possible to still have some tasks running, e.g. background tasks like tcpdump running in the background, not stopped because the test has been interrupted. Before deleting the netns, it is then safer to kill all attached PIDs, if any. That should reduce some noises after the end of some tests, and help with the debugging of some issues. That's why this modification is seen as a "fix". Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Acked-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Acked-by: Florian Westphal Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/20240813-upstream-net-20240813-selftests-net-lib-kill-v1-1-27b689b248b8@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/lib.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f773..8ee4489238ca 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -146,6 +146,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" From 8445d9d3c03101859663d34fda747f6a50947556 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 13 Aug 2024 22:10:20 +0800 Subject: [PATCH 1022/1523] net: hns3: fix wrong use of semaphore up Currently, if hns3 PF or VF FLR reset failed after five times retry, the reset done process will directly release the semaphore which has already released in hclge_reset_prepare_general. This will cause down operation fail. So this patch fixes it by adding reset state judgement. The up operation is only called after successful PF FLR reset. Fixes: 8627bdedc435 ("net: hns3: refactor the precedure of PF FLR") Fixes: f28368bb4542 ("net: hns3: refactor the procedure of VF FLR") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++-- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 82574ce0194f..125e04434611 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11516,8 +11516,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3735d2fed11f..094a7c7b5592 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) From 30545e17eac1f50c5ef49644daf6af205100a965 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 13 Aug 2024 22:10:21 +0800 Subject: [PATCH 1023/1523] net: hns3: use the user's cfg after reset Consider the followed case that the user change speed and reset the net interface. Before the hw change speed successfully, the driver get old old speed from hw by timer task. After reset, the previous speed is config to hw. As a result, the new speed is configed successfully but lost after PF reset. The followed pictured shows more dirrectly. +------+ +----+ +----+ | USER | | PF | | HW | +---+--+ +-+--+ +-+--+ | ethtool -s 100G | | +------------------>| set speed 100G | | +--------------------->| | | set successfully | | |<---------------------+---+ | |query cfg (timer task)| | | +--------------------->| | handle speed | | return 200G | | changing event | ethtool --reset |<---------------------+ | (100G) +------------------>| cfg previous speed |<--+ | | after reset (200G) | | +--------------------->| | | +---+ | |query cfg (timer task)| | | +--------------------->| | handle speed | | return 100G | | changing event | |<---------------------+ | (200G) | | |<--+ | |query cfg (timer task)| | +--------------------->| | | return 200G | | |<---------------------+ | | | v v v This patch save new speed if hw change speed successfully, which will be used after reset successfully. Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- .../hisilicon/hns3/hns3pf/hclge_main.c | 24 ++++++++++++++----- .../hisilicon/hns3/hns3pf/hclge_mdio.c | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 125e04434611..465f0d582283 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2653,8 +2653,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int ret; - return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + + if (ret) + return ret; + + hdev->hw.mac.req_speed = speed; + hdev->hw.mac.req_duplex = duplex; + + return 0; } static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) @@ -2956,17 +2965,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) hdev->hw.mac.duplex = HCLGE_MAC_FULL; - ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, - hdev->hw.mac.duplex, hdev->hw.mac.lane_num); - if (ret) - return ret; - if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); if (ret) return ret; } + if (!hdev->hw.mac.autoneg) { + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, + hdev->hw.mac.req_duplex, + hdev->hw.mac.lane_num); + if (ret) + return ret; + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 85fb11de43a1..80079657afeb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) if (ret) netdev_err(netdev, "failed to adjust link.\n"); + hdev->hw.mac.req_speed = (u32)speed; + hdev->hw.mac.req_duplex = (u8)duplex; + ret = hclge_cfg_flowctrl(hdev); if (ret) netdev_err(netdev, "failed to configure flow control.\n"); From be5e816d00a506719e9dbb1a9c861c5ced30a109 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 13 Aug 2024 22:10:22 +0800 Subject: [PATCH 1024/1523] net: hns3: fix a deadlock problem when config TC during resetting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When config TC during the reset process, may cause a deadlock, the flow is as below: pf reset start │ ▼ ...... setup tc │ │ ▼ ▼ DOWN: napi_disable() napi_disable()(skip) │ │ │ ▼ ▼ ...... ...... │ │ ▼ │ napi_enable() │ ▼ UINIT: netif_napi_del() │ ▼ ...... │ ▼ INIT: netif_napi_add() │ ▼ ...... global reset start │ │ ▼ ▼ UP: napi_enable()(skip) ...... │ │ ▼ ▼ ...... napi_disable() In reset process, the driver will DOWN the port and then UINIT, in this case, the setup tc process will UP the port before UINIT, so cause the problem. Adds a DOWN process in UINIT to fix it. Fixes: bb6b94a896d4 ("net: hns3: Add reset interface implementation in client") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a5fc0209d628..4cbc4d069a1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + hns3_nic_net_stop(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); return 0; From 86db7bfb06704ef17340eeae71c832f21cfce35c Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 13 Aug 2024 22:10:23 +0800 Subject: [PATCH 1025/1523] net: hns3: void array out of bound when loop tnl_num When query reg inf of SSU, it loops tnl_num times. However, tnl_num comes from hardware and the length of array is a fixed value. To void array out of bound, make sure the loop time is not greater than the length of array Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index e132c2f09560..cc7f46c0b35f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1598,8 +1598,7 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) { u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; struct hclge_mod_reg_common_msg msg; - u8 i, j, num; - u32 loop_time; + u8 i, j, num, loop_time; num = ARRAY_SIZE(hclge_ssu_reg_common_msg); for (i = 0; i < num; i++) { @@ -1609,7 +1608,8 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) loop_time = 1; loop_para[0] = 0; if (msg.need_para) { - loop_time = hdev->ae_dev->dev_specs.tnl_num; + loop_time = min(hdev->ae_dev->dev_specs.tnl_num, + HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); for (j = 0; j < loop_time; j++) loop_para[j] = j + 1; } From 7660833d217528c8f2385528951ab820a031e4e3 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 13 Aug 2024 22:10:24 +0800 Subject: [PATCH 1026/1523] net: hns3: use correct release function during uninitialization pci_request_regions is called to apply for PCI I/O and memory resources when the driver is initialized, Therefore, when the driver is uninstalled, pci_release_regions should be used to release PCI I/O and memory resources instead of pci_release_mem_regions is used to release memory reasouces only. Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 465f0d582283..6c33195a1168 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11456,7 +11456,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); - pci_release_mem_regions(pdev); + pci_release_regions(pdev); pci_disable_device(pdev); } From ddeb7989a98faf8da67ac613731a0eee32667b7d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 17 Jul 2024 07:04:28 -0700 Subject: [PATCH 1027/1523] drm/xe: Validate user fence during creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fail invalid addresses during user fence creation. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240717140429.1396820-1-matthew.brost@intel.com (cherry picked from commit 0fde907da2d5fd4da68845e96c6842497159c858) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index c4e018aa2982..e8d31e010860 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; From e98a032c0340d45c199f4eb536359f5762a8748f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 18 Jul 2024 14:05:45 -0700 Subject: [PATCH 1028/1523] drm/xe: Move part of xe_file cleanup to a helper In order to make xe_file ref counted, move destruction of xe_file members to a helper. v2: Move xe_vm_close_and_put back into xe_file_close (Matt) Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240718210548.3580382-2-umesh.nerlige.ramappa@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 3d0c4a62cc553c6ffde4cb11620eba991e770665) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76109415eba6..452dbc1b495b 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -90,9 +90,25 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) return 0; } +static void xe_file_destroy(struct xe_file *xef) +{ + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { - struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = file->driver_priv; struct xe_vm *vm; struct xe_exec_queue *q; @@ -108,21 +124,12 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); - - xe_drm_client_put(xef->client); - kfree(xef); + xe_file_destroy(xef); } static const struct drm_ioctl_desc xe_ioctls[] = { From d28bb0120f360e772458a7cf295d6d0ae3dc18a4 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 18 Jul 2024 14:05:46 -0700 Subject: [PATCH 1029/1523] drm/xe: Add ref counting for xe_file Add ref counting for xe_file. v2: - Add kernel doc for exported functions (Matt) - Instead of xe_file_destroy, export the get/put helpers (Lucas) v3: Fixup the kernel-doc format and description (Matt, Lucas) Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240718210548.3580382-3-umesh.nerlige.ramappa@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit ce8c161cbad43f4056451e541f7ae3471d0cca12) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 33 ++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_device.h | 3 +++ drivers/gpu/drm/xe/xe_device_types.h | 3 +++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 452dbc1b495b..4178bd05701e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -87,11 +87,14 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } -static void xe_file_destroy(struct xe_file *xef) +static void xe_file_destroy(struct kref *ref) { + struct xe_file *xef = container_of(ref, struct xe_file, refcount); struct xe_device *xe = xef->xe; xa_destroy(&xef->exec_queue.xa); @@ -107,6 +110,32 @@ static void xe_file_destroy(struct xe_file *xef) kfree(xef); } +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_file *xef = file->driver_priv; @@ -129,7 +158,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xe_file_destroy(xef); + xe_file_put(xef); } static const struct drm_ioctl_desc xe_ioctls[] = { diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb..b3952718b3c1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -170,4 +170,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744..cbc582bcc90a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -566,6 +566,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif From 6309f9b1fc4de2daa1293fe12a488d765e60507d Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 18 Jul 2024 14:05:47 -0700 Subject: [PATCH 1030/1523] drm/xe: Take a ref to xe file when user creates a VM Take a reference to xef when user creates the VM and put the reference when user destroys the VM. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240718210548.3580382-4-umesh.nerlige.ramappa@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a2387e69493df3de706f14e4573ee123d23d5d34) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684..6bfcbd4e778a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) From 817c70e2ba278e9d5360833b1137ef8855ac1728 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 18 Jul 2024 14:05:48 -0700 Subject: [PATCH 1031/1523] drm/xe: Fix use after free when client stats are captured xe_file_close triggers an asynchronous queue cleanup and then frees up the xef object. Since queue cleanup flushes all pending jobs and the KMD stores client usage stats into the xef object after jobs are flushed, we see a use-after-free for the xef object. Resolve this by taking a reference to xef from xe_exec_queue. While at it, revert an earlier change that contained a partial work around for this issue. v2: - Take a ref to xef even for the VM bind queue (Matt) - Squash patches relevant to that fix and work around (Lucas) v3: Fix typo (Lucas) Fixes: ce62827bc294 ("drm/xe: Do not access xe file when updating exec queue run_ticks") Fixes: 6109f24f87d7 ("drm/xe: Add helper to accumulate exec queue runtime") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1908 Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240718210548.3580382-5-umesh.nerlige.ramappa@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 2149ded63079449b8dddf9da38392632f155e6b5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_drm_client.c | 5 +---- drivers/gpu/drm/xe/xe_exec_queue.c | 10 +++++++++- drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 +++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e..7ddd59908334 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b..a39384bb9553 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c3..a35ce24c9798 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -139,10 +142,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; From 64da63cd3f7d771bf8f240e72203da1f72aa3728 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 18 Jul 2024 22:31:55 +0200 Subject: [PATCH 1032/1523] drm/xe/vf: Fix register value lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should use the number of actual entries stored in the runtime register buffer, not the maximum number of entries that this buffer can hold, otherwise bsearch() may fail and we may miss the data and wrongly report unexpected access to some registers. Fixes: 4edadc41a3a4 ("drm/xe/vf: Use register values obtained from the PF") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240718203155.486-1-michal.wajdeczko@intel.com (cherry picked from commit ad16682db18f4414e53bba1ce0db75b08bdc4dff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e..8892d6c2291e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } From 55ea73aacfb9a92def840a7110a468c5a76caeb5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 19 Jul 2024 10:29:05 -0700 Subject: [PATCH 1033/1523] drm/xe: Build PM into GuC CT layer Take PM ref when any G2H are outstanding, drop when none are outstanding. To safely ensure we have PM ref when in the GuC CT layer, a PM ref needs to be held when scheduler messages are pending too. v2: - Add outer PM protections to xe_file_close (CI) v3: - Only take PM ref 0->1 and drop on 1->0 (Matthew Auld) v4: - Add assert to G2H increment function v5: - Rebase v6: - Declare xe as local variable in xe_file_close (CI) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Auld Cc: Rodrigo Vivi Cc: Nirmoy Das Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240719172905.1527927-5-matthew.brost@intel.com (cherry picked from commit d930c19fdff3109e97b610fa10943b7602efcabd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 5 +++++ drivers/gpu/drm/xe/xe_guc_ct.c | 10 +++++++++- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 4178bd05701e..f2f1d8ddb221 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -138,11 +138,14 @@ void xe_file_put(struct xe_file *xef) static void xe_file_close(struct drm_device *dev, struct drm_file *file) { + struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = file->driver_priv; struct xe_vm *vm; struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -159,6 +162,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&xef->vm.lock); xe_file_put(xef); + + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d8..64afc90ad2c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8d7e7f4bbff7..6398629e6b4e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1393,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1482,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; From 4f7652dcd339aca6678084d42fda999ecb19b624 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 6 Aug 2024 20:05:16 +0200 Subject: [PATCH 1034/1523] drm/xe/pf: Fix VF config validation on multi-GT platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When validating VF config on the media GT, we may wrongly report that VF is already partially configured on it, as we consider GGTT and LMEM provisioning done on the primary GT (since both GGTT and LMEM are tile-level resources, not a GT-level). This will cause skipping a VF auto-provisioning on the media-GT and in result will block a VF from successfully initialize that GT. Fix that by considering GGTT and LMEM configurations only when checking if a VF provisioning is complete, and omit GGTT and LMEM when reporting empty/partial provisioning. Fixes: 234670cea9a2 ("drm/xe/pf: Skip fair VFs provisioning if already provisioned") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240806180516.618-1-michal.wajdeczko@intel.com (cherry picked from commit 5bdacb0907c1f531995b6ba47b832ac3a0182ae9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 4699b7836001..b6f0a7299c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } From 90be4cc6f7674a1478c4c750beeee3edd14aee38 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 19 Jul 2024 10:29:02 -0700 Subject: [PATCH 1035/1523] drm/xe: Add xe_gt_tlb_invalidation_fence_init helper Other layers should not be touching struct xe_gt_tlb_invalidation_fence directly, add helper for initialization. v2: - Add dma_fence_get and list init to xe_gt_tlb_invalidation_fence_init Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240719172905.1527927-2-matthew.brost@intel.com (cherry picked from commit a522b285c6b4b611406d59612a8d7241714d2e31) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 36 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 ++ drivers/gpu/drm/xe/xe_pt.c | 26 +-------------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8b..92a18a0e4acd 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -508,3 +508,39 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * + * Initialize TLB invalidation fence for use + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) +{ + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + dma_fence_get(&fence->base); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985..948f4a2f5214 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -26,4 +26,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence); + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0ad..4c17a1ec8f8b 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base); - INIT_LIST_HEAD(&ifence->base.link); - - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; From 58bfe6674467f4c037e89111e6007f25b34d8bb3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 19 Jul 2024 10:29:03 -0700 Subject: [PATCH 1036/1523] drm/xe: Drop xe_gt_tlb_invalidation_wait Having two methods to wait on GT TLB invalidations is not ideal. Remove xe_gt_tlb_invalidation_wait and only use GT TLB invalidation fences. In addition to two methods being less than ideal, once GT TLB invalidations are coalesced the seqno cannot be assigned during xe_gt_tlb_invalidation_ggtt/range. Thus xe_gt_tlb_invalidation_wait would not have a seqno to wait one. A fence however can be armed and later signaled. v3: - Add explaination about coalescing to commit message v4: - Don't put dma fence if defined on stack (CI) v5: - Initialize ret to zero (CI) v6: - Use invalidation_fence_signal helper in tlb timeout (Matthew Auld) Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240719172905.1527927-3-matthew.brost@intel.com (cherry picked from commit 61ac035361ae555ee5a17a7667fe96afdde3d59a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 148 ++++++++------------ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 10 +- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 30 ++-- 4 files changed, 80 insertions(+), 110 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 92a18a0e4acd..c3419d4412ce 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -17,6 +17,8 @@ #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +35,23 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +73,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +104,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +113,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +135,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +165,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,10 +175,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); @@ -209,7 +209,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +222,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +239,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +258,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) + return ret; - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +293,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +313,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +383,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -400,43 +399,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_vma_vm(vma)->usm.asid); } -/** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - /** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc @@ -480,12 +442,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -530,11 +487,13 @@ static const struct dma_fence_ops invalidation_fence_ops = { * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence * @gt: GT * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable * * Initialize TLB invalidation fence for use */ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) { spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&fence->base, &invalidation_fence_ops, @@ -542,5 +501,8 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, dma_fence_context_alloc(1), 1); spin_unlock_irq(>->tlb_invalidation.lock); INIT_LIST_HEAD(&fence->link); - dma_fence_get(&fence->base); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 948f4a2f5214..f430d5797af7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,10 +23,16 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence); + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 4c17a1ec8f8b..31a751a5de3f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1153,7 +1153,7 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); ifence->fence = fence; ifence->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 6bfcbd4e778a..931935ec33db 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3341,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; - int ret; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3369,29 +3369,31 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[id], true); + /* * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[id], vma); + if (ret < 0) + goto wait; + + tile_needs_invalidate |= BIT(id); } } - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; - } - } +wait: + for_each_tile(tile, xe, id) + if (tile_needs_invalidate & BIT(id)) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { From f002702290fccbd473f5bb94e52f25c96917fff2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 19 Jul 2024 10:29:04 -0700 Subject: [PATCH 1037/1523] drm/xe: Hold a PM ref when GT TLB invalidations are inflight Avoid GT TLB invalidation timeouts by holding a PM ref when invalidations are inflight. v2: - Drop PM ref before signaling fence (CI) v3: - Move invalidation_fence_signal helper in tlb timeout to previous patch (Matthew Auld) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Nirmoy Das Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240719172905.1527927-4-matthew.brost@intel.com (cherry picked from commit 0a382f9bc5dc4744a33970a5ed4df8f9c702ee9e) Requires: 46209ce5287b ("drm/xe: Add xe_gt_tlb_invalidation_fence_init helper") Requires: 0e414ab036e0 ("drm/xe: Drop xe_gt_tlb_invalidation_wait") Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 23 +++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 1 + .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h | 4 ++++ drivers/gpu/drm/xe/xe_vm.c | 4 +++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index c3419d4412ce..481d83d07367 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,6 +13,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" @@ -41,6 +42,7 @@ __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); dma_fence_signal(&fence->base); if (!stack) dma_fence_put(&fence->base); @@ -263,8 +265,10 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_fence_init(gt, &fence, true); ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret < 0) + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); return ret; + } xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { @@ -489,12 +493,15 @@ static const struct dma_fence_ops invalidation_fence_ops = { * @fence: TLB invalidation fence to initialize * @stack: fence is stack variable * - * Initialize TLB invalidation fence for use + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. */ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack) { + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&fence->base, &invalidation_fence_ops, >->tlb_invalidation.lock, @@ -505,4 +512,16 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, set_bit(FENCE_STACK_BIT, &fence->base.flags); else dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index f430d5797af7..a84065fa324c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); static inline void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31..de6e825e0851 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 931935ec33db..c7561a56abaf 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3379,8 +3379,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, &fence[id], vma); - if (ret < 0) + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[id]); goto wait; + } tile_needs_invalidate |= BIT(id); } From af8e119f52e9c13e556be9e03f27957554a84656 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Aug 2024 17:11:17 +0300 Subject: [PATCH 1038/1523] xhci: Fix Panther point NULL pointer deref at full-speed re-enumeration re-enumerating full-speed devices after a failed address device command can trigger a NULL pointer dereference. Full-speed devices may need to reconfigure the endpoint 0 Max Packet Size value during enumeration. Usb core calls usb_ep0_reinit() in this case, which ends up calling xhci_configure_endpoint(). On Panther point xHC the xhci_configure_endpoint() function will additionally check and reserve bandwidth in software. Other hosts do this in hardware If xHC address device command fails then a new xhci_virt_device structure is allocated as part of re-enabling the slot, but the bandwidth table pointers are not set up properly here. This triggers the NULL pointer dereference the next time usb_ep0_reinit() is called and xhci_configure_endpoint() tries to check and reserve bandwidth [46710.713538] usb 3-1: new full-speed USB device number 5 using xhci_hcd [46710.713699] usb 3-1: Device not responding to setup address. [46710.917684] usb 3-1: Device not responding to setup address. [46711.125536] usb 3-1: device not accepting address 5, error -71 [46711.125594] BUG: kernel NULL pointer dereference, address: 0000000000000008 [46711.125600] #PF: supervisor read access in kernel mode [46711.125603] #PF: error_code(0x0000) - not-present page [46711.125606] PGD 0 P4D 0 [46711.125610] Oops: Oops: 0000 [#1] PREEMPT SMP PTI [46711.125615] CPU: 1 PID: 25760 Comm: kworker/1:2 Not tainted 6.10.3_2 #1 [46711.125620] Hardware name: Gigabyte Technology Co., Ltd. [46711.125623] Workqueue: usb_hub_wq hub_event [usbcore] [46711.125668] RIP: 0010:xhci_reserve_bandwidth (drivers/usb/host/xhci.c Fix this by making sure bandwidth table pointers are set up correctly after a failed address device command, and additionally by avoiding checking for bandwidth in cases like this where no actual endpoints are added or removed, i.e. only context for default control endpoint 0 is evaluated. Reported-by: Karel Balej Closes: https://lore.kernel.org/linux-usb/D3CKQQAETH47.1MUO22RTCH2O3@matfyz.cz/ Cc: stable@vger.kernel.org Fixes: 651aaf36a7d7 ("usb: xhci: Handle USB transaction error on address command") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240815141117.2702314-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f82..efdf4c228b8c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; From dd925902634def895690426bf10e0a8b3e56f56d Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 9 Aug 2024 17:11:28 +0530 Subject: [PATCH 1039/1523] drm/i915/hdcp: Use correct cp_irq_count We are checking cp_irq_count from the wrong hdcp structure which ends up giving timed out errors. We only increment the cp_irq_count of the primary connector's hdcp structure but here in case of multidisplay setup we end up checking the secondary connector's hdcp structure, which will not have its cp_irq_count incremented. This leads to a timed out at CP_IRQ error even though a CP_IRQ was raised. Extract it from the correct intel_hdcp structure. --v2 -Explain why it was the wrong hdcp structure [Jani] Fixes: 8c9e4f68b861 ("drm/i915/hdcp: Use per-device debugs") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240809114127.3940699-2-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index ec357d2670f1..3425b3643143 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) From c7085d08c7e53d9aef0cdd4b20798356f6f5d469 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Tue, 13 Aug 2024 09:58:07 +0530 Subject: [PATCH 1040/1523] drm/i915/pps: Disable DPLS_GATING around pps sequence Disable bit 29 of SCLKGATE_DIS register around pps sequence when we turn panel power on. --v2 -Squash two commit together [Jani] -Use IS_DISPLAY_VER [Jani] -Fix multiline comment [Jani] --v3 -Define register in a more appropriate place [Mitul] --v4 -Register is already defined no need to define it again [Ville] -Use correct WA number (lineage no.) [Dnyaneshwar] -Fix the range on which this WA is applied [Dnyaneshwar] Bspec: 49304 Signed-off-by: Suraj Kandpal Reviewed-by: Dnyaneshwar Bhadane Link: https://patchwork.freedesktop.org/patch/msgid/20240813042807.4015214-1-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_pps.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 7ce926241e83..0918eb218fc8 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -951,6 +951,14 @@ void intel_pps_on_unlocked(struct intel_dp *intel_dp) intel_de_posting_read(dev_priv, pp_ctrl_reg); } + /* + * WA: 22019252566 + * Disable DPLS gating around power sequence. + */ + if (IS_DISPLAY_VER(dev_priv, 13, 14)) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + 0, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); + pp |= PANEL_POWER_ON; if (!IS_IRONLAKE(dev_priv)) pp |= PANEL_POWER_RESET; @@ -961,6 +969,10 @@ void intel_pps_on_unlocked(struct intel_dp *intel_dp) wait_panel_on(intel_dp); intel_dp->pps.last_power_on = jiffies; + if (IS_DISPLAY_VER(dev_priv, 13, 14)) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + PCH_DPLSUNIT_CLOCK_GATE_DISABLE, 0); + if (IS_IRONLAKE(dev_priv)) { pp |= PANEL_POWER_RESET; /* restore panel reset bit */ intel_de_write(dev_priv, pp_ctrl_reg, pp); From 9bb5e74b2bf88fbb024bb15ded3b011e02c673be Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 11:49:20 +0200 Subject: [PATCH 1041/1523] Revert "misc: fastrpc: Restrict untrusted app to attach to privileged PD" This reverts commit bab2f5e8fd5d2f759db26b78d9db57412888f187. Joel reported that this commit breaks userspace and stops sensors in SDM845 from working. Also breaks other qcom SoC devices running postmarketOS. Cc: stable Cc: Ekansh Gupta Cc: Dmitry Baryshkov Reported-by: Joel Selvaraj Link: https://lore.kernel.org/r/9a9f5646-a554-4b65-8122-d212bb665c81@umsystem.edu Signed-off-by: Griffin Kroah-Hartman Acked-by: Srinivas Kandagatla Fixes: bab2f5e8fd5d ("misc: fastrpc: Restrict untrusted app to attach to privileged PD") Link: https://lore.kernel.org/r/20240815094920.8242-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 22 +++------------------- include/uapi/misc/fastrpc.h | 3 --- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3..339d126414d4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) From 92e9bac18124682c4b99ede9ee3bcdd68f121e92 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 15 Aug 2024 01:04:31 +0100 Subject: [PATCH 1042/1523] kunit/overflow: Fix UB in overflow_allocation_test The 'device_name' array doesn't exist out of the 'overflow_allocation_test' function scope. However, it is being used as a driver name when calling 'kunit_driver_create' from 'kunit_device_register'. It produces the kernel panic with KASAN enabled. Since this variable is used in one place only, remove it and pass the device name into kunit_device_register directly as an ascii string. Signed-off-by: Ivan Orlov Reviewed-by: David Gow Link: https://lore.kernel.org/r/20240815000431.401869-1-ivan.orlov0322@gmail.com Signed-off-by: Kees Cook --- lib/overflow_kunit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index f314a0c15a6d..2abc78367dd1 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. */ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); From 020925ce92990c3bf59ab2cde386ac6d9ec734ff Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 7 Aug 2024 15:05:12 -0700 Subject: [PATCH 1043/1523] kallsyms: Do not cleanup .llvm. suffix before sorting symbols Cleaning up the symbols causes various issues afterwards. Let's sort the list based on original name. Signed-off-by: Song Liu Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") Reviewed-by: Masami Hiramatsu (Google) Tested-by: Masami Hiramatsu (Google) Acked-by: Petr Mladek Reviewed-by: Sami Tolvanen Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240807220513.3100483-2-song@kernel.org Signed-off-by: Kees Cook --- scripts/kallsyms.c | 31 ++----------------------------- scripts/link-vmlinux.sh | 4 ---- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0ed873491bf5..123dab0572f8 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,8 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] - * [--lto-clang] in.map > out.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -62,7 +61,6 @@ static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; -static int lto_clang; static int token_profit[0x10000]; @@ -73,8 +71,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " - "[--lto-clang] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); exit(1); } @@ -344,25 +341,6 @@ static bool symbol_absolute(const struct sym_entry *s) return s->percpu_absolute; } -static void cleanup_symbol_name(char *s) -{ - char *p; - - /* - * ASCII[.] = 2e - * ASCII[0-9] = 30,39 - * ASCII[A-Z] = 41,5a - * ASCII[_] = 5f - * ASCII[a-z] = 61,7a - * - * As above, replacing the first '.' in ".llvm." with '\0' does not - * affect the main sorting, but it helps us with subsorting. - */ - p = strstr(s, ".llvm."); - if (p) - *p = '\0'; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -526,10 +504,6 @@ static void write_src(void) output_address(relative_base); printf("\n"); - if (lto_clang) - for (i = 0; i < table_cnt; i++) - cleanup_symbol_name((char *)table[i]->sym); - sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) @@ -807,7 +781,6 @@ int main(int argc, char **argv) static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, {"absolute-percpu", no_argument, &absolute_percpu, 1}, - {"lto-clang", no_argument, <o_clang, 1}, {}, }; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba9..22d0bc843986 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -156,10 +156,6 @@ kallsyms() kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_LTO_CLANG; then - kallsymopt="${kallsymopt} --lto-clang" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" From fb6a421fb6153d97cf3058f9bd550b377b76a490 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 7 Aug 2024 15:05:13 -0700 Subject: [PATCH 1044/1523] kallsyms: Match symbols exactly with CONFIG_LTO_CLANG With CONFIG_LTO_CLANG=y, the compiler may add .llvm. suffix to function names to avoid duplication. APIs like kallsyms_lookup_name() and kallsyms_on_each_match_symbol() tries to match these symbol names without the .llvm. suffix, e.g., match "c_stop" with symbol c_stop.llvm.17132674095431275852. This turned out to be problematic for use cases that require exact match, for example, livepatch. Fix this by making the APIs to match symbols exactly. Also cleanup kallsyms_selftests accordingly. Signed-off-by: Song Liu Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") Tested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Petr Mladek Reviewed-by: Sami Tolvanen Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240807220513.3100483-3-song@kernel.org Signed-off-by: Kees Cook --- kernel/kallsyms.c | 55 +++++--------------------------------- kernel/kallsyms_selftest.c | 22 +-------------- 2 files changed, 7 insertions(+), 70 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fb2c77368d18..a9a0ca605d4a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static void cleanup_symbol_name(char *s) -{ - char *res; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes only in LLVM LTO observed: - * - foo.llvm.[0-9a-f]+ - */ - res = strstr(s, ".llvm."); - if (res) - *res = '\0'; - - return; -} - -static int compare_symbol_name(const char *name, char *namebuf) -{ - /* The kallsyms_seqs_of_names is sorted based on names after - * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. - * To ensure correct bisection in kallsyms_lookup_names(), do - * cleanup_symbol_name(namebuf) before comparing name and namebuf. - */ - cleanup_symbol_name(namebuf); - return strcmp(name, namebuf); -} - static unsigned int get_symbol_seq(int index) { unsigned int i, seq = 0; @@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(mid); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - ret = compare_symbol_name(name, namebuf); + ret = strcmp(name, namebuf); if (ret > 0) low = mid + 1; else if (ret < 0) @@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(low - 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; low--; } @@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(high + 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; high++; } @@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = strlen(namebuf); - goto found; + return strlen(namebuf); } /* See if it's in a module or a BPF JITed image. */ @@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); -found: - cleanup_symbol_name(namebuf); return ret; } @@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr, int lookup_symbol_name(unsigned long addr, char *symname) { - int res; - symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - goto found; + return 0; } /* See if it's in a module. */ - res = lookup_module_symbol_name(addr, symname); - if (res) - return res; - -found: - cleanup_symbol_name(symname); - return 0; + return lookup_module_symbol_name(addr, symname); } /* Look up a kernel symbol and return it in a text buffer. */ diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 2f84896a7bcb..873f7c445488 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void) stat.min, stat.max, div_u64(stat.sum, stat.real_cnt)); } -static bool match_cleanup_name(const char *s, const char *name) -{ - char *p; - int len; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - p = strstr(s, ".llvm."); - if (!p) - return false; - - len = strlen(name); - if (p - s != len) - return false; - - return !strncmp(s, name, len); -} - static int find_symbol(void *data, const char *name, unsigned long addr) { struct test_stat *stat = (struct test_stat *)data; - if (strcmp(name, stat->name) == 0 || - (!stat->perf && match_cleanup_name(name, stat->name))) { + if (!strcmp(name, stat->name)) { stat->real_cnt++; stat->addr = addr; From aae6b81260fd9a7224f7eb4fc440d625852245bb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 10:43:48 -0400 Subject: [PATCH 1045/1523] Bluetooth: HCI: Invert LE State quirk to be opt-out rather then opt-in This inverts the LE State quirk so by default we assume the controllers would report valid states rather than invalid which is how quirks normally behave, also this would result in HCI command failing it the LE States are really broken thus exposing the controllers that are really broken in this respect. Link: https://github.com/bluez/bluez/issues/584 Fixes: 220915857e29 ("Bluetooth: Adding driver and quirk defs for multi-role LE") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 10 ---------- drivers/bluetooth/btintel_pcie.c | 3 --- drivers/bluetooth/btmtksdio.c | 3 --- drivers/bluetooth/btrtl.c | 1 - drivers/bluetooth/btusb.c | 4 ++-- drivers/bluetooth/hci_qca.c | 4 ++-- drivers/bluetooth/hci_vhci.c | 2 -- include/net/bluetooth/hci.h | 17 ++++++++++------- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_event.c | 2 +- 10 files changed, 16 insertions(+), 32 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 2ebc970e6573..7d5e4de64e3c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2945,9 +2945,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) - set_bit(HCI_QUIRK_VALID_LE_STATES, - &hdev->quirks); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -2956,7 +2953,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x12: /* ThP */ case 0x13: /* HrP */ case 0x14: /* CcP */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); fallthrough; case 0x0c: /* WsP */ /* Apply the device specific HCI quirks @@ -3048,9 +3044,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* These variants don't seem to support LE Coded PHY */ set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); - /* Set Valid LE States quirk */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3076,9 +3069,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0d1a0415557b..1c7631f22c52 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1180,9 +1180,6 @@ static int btintel_pcie_setup(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 39d6898497a4..497e4c87f5be 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1148,9 +1148,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) } } - /* Valid LE States quirk for MediaTek 7921 */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - break; case 0x7663: case 0x7668: diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index f2f37143c454..fd7991ea7672 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,6 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8852C: case CHIP_ID_8851B: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); /* RTL8852C needs to transmit mSBC data continuously without diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index acdba5d77694..51d9d4532dda 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3956,8 +3956,8 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_WIDEBAND_SPEECH) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (id->driver_info & BTUSB_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(id->driver_info & BTUSB_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 45adc1560d94..4b1ad7ea5b95 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2474,8 +2474,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (data->capabilities & QCA_CAP_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); } return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index c4046f8f1985..43e9ac5a3324 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -425,8 +425,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f..d1d073089f38 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68..e449dba698f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d0c118c47f6c..1c82dcdf6e8f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5920,7 +5920,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || + (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || !(hdev->le_states[3] & 0x10))) return NULL; From 932021a11805b9da4bd6abf66fe233cccd59fe0e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 11:22:08 -0400 Subject: [PATCH 1046/1523] Bluetooth: hci_core: Fix LE quote calculation Function hci_sched_le needs to update the respective counter variable inplace other the likes of hci_quote_sent would attempt to use the possible outdated value of conn->{le_cnt,acl_cnt}. Link: https://github.com/bluez/bluez/issues/915 Fixes: 73d80deb7bdf ("Bluetooth: prioritizing data over HCI") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 06da8ac13dca..f25a21f532aa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3664,19 +3664,19 @@ static void hci_sched_le(struct hci_dev *hdev) { struct hci_chan *chan; struct sk_buff *skb; - int quote, cnt, tmp; + int quote, *cnt, tmp; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, LE_LINK)) return; - cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - __check_timeout(hdev, cnt, LE_LINK); + __check_timeout(hdev, *cnt, LE_LINK); - tmp = cnt; - while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { + tmp = *cnt; + while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, @@ -3691,7 +3691,7 @@ static void hci_sched_le(struct hci_dev *hdev) hci_send_frame(hdev, skb); hdev->le_last_tx = jiffies; - cnt--; + (*cnt)--; chan->sent++; chan->conn->sent++; @@ -3701,12 +3701,7 @@ static void hci_sched_le(struct hci_dev *hdev) } } - if (hdev->le_pkts) - hdev->le_cnt = cnt; - else - hdev->acl_cnt = cnt; - - if (cnt != tmp) + if (*cnt != tmp) hci_prio_recalculate(hdev, LE_LINK); } From 28cd47f75185c4818b0fb1b46f2f02faaba96376 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 30 Aug 2023 15:08:06 -0700 Subject: [PATCH 1047/1523] Bluetooth: SMP: Fix assumption of Central always being Initiator SMP initiator role shall be considered the one that initiates the pairing procedure with SMP_CMD_PAIRING_REQ: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part H page 1557: Figure 2.1: LE pairing phases Note that by sending SMP_CMD_SECURITY_REQ it doesn't change the role to be Initiator. Link: https://github.com/bluez/bluez/issues/567 Fixes: b28b4943660f ("Bluetooth: Add strict checks for allowed SMP PDUs") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1e7ea3a4b7ef..4f9fdf400584 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, * Confirms and the responder Enters the passkey. */ if (smp->method == OVERLAP) { - if (hcon->role == HCI_ROLE_MASTER) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp->method = CFM_PASSKEY; else smp->method = REQ_PASSKEY; @@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp) smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); else SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp) int ret; bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, - conn->hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, @@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp) return SMP_CONFIRM_FAILED; } - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 stk[16]; __le64 rand = 0; __le16 ediv = 0; @@ -1256,14 +1257,15 @@ static void smp_distribute_keys(struct smp_chan *smp) rsp = (void *) &smp->prsp[1]; /* The responder sends its keys first */ - if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) && + (smp->remote_key_dist & KEY_DIST_MASK)) { smp_allow_key_dist(smp); return; } req = (void *) &smp->preq[1]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { keydist = &rsp->init_key_dist; *keydist &= req->init_key_dist; } else { @@ -1432,7 +1434,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) struct hci_conn *hcon = smp->conn->hcon; u8 *na, *nb, a[7], b[7]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { na = smp->prnd; nb = smp->rrnd; } else { @@ -1460,7 +1462,7 @@ static void sc_dhkey_check(struct smp_chan *smp) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->preq[1], 3); @@ -1539,7 +1541,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) /* The round is only complete when the initiator * receives pairing random. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); if (smp->passkey_round == 20) @@ -1567,7 +1569,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); return 0; @@ -1578,7 +1580,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) case SMP_CMD_PUBLIC_KEY: default: /* Initiating device starts the round */ - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; bt_dev_dbg(hdev, "Starting passkey round %u", @@ -1623,7 +1625,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey) } /* Initiator sends DHKey check first */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) { @@ -1746,7 +1748,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct l2cap_chan *chan = conn->smp; struct hci_dev *hdev = conn->hcon->hdev; - struct smp_chan *smp; + struct smp_chan *smp = chan->data; u8 key_size, auth, sec_level; int ret; @@ -1755,16 +1757,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*req)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_SLAVE) + if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; - if (!chan->data) + if (!smp) { smp = smp_chan_create(conn); - else - smp = chan->data; - - if (!smp) - return SMP_UNSPECIFIED; + if (!smp) + return SMP_UNSPECIFIED; + } /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); @@ -1946,7 +1946,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*rsp)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_MASTER) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; skb_pull(skb, sizeof(*rsp)); @@ -2041,7 +2041,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2063,7 +2063,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) u8 auth; /* The issue is only observed when we're in responder role */ - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { @@ -2099,7 +2099,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) struct hci_dev *hdev = hcon->hdev; bt_dev_dbg(hdev, "conn %p %s", conn, - hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); if (skb->len < sizeof(smp->pcnf)) return SMP_INVALID_PARAMS; @@ -2121,7 +2122,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) return ret; } - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2156,7 +2157,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_SC, &smp->flags)) return smp_random(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { pkax = smp->local_pk; pkbx = smp->remote_pk; na = smp->prnd; @@ -2169,7 +2170,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); @@ -2180,7 +2181,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 cfm[16]; err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk, @@ -2221,7 +2222,7 @@ mackey_and_ltk: return SMP_UNSPECIFIED; if (smp->method == REQ_OOB) { - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } @@ -2295,10 +2296,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, return false; } +static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_pairing cp; + + if (smp->conn->hcon->type == ACL_LINK) + build_bredr_pairing_cmd(smp, &cp, NULL); + else + build_pairing_cmd(smp->conn, &cp, NULL, auth); + + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); + + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + + set_bit(SMP_FLAG_INITIATOR, &smp->flags); +} + static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_security_req *rp = (void *) skb->data; - struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; struct smp_chan *smp; @@ -2347,18 +2365,22 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(*rp)); - memset(&cp, 0, sizeof(cp)); - build_pairing_cmd(conn, &cp, NULL, auth); - - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + smp_send_pairing_req(smp, auth); return 0; } +static void smp_send_security_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_security_req cp; + + cp.auth_req = auth; + smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); + + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); +} + int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -2427,23 +2449,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq |= SMP_AUTH_MITM; } - if (hcon->role == HCI_ROLE_MASTER) { - struct smp_cmd_pairing cp; + if (hcon->role == HCI_ROLE_MASTER) + smp_send_pairing_req(smp, authreq); + else + smp_send_security_req(smp, authreq); - build_pairing_cmd(conn, &cp, NULL, authreq); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); - } else { - struct smp_cmd_security_req cp; - cp.auth_req = authreq; - smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); - } - - set_bit(SMP_FLAG_INITIATOR, &smp->flags); ret = 0; unlock: @@ -2694,8 +2704,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) static u8 sc_select_method(struct smp_chan *smp) { - struct l2cap_conn *conn = smp->conn; - struct hci_conn *hcon = conn->hcon; struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; @@ -2708,7 +2716,7 @@ static u8 sc_select_method(struct smp_chan *smp) * the "struct smp_cmd_pairing" from them we need to skip the * first byte which contains the opcode. */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local = (void *) &smp->preq[1]; remote = (void *) &smp->prsp[1]; } else { @@ -2777,7 +2785,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { err = sc_send_public_key(smp); if (err) return err; @@ -2839,7 +2847,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2848,7 +2856,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); if (smp->method == REQ_PASSKEY) { @@ -2863,7 +2871,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* The Initiating device waits for the non-initiating device to * send the confirm value. */ - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd, @@ -2897,7 +2905,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->prsp[1], 3); @@ -2922,7 +2930,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) { set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags); return 0; @@ -2934,7 +2942,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) sc_add_ltk(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; } @@ -3083,7 +3091,6 @@ static void bredr_pairing(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; - struct smp_cmd_pairing req; struct smp_chan *smp; bt_dev_dbg(hdev, "chan %p", chan); @@ -3135,14 +3142,7 @@ static void bredr_pairing(struct l2cap_chan *chan) bt_dev_dbg(hdev, "starting SMP over BR/EDR"); - /* Prepare and send the BR/EDR SMP Pairing Request */ - build_bredr_pairing_cmd(smp, &req, NULL); - - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &req, sizeof(req)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + smp_send_pairing_req(smp, 0x00); } static void smp_resume_cb(struct l2cap_chan *chan) From 538fd3921afac97158d4177139a0ad39f056dbb2 Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 13:51:00 +0200 Subject: [PATCH 1048/1523] Bluetooth: MGMT: Add error handling to pair_device() hci_conn_params_add() never checks for a NULL value and could lead to a NULL pointer dereference causing a crash. Fixed by adding error handling in the function. Cc: Stable Fixes: 5157b8a503fa ("Bluetooth: Fix initializing conn_params in scan phase") Signed-off-by: Griffin Kroah-Hartman Reported-by: Yiwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 40d4887c7f79..25979f4283a6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3456,6 +3456,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * will be kept and this function does nothing. */ p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + if (!p) { + err = -EIO; + goto unlock; + } if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; From 624ab9cde26a9f150b4fd268b0f3dae3184dc40c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 16 Jul 2024 09:06:30 -0700 Subject: [PATCH 1049/1523] drm/msm/adreno: Fix error return if missing firmware-name -ENODEV is used to signify that there is no zap shader for the platform, and the CPU can directly take the GPU out of secure mode. We want to use this return code when there is no zap-shader node. But not when there is, but without a firmware-name property. This case we want to treat as-if the needed fw is not found. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/604564/ --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1c6626747b98..ecc3fc5cec22 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * was a bad idea, and is only provided for backwards * compatibility for older targets. */ - return -ENODEV; + return -ENOENT; } if (IS_ERR(fw)) { From c0247d289e73e18f6ddb0895de30c09770fbed95 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 13 Aug 2024 12:53:15 +0200 Subject: [PATCH 1050/1523] btrfs: send: annotate struct name_cache_entry with __counted_by() Add the __counted_by compiler attribute to the flexible array member name to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Signed-off-by: Thorsten Blum Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7fc692fc76e1..619fa0b8b3f6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ From 3bc2ac2f8f0b78a13140fc72022771efe0c9b778 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 Aug 2024 12:30:52 -0400 Subject: [PATCH 1051/1523] btrfs: update target inode's ctime on unlink Unlink changes the link count on the target inode. POSIX mandates that the ctime must also change when this occurs. According to https://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html: "Upon successful completion, unlink() shall mark for update the last data modification and last file status change timestamps of the parent directory. Also, if the file's link count is not 0, the last file status change timestamp of the file shall be marked for update." Signed-off-by: Jeff Layton Reviewed-by: David Sterba [ add link to the opengroup docs ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 333b0e8587a2..b1b6564ab68f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4195,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); From 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 11 Aug 2024 15:00:22 +0930 Subject: [PATCH 1052/1523] btrfs: tree-checker: add dev extent item checks [REPORT] There is a corruption report that btrfs refused to mount a fs that has overlapping dev extents: BTRFS error (device sdc): dev extent devid 4 physical offset 14263979671552 overlap with previous dev extent end 14263980982272 BTRFS error (device sdc): failed to verify dev extents against chunks: -117 BTRFS error (device sdc): open_ctree failed [CAUSE] The direct cause is very obvious, there is a bad dev extent item with incorrect length. With btrfs check reporting two overlapping extents, the second one shows some clue on the cause: ERROR: dev extent devid 4 offset 14263979671552 len 6488064 overlap with previous dev extent end 14263980982272 ERROR: dev extent devid 13 offset 2257707008000 len 6488064 overlap with previous dev extent end 2257707270144 ERROR: errors found in extent allocation tree or chunk allocation The second one looks like a bitflip happened during new chunk allocation: hex(2257707008000) = 0x20da9d30000 hex(2257707270144) = 0x20da9d70000 diff = 0x00000040000 So it looks like a bitflip happened during new dev extent allocation, resulting the second overlap. Currently we only do the dev-extent verification at mount time, but if the corruption is caused by memory bitflip, we really want to catch it before writing the corruption to the storage. Furthermore the dev extent items has the following key definition: ( DEV_EXTENT ) Thus we can not just rely on the generic key order check to make sure there is no overlapping. [ENHANCEMENT] Introduce dedicated dev extent checks, including: - Fixed member checks * chunk_tree should always be BTRFS_CHUNK_TREE_OBJECTID (3) * chunk_objectid should always be BTRFS_FIRST_CHUNK_CHUNK_TREE_OBJECTID (256) - Alignment checks * chunk_offset should be aligned to sectorsize * length should be aligned to sectorsize * key.offset should be aligned to sectorsize - Overlap checks If the previous key is also a dev-extent item, with the same device id, make sure we do not overlap with the previous dev extent. Reported: Stefan N Link: https://lore.kernel.org/linux-btrfs/CA+W5K0rSO3koYTo=nzxxTm1-Pdu1HYgVxEpgJ=aGc7d=E8mGEg@mail.gmail.com/ CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 69 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6f1e2f2215d9..634d69964fe4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1764,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1800,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; From e30729d4bd4001881be4d1ad4332a5d4985398f8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 1 Aug 2024 16:47:52 +0900 Subject: [PATCH 1053/1523] btrfs: zoned: properly take lock to read/update block group's zoned variables __btrfs_add_free_space_zoned() references and modifies bg's alloc_offset, ro, and zone_unusable, but without taking the lock. It is mostly safe because they monotonically increase (at least for now) and this function is mostly called by a transaction commit, which is serialized by itself. Still, taking the lock is a safer and correct option and I'm going to add a change to reset zone_unusable while a block group is still alive. So, add locking around the operations. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f5996a43db24..eaa1dbd31352 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,7 +2719,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. @@ -2727,11 +2730,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, block_group->zone_unusable += to_unusable; WARN_ON(block_group->zone_unusable > block_group->length); } - spin_unlock(&ctl->tree_lock); if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2745,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } From 6486cad00a8b7f8585983408c152bbe33dda529b Mon Sep 17 00:00:00 2001 From: David Gstir Date: Wed, 17 Jul 2024 13:28:44 +0200 Subject: [PATCH 1054/1523] KEYS: trusted: fix DCP blob payload length assignment The DCP trusted key type uses the wrong helper function to store the blob's payload length which can lead to the wrong byte order being used in case this would ever run on big endian architectures. Fix by using correct helper function. Cc: stable@vger.kernel.org # v6.10+ Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys") Suggested-by: Richard Weinberger Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202405240610.fj53EK0q-lkp@intel.com/ Signed-off-by: David Gstir Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/trusted-keys/trusted_dcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b5f81a05be36..b0947f072a98 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -222,7 +222,7 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) return ret; } - b->payload_len = get_unaligned_le32(&p->key_len); + put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; return 0; } From 0e28bf61a5f9ab30be3f3b4eafb8d097e39446bb Mon Sep 17 00:00:00 2001 From: David Gstir Date: Wed, 17 Jul 2024 13:28:45 +0200 Subject: [PATCH 1055/1523] KEYS: trusted: dcp: fix leak of blob encryption key Trusted keys unseal the key blob on load, but keep the sealed payload in the blob field so that every subsequent read (export) will simply convert this field to hex and send it to userspace. With DCP-based trusted keys, we decrypt the blob encryption key (BEK) in the Kernel due hardware limitations and then decrypt the blob payload. BEK decryption is done in-place which means that the trusted key blob field is modified and it consequently holds the BEK in plain text. Every subsequent read of that key thus send the plain text BEK instead of the encrypted BEK to userspace. This issue only occurs when importing a trusted DCP-based key and then exporting it again. This should rarely happen as the common use cases are to either create a new trusted key and export it, or import a key blob and then just use it without exporting it again. Fix this by performing BEK decryption and encryption in a dedicated buffer. Further always wipe the plain text BEK buffer to prevent leaking the key via uninitialized memory. Cc: stable@vger.kernel.org # v6.10+ Fixes: 2e8a0f40a39c ("KEYS: trusted: Introduce NXP DCP-backed trusted keys") Signed-off-by: David Gstir Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/trusted-keys/trusted_dcp.c | 33 +++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b0947f072a98..4edc5bbbcda3 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -186,20 +186,21 @@ out: return ret; } -static int decrypt_blob_key(u8 *key) +static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key) { - return do_dcp_crypto(key, key, false); + return do_dcp_crypto(encrypted_key, plain_key, false); } -static int encrypt_blob_key(u8 *key) +static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key) { - return do_dcp_crypto(key, key, true); + return do_dcp_crypto(plain_key, encrypted_key, true); } static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; blen = calc_blob_len(p->key_len); if (blen > MAX_BLOB_SIZE) @@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) b->fmt_version = DCP_BLOB_VERSION; get_random_bytes(b->nonce, AES_KEYSIZE_128); - get_random_bytes(b->blob_key, AES_KEYSIZE_128); + get_random_bytes(plain_blob_key, AES_KEYSIZE_128); - ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key, + ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key, b->nonce, true); if (ret) { pr_err("Unable to encrypt blob payload: %i\n", ret); - return ret; + goto out; } - ret = encrypt_blob_key(b->blob_key); + ret = encrypt_blob_key(plain_blob_key, b->blob_key); if (ret) { pr_err("Unable to encrypt blob key: %i\n", ret); - return ret; + goto out; } put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; - return 0; + ret = 0; + +out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + + return ret; } static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; if (b->fmt_version != DCP_BLOB_VERSION) { pr_err("DCP blob has bad version: %i, expected %i\n", @@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) goto out; } - ret = decrypt_blob_key(b->blob_key); + ret = decrypt_blob_key(b->blob_key, plain_blob_key); if (ret) { pr_err("Unable to decrypt blob key: %i\n", ret); goto out; } ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN, - b->blob_key, b->nonce, false); + plain_blob_key, b->nonce, false); if (ret) { pr_err("Unwrap of DCP payload failed: %i\n", ret); goto out; @@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) ret = 0; out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + return ret; } From 23ab1cb6591dba7c97b65eb407cd71147bd878b8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 12 Aug 2024 15:13:32 +0100 Subject: [PATCH 1056/1523] drm/xe: fix engine_class bounds check again This was fixed in commit b7dce525c4fc ("drm/xe/queue: fix engine_class bounds check"), but then re-introduced in commit 6f20fc09936e ("drm/xe: Move and export xe_hw_engine lookup.") which should only be simple code movement of the existing function. Fixes: 6f20fc09936e ("drm/xe: Move and export xe_hw_engine lookup.") Signed-off-by: Matthew Auld Cc: Dominik Grzegorzek Cc: Mika Kuoppala Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240812141331.729843-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 402dfa748e16..e195022ca836 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1189,7 +1189,7 @@ xe_hw_engine_lookup(struct xe_device *xe, { unsigned int idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) From b0ee81dac3205db1e01019629c83595e9433d96b Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 14 Aug 2024 13:56:54 -0700 Subject: [PATCH 1057/1523] drm/xe: Make exec_queue_kill safe to call twice An upcoming PXP patch will kill queues at runtime when a PXP invalidation event occurs, so we need exec_queue_kill to be safe to call multiple times. v2: Add documentation (Matt B) Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814205654.1716586-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 9 +++++++++ drivers/gpu/drm/xe/xe_vm.c | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 971e1234b8ea..7d170d37fdbe 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -770,6 +770,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. + */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89d86b7cdb02..d6386be79e91 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -275,6 +275,8 @@ out_up_write: * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM * @vm: The VM. * @q: The exec_queue + * + * Note that this function might be called multiple times on the same queue. */ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { @@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; + if (!list_empty(&q->lr.link)) { + list_del_init(&q->lr.link); + --vm->preempt.num_exec_queues; + } if (q->lr.pfence) { dma_fence_enable_sw_signaling(q->lr.pfence); dma_fence_put(q->lr.pfence); From e01d48c699bbe015d887cb598e4047f08f3998a8 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 14 Aug 2024 21:26:19 +0200 Subject: [PATCH 1058/1523] riscv: Fix out-of-bounds when accessing Andes per hart vendor extension array The out-of-bounds access is reported by UBSAN: [ 0.000000] UBSAN: array-index-out-of-bounds in ../arch/riscv/kernel/vendor_extensions.c:41:66 [ 0.000000] index -1 is out of range for type 'riscv_isavendorinfo [32]' [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.11.0-rc2ubuntu-defconfig #2 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] Call Trace: [ 0.000000] [] dump_backtrace+0x32/0x40 [ 0.000000] [] show_stack+0x38/0x44 [ 0.000000] [] dump_stack_lvl+0x70/0x9c [ 0.000000] [] dump_stack+0x18/0x20 [ 0.000000] [] ubsan_epilogue+0x10/0x46 [ 0.000000] [] __ubsan_handle_out_of_bounds+0x94/0x9c [ 0.000000] [] __riscv_isa_vendor_extension_available+0x90/0x92 [ 0.000000] [] riscv_cpufeature_patch_func+0xc4/0x148 [ 0.000000] [] _apply_alternatives+0x42/0x50 [ 0.000000] [] apply_boot_alternatives+0x3c/0x100 [ 0.000000] [] setup_arch+0x85a/0x8bc [ 0.000000] [] start_kernel+0xa4/0xfb6 The dereferencing using cpu should actually not happen, so remove it. Fixes: 23c996fc2bc1 ("riscv: Extend cpufeature.c to detect vendor extensions") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240814192619.276794-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vendor_extensions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index b6c1e7b5d34b..a8126d118341 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -38,7 +38,7 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES case ANDES_VENDOR_ID: bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; - cpu_bmap = &riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap[cpu]; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif default: From 74c2ab6d653b4c2354df65a7f7f2df1925a40a51 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 8 Aug 2024 20:23:32 +0800 Subject: [PATCH 1059/1523] smb/client: avoid possible NULL dereference in cifs_free_subrequest() Clang static checker (scan-build) warning: cifsglob.h:line 890, column 3 Access to field 'ops' results in a dereference of a null pointer. Commit 519be989717c ("cifs: Add a tracepoint to track credits involved in R/W requests") adds a check for 'rdata->server', and let clang throw this warning about NULL dereference. When 'rdata->credits.value != 0 && rdata->server == NULL' happens, add_credits_and_wake_if() will call rdata->server->ops->add_credits(). This will cause NULL dereference problem. Add a check for 'rdata->server' to avoid NULL dereference. Cc: stable@vger.kernel.org Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Reviewed-by: David Howells Signed-off-by: Su Hui Signed-off-by: Steve French --- fs/smb/client/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4..45459af5044d 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -315,7 +315,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -323,8 +323,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } From c916ca35308d3187c9928664f9be249b22a3a701 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 3 Aug 2024 17:11:37 +0800 Subject: [PATCH 1060/1523] md/raid1: Fix data corruption for degraded array with slow disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_balance() will avoid reading from slow disks as much as possible, however, if valid data only lands in slow disks, and a new normal disk is still in recovery, unrecovered data can be read: raid1_read_request read_balance raid1_should_read_first -> return false choose_best_rdev -> normal disk is not recovered, return -1 choose_bb_rdev -> missing the checking of recovery, return the normal disk -> read unrecovered data Root cause is that the checking of recovery is missing in choose_bb_rdev(). Hence add such checking to fix the problem. Also fix similar problem in choose_slow_rdev(). Cc: stable@vger.kernel.org Fixes: 9f3ced792203 ("md/raid1: factor out choose_bb_rdev() from read_balance()") Fixes: dfa8ecd167c1 ("md/raid1: factor out choose_slow_rdev() from read_balance()") Reported-and-tested-by: Mateusz Jończyk Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/ Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.com Signed-off-by: Song Liu --- drivers/md/raid1.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8d..761989d67906 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ From 836bb3268db405cf9021496ac4dbc26d3e4758fe Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 Aug 2024 14:03:43 -0500 Subject: [PATCH 1061/1523] smb3: fix lock breakage for cached writes Mandatory locking is enforced for cached writes, which violates default posix semantics, and also it is enforced inconsistently. This apparently breaks recent versions of libreoffice, but can also be demonstrated by opening a file twice from the same client, locking it from handle one and writing to it from handle two (which fails, returning EACCES). Since there was already a mount option "forcemandatorylock" (which defaults to off), with this change only when the user intentionally specifies "forcemandatorylock" on mount will we break posix semantics on write to a locked range (ie we will only fail the write in this case, if the user mounts with "forcemandatorylock"). Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks") Cc: stable@vger.kernel.org Cc: Pavel Shilovsky Reported-by: abartlet@samba.org Reported-by: Kevin Ottens Reviewed-by: David Howells Signed-off-by: Steve French --- fs/smb/client/file.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 45459af5044d..06a0667f8ff2 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2753,6 +2753,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2769,12 +2770,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); From 5b4f3af39b6588e8de4444d8e1ccf759b40f9414 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 8 Aug 2024 16:04:04 -0600 Subject: [PATCH 1062/1523] smb: smb2pdu.h: Use static_assert() to check struct sizes Commit 9f9bef9bc5c6 ("smb: smb2pdu.h: Avoid -Wflex-array-member-not-at-end warnings") introduced tagged `struct create_context_hdr`. We want to ensure that when new members need to be added to the flexible structure, they are always included within this tagged struct. So, we use `static_assert()` to ensure that the memory layout for both the flexible structure and the tagged struct is the same after any changes. Acked-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d25..c769f9dbc0b4 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; From 8d3a2d3d766a823c7510cdc17e6ff7c042c63b61 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 9 Aug 2024 16:12:35 -0700 Subject: [PATCH 1063/1523] drm/xe: use devm instead of drmm for managed bo The BO cleanup touches the GGTT and therefore requires the HW to be available, so we need to use devm instead of drmm. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1160 Signed-off-by: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Matthew Auld Reviewed-by: Matthew Auld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240809231237.1503796-2-daniele.ceraolospurio@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 56a089aa3916..800119c8fc8d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1584,7 +1584,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1599,7 +1599,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1647,7 +1647,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; From b313a8c835516bdda85025500be866ac8a74e022 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 15 Aug 2024 10:47:36 +0800 Subject: [PATCH 1064/1523] block: Fix lockdep warning in blk_mq_mark_tag_wait Lockdep reported a warning in Linux version 6.6: [ 414.344659] ================================ [ 414.345155] WARNING: inconsistent lock state [ 414.345658] 6.6.0-07439-gba2303cacfda #6 Not tainted [ 414.346221] -------------------------------- [ 414.346712] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. [ 414.347545] kworker/u10:3/1152 [HC0[0]:SC0[0]:HE0:SE1] takes: [ 414.349245] ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0 [ 414.351204] {IN-SOFTIRQ-W} state was registered at: [ 414.351751] lock_acquire+0x18d/0x460 [ 414.352218] _raw_spin_lock_irqsave+0x39/0x60 [ 414.352769] __wake_up_common_lock+0x22/0x60 [ 414.353289] sbitmap_queue_wake_up+0x375/0x4f0 [ 414.353829] sbitmap_queue_clear+0xdd/0x270 [ 414.354338] blk_mq_put_tag+0xdf/0x170 [ 414.354807] __blk_mq_free_request+0x381/0x4d0 [ 414.355335] blk_mq_free_request+0x28b/0x3e0 [ 414.355847] __blk_mq_end_request+0x242/0xc30 [ 414.356367] scsi_end_request+0x2c1/0x830 [ 414.345155] WARNING: inconsistent lock state [ 414.345658] 6.6.0-07439-gba2303cacfda #6 Not tainted [ 414.346221] -------------------------------- [ 414.346712] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. [ 414.347545] kworker/u10:3/1152 [HC0[0]:SC0[0]:HE0:SE1] takes: [ 414.349245] ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0 [ 414.351204] {IN-SOFTIRQ-W} state was registered at: [ 414.351751] lock_acquire+0x18d/0x460 [ 414.352218] _raw_spin_lock_irqsave+0x39/0x60 [ 414.352769] __wake_up_common_lock+0x22/0x60 [ 414.353289] sbitmap_queue_wake_up+0x375/0x4f0 [ 414.353829] sbitmap_queue_clear+0xdd/0x270 [ 414.354338] blk_mq_put_tag+0xdf/0x170 [ 414.354807] __blk_mq_free_request+0x381/0x4d0 [ 414.355335] blk_mq_free_request+0x28b/0x3e0 [ 414.355847] __blk_mq_end_request+0x242/0xc30 [ 414.356367] scsi_end_request+0x2c1/0x830 [ 414.356863] scsi_io_completion+0x177/0x1610 [ 414.357379] scsi_complete+0x12f/0x260 [ 414.357856] blk_complete_reqs+0xba/0xf0 [ 414.358338] __do_softirq+0x1b0/0x7a2 [ 414.358796] irq_exit_rcu+0x14b/0x1a0 [ 414.359262] sysvec_call_function_single+0xaf/0xc0 [ 414.359828] asm_sysvec_call_function_single+0x1a/0x20 [ 414.360426] default_idle+0x1e/0x30 [ 414.360873] default_idle_call+0x9b/0x1f0 [ 414.361390] do_idle+0x2d2/0x3e0 [ 414.361819] cpu_startup_entry+0x55/0x60 [ 414.362314] start_secondary+0x235/0x2b0 [ 414.362809] secondary_startup_64_no_verify+0x18f/0x19b [ 414.363413] irq event stamp: 428794 [ 414.363825] hardirqs last enabled at (428793): [] ktime_get+0x1dc/0x200 [ 414.364694] hardirqs last disabled at (428794): [] _raw_spin_lock_irq+0x47/0x50 [ 414.365629] softirqs last enabled at (428444): [] __do_softirq+0x540/0x7a2 [ 414.366522] softirqs last disabled at (428419): [] irq_exit_rcu+0x14b/0x1a0 [ 414.367425] other info that might help us debug this: [ 414.368194] Possible unsafe locking scenario: [ 414.368900] CPU0 [ 414.369225] ---- [ 414.369548] lock(&sbq->ws[i].wait); [ 414.370000] [ 414.370342] lock(&sbq->ws[i].wait); [ 414.370802] *** DEADLOCK *** [ 414.371569] 5 locks held by kworker/u10:3/1152: [ 414.372088] #0: ffff88810130e938 ((wq_completion)writeback){+.+.}-{0:0}, at: process_scheduled_works+0x357/0x13f0 [ 414.373180] #1: ffff88810201fdb8 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}, at: process_scheduled_works+0x3a3/0x13f0 [ 414.374384] #2: ffffffff86ffbdc0 (rcu_read_lock){....}-{1:2}, at: blk_mq_run_hw_queue+0x637/0xa00 [ 414.375342] #3: ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0 [ 414.376377] #4: ffff888106205a08 (&hctx->dispatch_wait_lock){+.-.}-{2:2}, at: blk_mq_dispatch_rq_list+0x1337/0x1ee0 [ 414.378607] stack backtrace: [ 414.379177] CPU: 0 PID: 1152 Comm: kworker/u10:3 Not tainted 6.6.0-07439-gba2303cacfda #6 [ 414.380032] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 414.381177] Workqueue: writeback wb_workfn (flush-253:0) [ 414.381805] Call Trace: [ 414.382136] [ 414.382429] dump_stack_lvl+0x91/0xf0 [ 414.382884] mark_lock_irq+0xb3b/0x1260 [ 414.383367] ? __pfx_mark_lock_irq+0x10/0x10 [ 414.383889] ? stack_trace_save+0x8e/0xc0 [ 414.384373] ? __pfx_stack_trace_save+0x10/0x10 [ 414.384903] ? graph_lock+0xcf/0x410 [ 414.385350] ? save_trace+0x3d/0xc70 [ 414.385808] mark_lock.part.20+0x56d/0xa90 [ 414.386317] mark_held_locks+0xb0/0x110 [ 414.386791] ? __pfx_do_raw_spin_lock+0x10/0x10 [ 414.387320] lockdep_hardirqs_on_prepare+0x297/0x3f0 [ 414.387901] ? _raw_spin_unlock_irq+0x28/0x50 [ 414.388422] trace_hardirqs_on+0x58/0x100 [ 414.388917] _raw_spin_unlock_irq+0x28/0x50 [ 414.389422] __blk_mq_tag_busy+0x1d6/0x2a0 [ 414.389920] __blk_mq_get_driver_tag+0x761/0x9f0 [ 414.390899] blk_mq_dispatch_rq_list+0x1780/0x1ee0 [ 414.391473] ? __pfx_blk_mq_dispatch_rq_list+0x10/0x10 [ 414.392070] ? sbitmap_get+0x2b8/0x450 [ 414.392533] ? __blk_mq_get_driver_tag+0x210/0x9f0 [ 414.393095] __blk_mq_sched_dispatch_requests+0xd99/0x1690 [ 414.393730] ? elv_attempt_insert_merge+0x1b1/0x420 [ 414.394302] ? __pfx___blk_mq_sched_dispatch_requests+0x10/0x10 [ 414.394970] ? lock_acquire+0x18d/0x460 [ 414.395456] ? blk_mq_run_hw_queue+0x637/0xa00 [ 414.395986] ? __pfx_lock_acquire+0x10/0x10 [ 414.396499] blk_mq_sched_dispatch_requests+0x109/0x190 [ 414.397100] blk_mq_run_hw_queue+0x66e/0xa00 [ 414.397616] blk_mq_flush_plug_list.part.17+0x614/0x2030 [ 414.398244] ? __pfx_blk_mq_flush_plug_list.part.17+0x10/0x10 [ 414.398897] ? writeback_sb_inodes+0x241/0xcc0 [ 414.399429] blk_mq_flush_plug_list+0x65/0x80 [ 414.399957] __blk_flush_plug+0x2f1/0x530 [ 414.400458] ? __pfx___blk_flush_plug+0x10/0x10 [ 414.400999] blk_finish_plug+0x59/0xa0 [ 414.401467] wb_writeback+0x7cc/0x920 [ 414.401935] ? __pfx_wb_writeback+0x10/0x10 [ 414.402442] ? mark_held_locks+0xb0/0x110 [ 414.402931] ? __pfx_do_raw_spin_lock+0x10/0x10 [ 414.403462] ? lockdep_hardirqs_on_prepare+0x297/0x3f0 [ 414.404062] wb_workfn+0x2b3/0xcf0 [ 414.404500] ? __pfx_wb_workfn+0x10/0x10 [ 414.404989] process_scheduled_works+0x432/0x13f0 [ 414.405546] ? __pfx_process_scheduled_works+0x10/0x10 [ 414.406139] ? do_raw_spin_lock+0x101/0x2a0 [ 414.406641] ? assign_work+0x19b/0x240 [ 414.407106] ? lock_is_held_type+0x9d/0x110 [ 414.407604] worker_thread+0x6f2/0x1160 [ 414.408075] ? __kthread_parkme+0x62/0x210 [ 414.408572] ? lockdep_hardirqs_on_prepare+0x297/0x3f0 [ 414.409168] ? __kthread_parkme+0x13c/0x210 [ 414.409678] ? __pfx_worker_thread+0x10/0x10 [ 414.410191] kthread+0x33c/0x440 [ 414.410602] ? __pfx_kthread+0x10/0x10 [ 414.411068] ret_from_fork+0x4d/0x80 [ 414.411526] ? __pfx_kthread+0x10/0x10 [ 414.411993] ret_from_fork_asm+0x1b/0x30 [ 414.412489] When interrupt is turned on while a lock holding by spin_lock_irq it throws a warning because of potential deadlock. blk_mq_prep_dispatch_rq blk_mq_get_driver_tag __blk_mq_get_driver_tag __blk_mq_alloc_driver_tag blk_mq_tag_busy -> tag is already busy // failed to get driver tag blk_mq_mark_tag_wait spin_lock_irq(&wq->lock) -> lock A (&sbq->ws[i].wait) __add_wait_queue(wq, wait) -> wait queue active blk_mq_get_driver_tag __blk_mq_tag_busy -> 1) tag must be idle, which means there can't be inflight IO spin_lock_irq(&tags->lock) -> lock B (hctx->tags) spin_unlock_irq(&tags->lock) -> unlock B, turn on interrupt accidentally -> 2) context must be preempt by IO interrupt to trigger deadlock. As shown above, the deadlock is not possible in theory, but the warning still need to be fixed. Fix it by using spin_lock_irqsave to get lockB instead of spin_lock_irq. Fixes: 4f1731df60f9 ("blk-mq: fix potential io hang by wrong 'wake_batch'") Signed-off-by: Li Lingfeng Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240815024736.2040971-1-lilingfeng@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0b..2cafcf11ee8b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; + unsigned long flags; struct blk_mq_tags *tags = hctx->tags; /* @@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) return; } - spin_lock_irq(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); - spin_unlock_irq(&tags->lock); + spin_unlock_irqrestore(&tags->lock, flags); } /* From 9b340aeb26d50e9a9ec99599e2a39b035fac978e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 16 Aug 2024 06:19:23 +1000 Subject: [PATCH 1065/1523] nouveau/firmware: use dma non-coherent allocator Currently, enabling SG_DEBUG in the kernel will cause nouveau to hit a BUG() on startup, when the iommu is enabled: kernel BUG at include/linux/scatterlist.h:187! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 930 Comm: (udev-worker) Not tainted 6.9.0-rc3Lyude-Test+ #30 Hardware name: MSI MS-7A39/A320M GAMING PRO (MS-7A39), BIOS 1.I0 01/22/2019 RIP: 0010:sg_init_one+0x85/0xa0 Code: 69 88 32 01 83 e1 03 f6 c3 03 75 20 a8 01 75 1e 48 09 cb 41 89 54 24 08 49 89 1c 24 41 89 6c 24 0c 5b 5d 41 5c e9 7b b9 88 00 <0f> 0b 0f 0b 0f 0b 48 8b 05 5e 46 9a 01 eb b2 66 66 2e 0f 1f 84 00 RSP: 0018:ffffa776017bf6a0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffa77600d87000 RCX: 000000000000002b RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffa77680d87000 RBP: 000000000000e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff98f4c46aa508 R11: 0000000000000000 R12: ffff98f4c46aa508 R13: ffff98f4c46aa008 R14: ffffa77600d4a000 R15: ffffa77600d4a018 FS: 00007feeb5aae980(0000) GS:ffff98f5c4dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f22cb9a4520 CR3: 00000001043ba000 CR4: 00000000003506f0 Call Trace: ? die+0x36/0x90 ? do_trap+0xdd/0x100 ? sg_init_one+0x85/0xa0 ? do_error_trap+0x65/0x80 ? sg_init_one+0x85/0xa0 ? exc_invalid_op+0x50/0x70 ? sg_init_one+0x85/0xa0 ? asm_exc_invalid_op+0x1a/0x20 ? sg_init_one+0x85/0xa0 nvkm_firmware_ctor+0x14a/0x250 [nouveau] nvkm_falcon_fw_ctor+0x42/0x70 [nouveau] ga102_gsp_booter_ctor+0xb4/0x1a0 [nouveau] r535_gsp_oneinit+0xb3/0x15f0 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? nvkm_udevice_new+0x95/0x140 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? ktime_get+0x47/0xb0 Fix this by using the non-coherent allocator instead, I think there might be a better answer to this, but it involve ripping up some of APIs using sg lists. Cc: stable@vger.kernel.org Fixes: 2541626cfb79 ("drm/nouveau/acr: use common falcon HS FW code for ACR FWs") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240815201923.632803-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/core/firmware.c | 9 ++++++--- drivers/gpu/drm/nouveau/nvkm/falcon/fw.c | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6..0af01a0ec601 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), + fw->img, fw->phys, DMA_TO_DEVICE); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, break; case NVKM_FIRMWARE_IMG_DMA: { dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); + fw->img = dma_alloc_noncoherent(fw->device->dev, + len, &addr, + DMA_TO_DEVICE, + GFP_KERNEL); if (fw->img) { memcpy(fw->img, src, fw->len); fw->phys = addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c index 80a480b12174..a1c8545f1249 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c @@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user, nvkm_falcon_fw_dtor_sigs(fw); } + /* after last write to the img, sync dma mappings */ + dma_sync_single_for_device(fw->fw.device->dev, + fw->fw.phys, + sg_dma_len(&fw->fw.mem.sgl), + DMA_TO_DEVICE); + FLCNFW_DBG(fw, "resetting"); fw->func->reset(fw); From b96ed2c97c791954abc881ef384e773010945aec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Aug 2024 14:25:00 +0200 Subject: [PATCH 1066/1523] virtio_net: move netdev_tx_reset_queue() call before RX napi enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During suspend/resume the following BUG was hit: ------------[ cut here ]------------ kernel BUG at lib/dynamic_queue_limits.c:99! Internal error: Oops - BUG: 0 [#1] SMP ARM Modules linked in: bluetooth ecdh_generic ecc libaes CPU: 1 PID: 1282 Comm: rtcwake Not tainted 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240 Hardware name: Generic DT based system PC is at dql_completed+0x270/0x2cc LR is at __free_old_xmit+0x120/0x198 pc : []    lr : []    psr: 80000013 ... Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none Control: 10c5387d  Table: 43a4406a  DAC: 00000051 ... Process rtcwake (pid: 1282, stack limit = 0xfbc21278) Stack: (0xe0805e80 to 0xe0806000) ... Call trace:  dql_completed from __free_old_xmit+0x120/0x198  __free_old_xmit from free_old_xmit+0x44/0xe4  free_old_xmit from virtnet_poll_tx+0x88/0x1b4  virtnet_poll_tx from __napi_poll+0x2c/0x1d4  __napi_poll from net_rx_action+0x140/0x2b4  net_rx_action from handle_softirqs+0x11c/0x350  handle_softirqs from call_with_stack+0x18/0x20  call_with_stack from do_softirq+0x48/0x50  do_softirq from __local_bh_enable_ip+0xa0/0xa4  __local_bh_enable_ip from virtnet_open+0xd4/0x21c  virtnet_open from virtnet_restore+0x94/0x120  virtnet_restore from virtio_device_restore+0x110/0x1f4  virtio_device_restore from dpm_run_callback+0x3c/0x100  dpm_run_callback from device_resume+0x12c/0x2a8  device_resume from dpm_resume+0x12c/0x1e0  dpm_resume from dpm_resume_end+0xc/0x18  dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c  suspend_devices_and_enter from pm_suspend+0x270/0x2a0  pm_suspend from state_store+0x68/0xc8  state_store from kernfs_fop_write_iter+0x10c/0x1cc  kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc  vfs_write from ksys_write+0x5c/0xd4  ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe8bf1fa8 to 0xe8bf1ff0) ... ---[ end trace 0000000000000000 ]--- After virtnet_napi_enable() is called, the following path is hit: __napi_poll() -> virtnet_poll() -> virtnet_poll_cleantx() -> netif_tx_wake_queue() That wakes the TX queue and allows skbs to be submitted and accounted by BQL counters. Then netdev_tx_reset_queue() is called that resets BQL counters and eventually leads to the BUG in dql_completed(). Move virtnet_napi_tx_enable() what does BQL counters reset before RX napi enable to avoid the issue. Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/netdev/e632e378-d019-4de7-8f13-07c572ab37a9@samsung.com/ Fixes: c8bd1f7f3e61 ("virtio_net: add support for Byte Queue Limits") Tested-by: Marek Szyprowski Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://patch.msgid.link/20240814122500.1710279-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3f10c72743e9..c6af18948092 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) if (err < 0) goto err_xdp_reg_mem_model; - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; From c948c0973df5db9314459da621342e1170bd9e8e Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 14 Aug 2024 15:54:29 -0700 Subject: [PATCH 1067/1523] bnxt_en: Don't clear ntuple filters and rss contexts during ethtool ops The driver currently blindly deletes its cache of RSS cotexts and ntuple filters when the ethtool channel count is changing. It also deletes the ntuple filters cache when the default indirection table is changing. The core will not allow ethtool channels to drop below any that have been configured as ntuple destinations since this commit from 2022: 47f3ecf4763d ("ethtool: Fail number of channels change when it conflicts with rxnfc") So there is absolutely no need to delete the ntuple filters and RSS contexts when changing ethtool channels. It is also unnecessary to delete ntuple filters when the default RSS indirection table is changing. Remove bnxt_clear_usr_fltrs() and bnxt_clear_rss_ctxis() from the ethtool ops and change them to static functions. This bug will cause confusion to the end user and causes failure when running the rss_ctx.py selftest. Fixes: 1018319f949c ("bnxt_en: Invalidate user filters when needed") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240725111912.7bc17cf6@kernel.org/ Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20240814225429.199280-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e27e1082ee33..04a623b3eee2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5056,7 +5056,7 @@ void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr) list_del_init(&fltr->list); } -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) +static void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) { struct bnxt_filter_base *usr_fltr, *tmp; @@ -10248,7 +10248,7 @@ static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) } } -void bnxt_clear_rss_ctxs(struct bnxt *bp) +static void bnxt_clear_rss_ctxs(struct bnxt *bp) { struct ethtool_rxfh_context *ctx; unsigned long context; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 6bbdc718c3a7..059a6f81c1a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2790,7 +2790,6 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); @@ -2842,7 +2841,6 @@ int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); -void bnxt_clear_rss_ctxs(struct bnxt *bp); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9dadc89378f0..4cf9bf8b01b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -968,9 +968,6 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } - bnxt_clear_usr_fltrs(bp, true); - if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) - bnxt_clear_rss_ctxs(bp); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -2000,7 +1997,6 @@ static int bnxt_set_rxfh(struct net_device *dev, bnxt_modify_rss(bp, NULL, NULL, rxfh); - bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); rc = bnxt_open_nic(bp, false, false); From b153b3c747003e1ce312ba205e552db4bd9e8df7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Aug 2024 07:28:32 -0700 Subject: [PATCH 1068/1523] MAINTAINERS: add selftests to network drivers tools/testing/selftests/drivers/net/ is not listed under networking entries. Add it to NETWORKING DRIVERS. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240814142832.3473685-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..a964a34651f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15886,6 +15886,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h +F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) From 3396900aa273903639a1792afa4d23dc09bec291 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 16:28:30 -0700 Subject: [PATCH 1069/1523] drm/xe: Fix tile fini sequence Only set tile->mmio.regs to NULL if not the root tile in tile_fini. The root tile mmio regs is setup ealier in MMIO init thus it should be set to NULL in mmio_fini. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240809232830.3302251-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index bdcc7282385c..f5bdb540e823 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } /* @@ -146,9 +147,11 @@ int xe_mmio_probe_tiles(struct xe_device *xe) static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) From e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 7 Aug 2024 18:33:35 +0100 Subject: [PATCH 1070/1523] mseal: fix is_madv_discard() is_madv_discard did its check wrong. MADV_ flags are not bitwise, they're normal sequential numbers. So, for instance: behavior & (/* ... */ | MADV_REMOVE) tagged both MADV_REMOVE and MADV_RANDOM (bit 0 set) as discard operations. As a result the kernel could erroneously block certain madvises (e.g MADV_RANDOM or MADV_HUGEPAGE) on sealed VMAs due to them sharing bits with blocked MADV operations (e.g REMOVE or WIPEONFORK). This is obviously incorrect, so use a switch statement instead. Link: https://lkml.kernel.org/r/20240807173336.2523757-1-pedro.falcato@gmail.com Link: https://lkml.kernel.org/r/20240807173336.2523757-2-pedro.falcato@gmail.com Fixes: 8be7258aad44 ("mseal: add mseal syscall") Signed-off-by: Pedro Falcato Tested-by: Jeff Xu Reviewed-by: Jeff Xu Cc: Kees Cook Cc: Liam R. Howlett Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- mm/mseal.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0..15bba28acc00 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 1 Aug 2024 22:47:48 +0200 Subject: [PATCH 1071/1523] mm/hugetlb: fix hugetlb vs. core-mm PT locking We recently made GUP's common page table walking code to also walk hugetlb VMAs without most hugetlb special-casing, preparing for the future of having less hugetlb-specific page table walking code in the codebase. Turns out that we missed one page table locking detail: page table locking for hugetlb folios that are not mapped using a single PMD/PUD. Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the page tables, will perform a pte_offset_map_lock() to grab the PTE table lock. However, hugetlb that concurrently modifies these page tables would actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the locks would differ. Something similar can happen right now with hugetlb folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS. This issue can be reproduced [1], for example triggering: [ 3105.936100] ------------[ cut here ]------------ [ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188 [ 3105.944634] Modules linked in: [...] [ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1 [ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024 [ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3105.991108] pc : try_grab_folio+0x11c/0x188 [ 3105.994013] lr : follow_page_pte+0xd8/0x430 [ 3105.996986] sp : ffff80008eafb8f0 [ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43 [ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48 [ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978 [ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001 [ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000 [ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000 [ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0 [ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080 [ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000 [ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000 [ 3106.047957] Call trace: [ 3106.049522] try_grab_folio+0x11c/0x188 [ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0 [ 3106.055527] follow_page_mask+0x1a0/0x2b8 [ 3106.058118] __get_user_pages+0xf0/0x348 [ 3106.060647] faultin_page_range+0xb0/0x360 [ 3106.063651] do_madvise+0x340/0x598 Let's make huge_pte_lockptr() effectively use the same PT locks as any core-mm page table walker would. Add ptep_lockptr() to obtain the PTE page table lock using a pte pointer -- unfortunately we cannot convert pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables we can have with CONFIG_HIGHPTE. Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such that when e.g., CONFIG_PGTABLE_LEVELS==2 with PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document why that works. There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb folio being mapped using two PTE page tables. While hugetlb wants to take the PMD table lock, core-mm would grab the PTE table lock of one of both PTE page tables. In such corner cases, we have to make sure that both locks match, which is (fortunately!) currently guaranteed for 8xx as it does not support SMP and consequently doesn't use split PT locks. [1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/ Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Peter Xu Cc: Oscar Salvador Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 11 +++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} From ace0741a55e453c265cbf3d965eea7f687cd6d45 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:34 +0000 Subject: [PATCH 1072/1523] mm: don't account memmap on failure Patch series "Fixes for memmap accounting", v4. Memmap accounting provides us with observability of how much memory is used for per-page metadata: i.e. "struct page"'s and "struct page_ext". It also provides with information of how much was allocated using boot allocator (i.e. not part of MemTotal), and how much was allocated using buddy allocated (i.e. part of MemTotal). This small series fixes a few problems that were discovered with the original patch. This patch (of 3): When we fail to allocate the mmemmap in alloc_vmemmap_page_list(), do not account any already-allocated pages: we're going to free all them before we return from the function. Link: https://lkml.kernel.org/r/20240809191020.1142142-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-2-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reviewed-by: Fan Ni Reviewed-by: Yosry Ahmed Acked-by: David Hildenbrand Tested-by: Alison Schofield Reviewed-by: Muchun Song Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914..4f51e0596197 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -392,13 +392,10 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); return 0; From f4cb78af91e3b2b7aa76dbf8213b898fa8811b12 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:35 +0000 Subject: [PATCH 1073/1523] mm: add system wide stats items category /proc/vmstat contains events and stats, events can only grow, but stats can grow and shrink. vmstat has the following: ------------------------- NR_VM_ZONE_STAT_ITEMS: per-zone stats NR_VM_NUMA_EVENT_ITEMS: per-numa events NR_VM_NODE_STAT_ITEMS: per-numa stats NR_VM_WRITEBACK_STAT_ITEMS: system-wide background-writeback and dirty-throttling tresholds. NR_VM_EVENT_ITEMS: system-wide events ------------------------- Rename NR_VM_WRITEBACK_STAT_ITEMS to NR_VM_STAT_ITEMS, to track the system-wide stats, we are going to add per-page metadata stats to this category in the next patch. Also delete unused writeback_stat_name(). Link: https://lkml.kernel.org/r/20240809191020.1142142-2-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-3-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Suggested-by: Yosry Ahmed Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Cc: Fan Ni Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 15 ++++----------- mm/vmstat.c | 6 +++--- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9ab4fa5e09b5 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,11 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +515,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc636..6f8aa4766f16 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1257,7 +1257,7 @@ const char * const vmstat_text[] = { "pgdemote_khugepaged", "nr_memmap", "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", @@ -1790,7 +1790,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1827,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); From 9d85731110241fb8ca9445ea4177d816041a8825 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:36 +0000 Subject: [PATCH 1074/1523] mm: don't account memmap per-node Fix invalid access to pgdat during hot-remove operation: ndctl users reported a GPF when trying to destroy a namespace: $ ndctl destroy-namespace all -r all -f Segmentation fault dmesg: Oops: general protection fault, probably for non-canonical address 0xdffffc0000005650: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: probably user-memory-access in range [0x000000000002b280-0x000000000002b287] CPU: 26 UID: 0 PID: 1868 Comm: ndctl Not tainted 6.11.0-rc1 #1 Hardware name: Dell Inc. PowerEdge R640/08HT8T, BIOS 2.20.1 09/13/2023 RIP: 0010:mod_node_page_state+0x2a/0x110 cxl-test users report a GPF when trying to unload the test module: $ modrpobe -r cxl-test dmesg BUG: unable to handle page fault for address: 0000000000004200 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 1076 Comm: modprobe Tainted: G O N 6.11.0-rc1 #197 Tainted: [O]=OOT_MODULE, [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/15 RIP: 0010:mod_node_page_state+0x6/0x90 Currently, when memory is hot-plugged or hot-removed the accounting is done based on the assumption that memmap is allocated from the same node as the hot-plugged/hot-removed memory, which is not always the case. In addition, there are challenges with keeping the node id of the memory that is being remove to the time when memmap accounting is actually performed: since this is done after remove_pfn_range_from_zone(), and also after remove_memory_block_devices(). Meaning that we cannot use pgdat nor walking though memblocks to get the nid. Given all of that, account the memmap overhead system wide instead. For this we are going to be using global atomic counters, but given that memmap size is rarely modified, and normally is only modified either during early boot when there is only one CPU, or under a hotplug global mutex lock, therefore there is no need for per-cpu optimizations. Also, while we are here rename nr_memmap to nr_memmap_pages, and nr_memmap_boot to nr_memmap_boot_pages to be self explanatory that the units are in page count. [pasha.tatashin@soleen.com: address a few nits from David Hildenbrand] Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-4-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reported-by: Yi Zhang Closes: https://lore.kernel.org/linux-cxl/CAHj4cs9Ax1=CoJkgBGP_+sNu6-6=6v=_L-ZBZY0bVLD3wUWZQg@mail.gmail.com Reported-by: Alison Schofield Closes: https://lore.kernel.org/linux-mm/Zq0tPd2h6alFz8XF@aschofie-mobl2/#t Tested-by: Dan Williams Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Tested-by: Yi Zhang Cc: Domenico Cerasuolo Cc: Fan Ni Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- include/linux/vmstat.h | 7 ++++--- mm/hugetlb_vmemmap.c | 8 ++++---- mm/mm_init.c | 3 +-- mm/page_alloc.c | 1 - mm/page_ext.c | 18 ++++------------- mm/sparse-vmemmap.c | 11 ++++------ mm/sparse.c | 5 ++--- mm/vmstat.c | 46 ++++++++++++++++++++---------------------- 9 files changed, 41 insertions(+), 60 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9ab4fa5e09b5..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -38,6 +38,8 @@ struct reclaim_stat { enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; @@ -618,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4f51e0596197..0c3f56b3578e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -396,7 +396,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, goto out; list_add(&page->lru, list); } - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b..f9a60ffc5532 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..875d76e8684a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5755,7 +5755,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401..641d93f6af4c 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b..edcc7a6b0f6f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0f018c6f9ec5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6f8aa4766f16..e875f2a4915f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1827,6 +1845,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} From d75abd0d0bc29e6ebfebbf76d11b4067b35844af Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 6 Aug 2024 12:41:07 -0400 Subject: [PATCH 1075/1523] mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu The memory_failure_cpu structure is a per-cpu structure. Access to its content requires the use of get_cpu_var() to lock in the current CPU and disable preemption. The use of a regular spinlock_t for locking purpose is fine for a non-RT kernel. Since the integration of RT spinlock support into the v5.15 kernel, a spinlock_t in a RT kernel becomes a sleeping lock and taking a sleeping lock in a preemption disabled context is illegal resulting in the following kind of warning. [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0 [12135.732252] preempt_count: 1, expected: 0 [12135.732255] RCU nest depth: 2, expected: 2 : [12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021 [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred [12135.732433] Call Trace: [12135.732436] [12135.732450] dump_stack_lvl+0x57/0x81 [12135.732461] __might_resched.cold+0xf4/0x12f [12135.732479] rt_spin_lock+0x4c/0x100 [12135.732491] memory_failure_queue+0x40/0xe0 [12135.732503] ghes_do_memory_failure+0x53/0x390 [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0 [12135.732575] ghes_proc+0xf9/0x1a0 [12135.732591] ghes_notify_hed+0x6a/0x150 [12135.732602] notifier_call_chain+0x43/0xb0 [12135.732626] blocking_notifier_call_chain+0x43/0x60 [12135.732637] acpi_ev_notify_dispatch+0x47/0x70 [12135.732648] acpi_os_execute_deferred+0x13/0x20 [12135.732654] process_one_work+0x41f/0x500 [12135.732695] worker_thread+0x192/0x360 [12135.732715] kthread+0x111/0x140 [12135.732733] ret_from_fork+0x29/0x50 [12135.732779] Fix it by using a raw_spinlock_t for locking instead. Also move the pr_err() out of the lock critical section and after put_cpu_ptr() to avoid indeterminate latency and the possibility of sleep with this call. [longman@redhat.com: don't hold percpu ref across pr_err(), per Miaohe] Link: https://lkml.kernel.org/r/20240807181130.1122660-1-longman@redhat.com Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com Fixes: 0f383b6dc96e ("locking/spinlock: Provide RT variant") Signed-off-by: Waiman Long Acked-by: Miaohe Lin Cc: "Huang, Ying" Cc: Juri Lelli Cc: Len Brown Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c9117..7066fc84f351 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } From 61ebe5a747da649057c37be1c37eb934b4af79ca Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Thu, 8 Aug 2024 20:19:56 +0800 Subject: [PATCH 1076/1523] mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 The __vmap_pages_range_noflush() assumes its argument pages** contains pages with the same page shift. However, since commit e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes __GFP_NOFAIL with high order in vm_area_alloc_pages() and page allocation failed for high order, the pages** may contain two different page shifts (high order and order-0). This could lead __vmap_pages_range_noflush() to perform incorrect mappings, potentially resulting in memory corruption. Users might encounter this as follows (vmap_allow_huge = true, 2M is for PMD_SIZE): kvmalloc(2M, __GFP_NOFAIL|GFP_X) __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP) vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0 vmap_pages_range() vmap_pages_range_noflush() __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens We can remove the fallback code because if a high-order allocation fails, __vmalloc_node_range_noprof() will retry with order-0. Therefore, it is unnecessary to fallback to order-0 here. Therefore, fix this by removing the fallback code. Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations") Signed-off-by: Hailong Liu Reported-by: Tangquan Zheng Reviewed-by: Baoquan He Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Barry Song Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a1..af2de36549d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as From 40b760cfd44566bca791c80e0720d70d75382b84 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 9 Aug 2024 10:59:04 -0400 Subject: [PATCH 1077/1523] mm/numa: no task_numa_fault() call if PTE is changed When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") restructured do_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pte_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") Signed-off-by: Zi Yan Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/ Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Kefeng Wang Cc: Mel Gorman Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 34f8402d2046..3c01d68065be 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) From fd8c35a92910f4829b7c99841f39b1b952c259d5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 9 Aug 2024 10:59:05 -0400 Subject: [PATCH 1078/1523] mm/numa: no task_numa_fault() call if PMD is changed When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") restructured do_huge_pmd_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pmd_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/ Signed-off-by: Zi Yan Acked-by: David Hildenbrand Cc: Baolin Wang Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Mel Gorman Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4be468e06a4..67c86a5d64a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); - - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* From af3b7d09a9934220a8136065a0e6985fe0b67a1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2024 15:32:30 +0300 Subject: [PATCH 1079/1523] selftests/mm: compaction_test: fix off by one in check_compaction() The "initial_nr_hugepages" variable is unsigned long so it takes up to 20 characters to print, plus 1 more character for the NUL terminator. Unfortunately, this buffer is not quite large enough for the terminator to fit. Also use snprintf() for a belt and suspenders approach. Link: https://lkml.kernel.org/r/87470c06-b45a-4e83-92ff-aac2e7b9c6ba@stanley.mountain Fixes: fb9293b6b015 ("selftests/mm: compaction_test: fix bogus test success and reduce probability of OOM-killer invocation") Signed-off-by: Dan Carpenter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/compaction_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53..2c3a0eb6b22d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ From 807174a93d24c456503692dc3f5af322ee0b640a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:47 +0300 Subject: [PATCH 1080/1523] mm: fix endless reclaim on machines with unaccepted memory Unaccepted memory is considered unusable free memory, which is not counted as free on the zone watermark check. This causes get_page_from_freelist() to accept more memory to hit the high watermark, but it creates problems in the reclaim path. The reclaim path encounters a failed zone watermark check and attempts to reclaim memory. This is usually successful, but if there is little or no reclaimable memory, it can result in endless reclaim with little to no progress. This can occur early in the boot process, just after start of the init process when the only reclaimable memory is the page cache of the init executable and its libraries. Make unaccepted memory free from watermark check point of view. This way unaccepted memory will never be the trigger of memory reclaim. Accept more memory in the get_page_from_freelist() if needed. Link: https://lkml.kernel.org/r/20240809114854.3745464-2-kirill.shutemov@linux.intel.com Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Signed-off-by: Kirill A. Shutemov Reported-by: Jianxiong Gao Acked-by: David Hildenbrand Tested-by: Jianxiong Gao Cc: Borislav Petkov Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Cc: [6.5+] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 875d76e8684a..8747087acee3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -6950,9 +6945,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6978,23 +6970,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7037,7 +7035,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } From 7c5e8d212d7d81991a580e7de3904ea213d9a852 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 9 Aug 2024 12:56:42 +0500 Subject: [PATCH 1081/1523] selftests: memfd_secret: don't build memfd_secret test on unsupported arches [1] mentions that memfd_secret is only supported on arm64, riscv, x86 and x86_64 for now. It doesn't support other architectures. I found the build error on arm and decided to send the fix as it was creating noise on KernelCI: memfd_secret.c: In function 'memfd_secret': memfd_secret.c:42:24: error: '__NR_memfd_secret' undeclared (first use in this function); did you mean 'memfd_secret'? 42 | return syscall(__NR_memfd_secret, flags); | ^~~~~~~~~~~~~~~~~ | memfd_secret Hence I'm adding condition that memfd_secret should only be compiled on supported architectures. Also check in run_vmtests script if memfd_secret binary is present before executing it. Link: https://lkml.kernel.org/r/20240812061522.1933054-1-usama.anjum@collabora.com Link: https://lore.kernel.org/all/20210518072034.31572-7-rppt@kernel.org/ [1] Link: https://lkml.kernel.org/r/20240809075642.403247-1-usama.anjum@collabora.com Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Shuah Khan Acked-by: Mike Rapoport (Microsoft) Cc: Albert Ou Cc: James Bottomley Cc: Mike Rapoport (Microsoft) Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 2 ++ tools/testing/selftests/mm/run_vmtests.sh | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 7b8a5def54a1..cfad627e8d94 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce..36045edb10de 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 From edb907a6133323e19311901a39dee68b1c6a2ef8 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 12 Aug 2024 14:20:17 +0800 Subject: [PATCH 1082/1523] crash: fix riscv64 crash memory reserve dead loop On RISCV64 Qemu machine with 512MB memory, cmdline "crashkernel=500M,high" will cause system stall as below: Zone ranges: DMA32 [mem 0x0000000080000000-0x000000009fffffff] Normal empty Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000080000000-0x000000008005ffff] node 0: [mem 0x0000000080060000-0x000000009fffffff] Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff] (stall here) commit 5d99cadf1568 ("crash: fix x86_32 crash memory reserve dead loop bug") fix this on 32-bit architecture. However, the problem is not completely solved. If `CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX` on 64-bit architecture, for example, when system memory is equal to CRASH_ADDR_LOW_MAX on RISCV64, the following infinite loop will also occur: -> reserve_crashkernel_generic() and high is true -> alloc at [CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX] fail -> alloc at [0, CRASH_ADDR_LOW_MAX] fail and repeatedly (because CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX). As Catalin suggested, do not remove the ",high" reservation fallback to ",low" logic which will change arm64's kdump behavior, but fix it by skipping the above situation similar to commit d2f32f23190b ("crash: fix x86_32 crash memory reserve dead loop"). After this patch, it print: cannot allocate crashkernel (size:0x1f400000) Link: https://lkml.kernel.org/r/20240812062017.2674441-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Acked-by: Baoquan He Cc: Albert Ou Cc: Dave Young Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_reserve.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd1..64d44a52c011 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:56 -0700 Subject: [PATCH 1083/1523] alloc_tag: introduce clear_page_tag_ref() helper function In several cases we are freeing pages which were not allocated using common page allocators. For such cases, in order to keep allocation accounting correct, we should clear the page tag to indicate that the page being freed is expected to not have a valid allocation tag. Introduce clear_page_tag_ref() helper function to be used for this. Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Pasha Tatashin Cc: Kees Cook Cc: Kent Overstreet Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 13 +++++++++++++ mm/mm_init.c | 10 +--------- mm/page_alloc.c | 9 +-------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/mm/mm_init.c b/mm/mm_init.c index f9a60ffc5532..adc3127573cd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2459,15 +2459,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8747087acee3..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5815,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char void free_reserved_page(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } + clear_page_tag_ref(page); ClearPageReserved(page); init_page_count(page); __free_page(page); From 766c163c2068b45330664fb67df67268e588a22d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:57 -0700 Subject: [PATCH 1084/1523] alloc_tag: mark pages reserved during CMA activation as not tagged During CMA activation, pages in CMA area are prepared and then freed without being allocated. This triggers warnings when memory allocation debug config (CONFIG_MEM_ALLOC_PROFILING_DEBUG) is enabled. Fix this by marking these pages not tagged before freeing them. Link: https://lkml.kernel.org/r/20240813150758.855881-2-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Acked-by: David Hildenbrand Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- mm/mm_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mm_init.c b/mm/mm_init.c index adc3127573cd..51960079875b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2244,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); From 2e6506e1c4eed2676a8412231046f31e10e240da Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 29 Jul 2024 10:13:06 +0800 Subject: [PATCH 1085/1523] mm/migrate: fix deadlock in migrate_pages_batch() on large folios Currently, migrate_pages_batch() can lock multiple locked folios with an arbitrary order. Although folio_trylock() is used to avoid deadlock as commit 2ef7dbb26990 ("migrate_pages: try migrate in batch asynchronously firstly") mentioned, it seems try_split_folio() is still missing. It was found by compaction stress test when I explicitly enable EROFS compressed files to use large folios, which case I cannot reproduce with the same workload if large folio support is off (current mainline). Typically, filesystem reads (with locked file-backed folios) could use another bdev/meta inode to load some other I/Os (e.g. inode extent metadata or caching compressed data), so the locking order will be: file-backed folios (A) bdev/meta folios (B) The following calltrace shows the deadlock: Thread 1 takes (B) lock and tries to take folio (A) lock Thread 2 takes (A) lock and tries to take folio (B) lock [Thread 1] INFO: task stress:1824 blocked for more than 30 seconds. Tainted: G OE 6.10.0-rc7+ #6 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:stress state:D stack:0 pid:1824 tgid:1824 ppid:1822 flags:0x0000000c Call trace: __switch_to+0xec/0x138 __schedule+0x43c/0xcb0 schedule+0x54/0x198 io_schedule+0x44/0x70 folio_wait_bit_common+0x184/0x3f8 <-- folio mapping ffff00036d69cb18 index 996 (**) __folio_lock+0x24/0x38 migrate_pages_batch+0x77c/0xea0 // try_split_folio (mm/migrate.c:1486:2) // migrate_pages_batch (mm/migrate.c:1734:16) <--- LIST_HEAD(unmap_folios) has .. folio mapping 0xffff0000d184f1d8 index 1711; (*) folio mapping 0xffff0000d184f1d8 index 1712; .. migrate_pages+0xb28/0xe90 compact_zone+0xa08/0x10f0 compact_node+0x9c/0x180 sysctl_compaction_handler+0x8c/0x118 proc_sys_call_handler+0x1a8/0x280 proc_sys_write+0x1c/0x30 vfs_write+0x240/0x380 ksys_write+0x78/0x118 __arm64_sys_write+0x24/0x38 invoke_syscall+0x78/0x108 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x148 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 [Thread 2] INFO: task stress:1825 blocked for more than 30 seconds. Tainted: G OE 6.10.0-rc7+ #6 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:stress state:D stack:0 pid:1825 tgid:1825 ppid:1822 flags:0x0000000c Call trace: __switch_to+0xec/0x138 __schedule+0x43c/0xcb0 schedule+0x54/0x198 io_schedule+0x44/0x70 folio_wait_bit_common+0x184/0x3f8 <-- folio = 0xfffffdffc6b503c0 (mapping == 0xffff0000d184f1d8 index == 1711) (*) __folio_lock+0x24/0x38 z_erofs_runqueue+0x384/0x9c0 [erofs] z_erofs_readahead+0x21c/0x350 [erofs] <-- folio mapping 0xffff00036d69cb18 range from [992, 1024] (**) read_pages+0x74/0x328 page_cache_ra_order+0x26c/0x348 ondemand_readahead+0x1c0/0x3a0 page_cache_sync_ra+0x9c/0xc0 filemap_get_pages+0xc4/0x708 filemap_read+0x104/0x3a8 generic_file_read_iter+0x4c/0x150 vfs_read+0x27c/0x330 ksys_pread64+0x84/0xd0 __arm64_sys_pread64+0x28/0x40 invoke_syscall+0x78/0x108 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x148 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 Link: https://lkml.kernel.org/r/20240729021306.398286-1-hsiangkao@linux.alibaba.com Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move") Signed-off-by: Gao Xiang Reviewed-by: "Huang, Ying" Acked-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/migrate.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d5..923ea80ba744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; From ad899c301c880766cc709aad277991b3ab671b66 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Fri, 16 Aug 2024 10:01:59 +0300 Subject: [PATCH 1086/1523] char: xillybus: Refine workqueue handling As the wakeup work item now runs on a separate workqueue, it needs to be flushed separately along with flushing the device's workqueue. Also, move the destroy_workqueue() call to the end of the exit method, so that deinitialization is done in the opposite order of initialization. Fixes: ccbde4b128ef ("char: xillybus: Don't destroy workqueue from work item running on it") Cc: stable Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240816070200.50695-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 33ca0f4af390..e12d359194f8 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -2093,9 +2093,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2274,9 +2276,9 @@ static int __init xillyusb_init(void) static void __exit xillyusb_exit(void) { - destroy_workqueue(wakeup_wq); - usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); From 2374bf7558de915edc6ec8cb10ec3291dfab9594 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Fri, 16 Aug 2024 10:02:00 +0300 Subject: [PATCH 1087/1523] char: xillybus: Check USB endpoints when probing device Ensure, as the driver probes the device, that all endpoints that the driver may attempt to access exist and are of the correct type. All XillyUSB devices must have a Bulk IN and Bulk OUT endpoint at address 1. This is verified in xillyusb_setup_base_eps(). On top of that, a XillyUSB device may have additional Bulk OUT endpoints. The information about these endpoints' addresses is deduced from a data structure (the IDT) that the driver fetches from the device while probing it. These endpoints are checked in setup_channels(). A XillyUSB device never has more than one IN endpoint, as all data towards the host is multiplexed in this single Bulk IN endpoint. This is why setup_channels() only checks OUT endpoints. Reported-by: syzbot+eac39cba052f2e750dbe@syzkaller.appspotmail.com Cc: stable Closes: https://lore.kernel.org/all/0000000000001d44a6061f7a54ee@google.com/T/ Fixes: a53d1202aef1 ("char: xillybus: Add driver for XillyUSB (Xillybus variant for USB)"). Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240816070200.50695-2-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index e12d359194f8..45771b1a3716 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -1903,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1932,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1968,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1977,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } From cb2f92569a2b7809a0569b380b9a6f2afa368cb2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:17 +0300 Subject: [PATCH 1088/1523] drm/i915/display: support struct intel_atomic_state in to_intel_display() Add support for converting struct intel_atomic_state pointers to struct intel_display pointers. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ea6548ceab2f..bd290536a1b7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -2206,6 +2206,8 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) */ #define __drm_device_to_intel_display(p) \ (&to_i915(p)->display) +#define __intel_atomic_state_to_intel_display(p) \ + __drm_device_to_intel_display((p)->base.dev) #define __intel_connector_to_intel_display(p) \ __drm_device_to_intel_display((p)->base.dev) #define __intel_crtc_to_intel_display(p) \ @@ -2229,6 +2231,7 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) #define to_intel_display(p) \ _Generic(*p, \ __assoc(drm_device, p), \ + __assoc(intel_atomic_state, p), \ __assoc(intel_connector, p), \ __assoc(intel_crtc, p), \ __assoc(intel_crtc_state, p), \ From 9aec90f9e545fba64ca47c18c37c588fa0d71b22 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:18 +0300 Subject: [PATCH 1089/1523] drm/i915/display: convert intel_link_bw.c to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_link_bw.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_link_bw.c | 25 ++++++++++---------- drivers/gpu/drm/i915/display/intel_link_bw.h | 2 -- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index 5db0724b6520..e7a9b860fac6 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -25,12 +25,13 @@ void intel_link_bw_init_limits(struct intel_atomic_state *state, struct intel_link_bw_limits *limits) { + struct intel_display *display = to_intel_display(state); struct drm_i915_private *i915 = to_i915(state->base.dev); enum pipe pipe; limits->force_fec_pipes = 0; limits->bpp_limit_reached_pipes = 0; - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, intel_crtc_for_pipe(i915, pipe)); @@ -69,12 +70,12 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, u8 pipe_mask, const char *reason) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); enum pipe max_bpp_pipe = INVALID_PIPE; struct intel_crtc *crtc; int max_bpp_x16 = 0; - for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, pipe_mask) { + for_each_intel_crtc_in_pipe_mask(display->drm, crtc, pipe_mask) { struct intel_crtc_state *crtc_state; int link_bpp_x16; @@ -136,7 +137,7 @@ intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, struct intel_link_bw_limits *new_limits, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); if (pipe == INVALID_PIPE) return false; @@ -145,7 +146,7 @@ intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, old_limits->max_bpp_x16[pipe]) return false; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, new_limits->bpp_limit_reached_pipes & BIT(pipe))) return false; @@ -178,7 +179,7 @@ static int check_all_link_config(struct intel_atomic_state *state, } static bool -assert_link_limit_change_valid(struct drm_i915_private *i915, +assert_link_limit_change_valid(struct intel_display *display, const struct intel_link_bw_limits *old_limits, const struct intel_link_bw_limits *new_limits) { @@ -186,14 +187,14 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, enum pipe pipe; /* FEC can't be forced off after it was forced on. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, (old_limits->force_fec_pipes & new_limits->force_fec_pipes) != old_limits->force_fec_pipes)) return false; - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* The bpp limit can only decrease. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, new_limits->max_bpp_x16[pipe] > old_limits->max_bpp_x16[pipe])) return false; @@ -204,7 +205,7 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, } /* At least one limit must change. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, !bpps_changed && new_limits->force_fec_pipes == old_limits->force_fec_pipes)) @@ -232,7 +233,7 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, int intel_link_bw_atomic_check(struct intel_atomic_state *state, struct intel_link_bw_limits *new_limits) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_link_bw_limits old_limits = *new_limits; int ret; @@ -240,7 +241,7 @@ int intel_link_bw_atomic_check(struct intel_atomic_state *state, if (ret != -EAGAIN) return ret; - if (!assert_link_limit_change_valid(i915, &old_limits, new_limits)) + if (!assert_link_limit_change_valid(display, &old_limits, new_limits)) return -EINVAL; return -EAGAIN; diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index 6b0ccfff59da..e69049cf178f 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -10,8 +10,6 @@ #include "intel_display_limits.h" -struct drm_i915_private; - struct intel_atomic_state; struct intel_crtc_state; From 1b9e8095fa3ac29e3a00ba1326976b479fd809e0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:19 +0300 Subject: [PATCH 1090/1523] drm/i915/display: convert intel_load_detect.c to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_load_detect.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-3-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_load_detect.c | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_load_detect.c b/drivers/gpu/drm/i915/display/intel_load_detect.c index d5a0aecf3e8f..b457c69dc0be 100644 --- a/drivers/gpu/drm/i915/display/intel_load_detect.c +++ b/drivers/gpu/drm/i915/display/intel_load_detect.c @@ -48,23 +48,22 @@ struct drm_atomic_state * intel_load_detect_get_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); struct intel_crtc *possible_crtc; struct intel_crtc *crtc = NULL; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_mode_config *config = &dev->mode_config; + struct drm_mode_config *config = &display->drm->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; struct intel_crtc_state *crtc_state; int ret; - drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, encoder->base.base.id, encoder->base.name); - drm_WARN_ON(dev, !drm_modeset_is_locked(&config->connection_mutex)); + drm_WARN_ON(display->drm, !drm_modeset_is_locked(&config->connection_mutex)); /* * Algorithm gets a little messy: @@ -89,7 +88,7 @@ intel_load_detect_get_pipe(struct drm_connector *connector, } /* Find an unused one (if possible) */ - for_each_intel_crtc(dev, possible_crtc) { + for_each_intel_crtc(display->drm, possible_crtc) { if (!(encoder->base.possible_crtcs & drm_crtc_mask(&possible_crtc->base))) continue; @@ -111,15 +110,15 @@ intel_load_detect_get_pipe(struct drm_connector *connector, * If we didn't find an unused CRTC, don't use any. */ if (!crtc) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "no pipe available for load-detect\n"); ret = -ENODEV; goto fail; } found: - state = drm_atomic_state_alloc(dev); - restore_state = drm_atomic_state_alloc(dev); + state = drm_atomic_state_alloc(display->drm); + restore_state = drm_atomic_state_alloc(display->drm); if (!state || !restore_state) { ret = -ENOMEM; goto fail; @@ -164,7 +163,7 @@ found: if (!ret) ret = drm_atomic_add_affected_planes(restore_state, &crtc->base); if (ret) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -172,7 +171,7 @@ found: ret = drm_atomic_commit(state); if (ret) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "failed to set mode on load-detect pipe\n"); goto fail; } @@ -204,13 +203,13 @@ void intel_load_detect_release_pipe(struct drm_connector *connector, struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_encoder *intel_encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(intel_encoder->base.dev); struct drm_encoder *encoder = &intel_encoder->base; int ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, encoder->base.id, encoder->name); @@ -219,7 +218,7 @@ void intel_load_detect_release_pipe(struct drm_connector *connector, ret = drm_atomic_helper_commit_duplicated_state(state, ctx); if (ret) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Couldn't release load detect pipe: %i\n", ret); drm_atomic_state_put(state); } From c6cbfc18138a22cfbffb208be92b18a531233528 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:20 +0300 Subject: [PATCH 1091/1523] drm/i915/alpm: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_alpm.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-4-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_alpm.c | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index f4f05a859379..82ee778b2efe 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -139,7 +139,7 @@ static int _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int aux_less_wake_time, aux_less_wake_lines, silence_period, lfps_half_cycle; @@ -158,7 +158,7 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, lfps_half_cycle > PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) aux_less_wake_lines = ALPM_CTL_AUX_LESS_WAKE_TIME_MASK; intel_dp->alpm_parameters.aux_less_wake_lines = aux_less_wake_lines; @@ -171,10 +171,10 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int check_entry_lines; - if (DISPLAY_VER(i915) < 20) + if (DISPLAY_VER(display) < 20) return true; /* ALPM Entry Check = 2 + CEILING( 5us /tline ) */ @@ -187,7 +187,7 @@ static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, if (!_lnl_compute_aux_less_alpm_params(intel_dp, crtc_state)) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) check_entry_lines = 15; intel_dp->alpm_parameters.check_entry_lines = check_entry_lines; @@ -212,9 +212,9 @@ static int tgl_io_buffer_wake_time(void) static int io_buffer_wake_time(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 12) + if (DISPLAY_VER(display) >= 12) return tgl_io_buffer_wake_time(); else return skl_io_buffer_wake_time(); @@ -223,7 +223,7 @@ static int io_buffer_wake_time(const struct intel_crtc_state *crtc_state) bool intel_alpm_compute_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time; int tfw_exit_latency = 20; /* eDP spec */ int phy_wake = 4; /* eDP spec */ @@ -236,9 +236,9 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, fast_wake_time = precharge + preamble + phy_wake + tfw_exit_latency; - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) max_wake_lines = 68; - else if (DISPLAY_VER(i915) >= 12) + else if (DISPLAY_VER(display) >= 12) max_wake_lines = 12; else max_wake_lines = 8; @@ -255,7 +255,7 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, if (!_lnl_compute_alpm_params(intel_dp, crtc_state)) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) io_wake_lines = fast_wake_lines = max_wake_lines; /* According to Bspec lower limit should be set as 7 lines. */ @@ -269,7 +269,7 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int waketime_in_lines, first_sdp_position; int context_latency, guardband; @@ -277,7 +277,7 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, if (!intel_dp_is_edp(intel_dp)) return; - if (DISPLAY_VER(i915) < 20) + if (DISPLAY_VER(display) < 20) return; if (!intel_dp->as_sdp_supported) @@ -309,13 +309,13 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, static void lnl_alpm_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = dp_to_dig_port(intel_dp)->base.port; u32 alpm_ctl; - if (DISPLAY_VER(dev_priv) < 20 || (!intel_dp->psr.sel_update_enabled && - !intel_dp_is_edp(intel_dp))) + if (DISPLAY_VER(display) < 20 || + (!intel_dp->psr.sel_update_enabled && !intel_dp_is_edp(intel_dp))) return; /* @@ -329,16 +329,16 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, ALPM_CTL_AUX_LESS_SLEEP_HOLD_TIME_50_SYMBOLS | ALPM_CTL_AUX_LESS_WAKE_TIME(intel_dp->alpm_parameters.aux_less_wake_lines); - intel_de_write(dev_priv, - PORT_ALPM_CTL(dev_priv, port), + intel_de_write(display, + PORT_ALPM_CTL(display, port), PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE | PORT_ALPM_CTL_MAX_PHY_SWING_SETUP(15) | PORT_ALPM_CTL_MAX_PHY_SWING_HOLD(0) | PORT_ALPM_CTL_SILENCE_PERIOD( intel_dp->alpm_parameters.silence_period_sym_clocks)); - intel_de_write(dev_priv, - PORT_ALPM_LFPS_CTL(dev_priv, port), + intel_de_write(display, + PORT_ALPM_LFPS_CTL(display, port), PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(10) | PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION( intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | @@ -356,7 +356,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(intel_dp->alpm_parameters.check_entry_lines); - intel_de_write(dev_priv, ALPM_CTL(dev_priv, cpu_transcoder), alpm_ctl); + intel_de_write(display, ALPM_CTL(display, cpu_transcoder), alpm_ctl); } void intel_alpm_configure(struct intel_dp *intel_dp, @@ -368,14 +368,14 @@ void intel_alpm_configure(struct intel_dp *intel_dp, static int i915_edp_lobf_info_show(struct seq_file *m, void *data) { struct intel_connector *connector = m->private; - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; enum transcoder cpu_transcoder; u32 alpm_ctl; int ret; - ret = drm_modeset_lock_single_interruptible(&dev_priv->drm.mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&display->drm->mode_config.connection_mutex); if (ret) return ret; @@ -387,14 +387,14 @@ static int i915_edp_lobf_info_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); cpu_transcoder = crtc_state->cpu_transcoder; - alpm_ctl = intel_de_read(dev_priv, ALPM_CTL(dev_priv, cpu_transcoder)); + alpm_ctl = intel_de_read(display, ALPM_CTL(display, cpu_transcoder)); seq_printf(m, "LOBF status: %s\n", str_enabled_disabled(alpm_ctl & ALPM_CTL_LOBF_ENABLE)); seq_printf(m, "Aux-wake alpm status: %s\n", str_enabled_disabled(!(alpm_ctl & ALPM_CTL_ALPM_AUX_LESS_ENABLE))); seq_printf(m, "Aux-less alpm status: %s\n", str_enabled_disabled(alpm_ctl & ALPM_CTL_ALPM_AUX_LESS_ENABLE)); out: - drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + drm_modeset_unlock(&display->drm->mode_config.connection_mutex); return ret; } @@ -403,10 +403,10 @@ DEFINE_SHOW_ATTRIBUTE(i915_edp_lobf_info); void intel_alpm_lobf_debugfs_add(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct dentry *root = connector->base.debugfs_entry; - if (DISPLAY_VER(i915) < 20 || + if (DISPLAY_VER(display) < 20 || connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) return; From d0fc54a201d673013b031ebce32e5d8a7e3bfb70 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:21 +0300 Subject: [PATCH 1092/1523] drm/i915/lspcon: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_lspcon.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-5-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_lspcon.c | 115 ++++++++++---------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index 8b26354d6e53..f9db867fae89 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -79,33 +79,33 @@ static const char *lspcon_mode_name(enum drm_lspcon_mode mode) static bool lspcon_detect_vendor(struct intel_lspcon *lspcon) { - struct intel_dp *dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(dp); + struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); + struct intel_display *display = to_intel_display(intel_dp); struct drm_dp_dpcd_ident *ident; u32 vendor_oui; - if (drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd))) { - drm_err(&i915->drm, "Can't read description\n"); + if (drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd))) { + drm_err(display->drm, "Can't read description\n"); return false; } - ident = &dp->desc.ident; + ident = &intel_dp->desc.ident; vendor_oui = (ident->oui[0] << 16) | (ident->oui[1] << 8) | ident->oui[2]; switch (vendor_oui) { case LSPCON_VENDOR_MCA_OUI: lspcon->vendor = LSPCON_VENDOR_MCA; - drm_dbg_kms(&i915->drm, "Vendor: Mega Chips\n"); + drm_dbg_kms(display->drm, "Vendor: Mega Chips\n"); break; case LSPCON_VENDOR_PARADE_OUI: lspcon->vendor = LSPCON_VENDOR_PARADE; - drm_dbg_kms(&i915->drm, "Vendor: Parade Tech\n"); + drm_dbg_kms(display->drm, "Vendor: Parade Tech\n"); break; default: - drm_err(&i915->drm, "Invalid/Unknown vendor OUI\n"); + drm_err(display->drm, "Invalid/Unknown vendor OUI\n"); return false; } @@ -123,7 +123,7 @@ static u32 get_hdr_status_reg(struct intel_lspcon *lspcon) void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 hdr_caps; int ret; @@ -131,10 +131,10 @@ void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) &hdr_caps, 1); if (ret < 0) { - drm_dbg_kms(&i915->drm, "HDR capability detection failed\n"); + drm_dbg_kms(display->drm, "HDR capability detection failed\n"); lspcon->hdr_supported = false; } else if (hdr_caps & 0x1) { - drm_dbg_kms(&i915->drm, "LSPCON capable of HDR\n"); + drm_dbg_kms(display->drm, "LSPCON capable of HDR\n"); lspcon->hdr_supported = true; } } @@ -142,12 +142,12 @@ void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum drm_lspcon_mode current_mode; struct i2c_adapter *ddc = &intel_dp->aux.ddc; if (drm_lspcon_get_mode(intel_dp->aux.drm_dev, ddc, ¤t_mode)) { - drm_dbg_kms(&i915->drm, "Error reading LSPCON mode\n"); + drm_dbg_kms(display->drm, "Error reading LSPCON mode\n"); return DRM_LSPCON_MODE_INVALID; } return current_mode; @@ -169,23 +169,23 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode mode) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum drm_lspcon_mode current_mode; current_mode = lspcon_get_current_mode(lspcon); if (current_mode == mode) goto out; - drm_dbg_kms(&i915->drm, "Waiting for LSPCON mode %s to settle\n", + drm_dbg_kms(display->drm, "Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, lspcon_get_mode_settle_timeout(lspcon)); if (current_mode != mode) - drm_err(&i915->drm, "LSPCON mode hasn't settled\n"); + drm_err(display->drm, "LSPCON mode hasn't settled\n"); out: - drm_dbg_kms(&i915->drm, "Current LSPCON mode %s\n", + drm_dbg_kms(display->drm, "Current LSPCON mode %s\n", lspcon_mode_name(current_mode)); return current_mode; @@ -195,46 +195,46 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode mode) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int err; enum drm_lspcon_mode current_mode; struct i2c_adapter *ddc = &intel_dp->aux.ddc; err = drm_lspcon_get_mode(intel_dp->aux.drm_dev, ddc, ¤t_mode); if (err) { - drm_err(&i915->drm, "Error reading LSPCON mode\n"); + drm_err(display->drm, "Error reading LSPCON mode\n"); return err; } if (current_mode == mode) { - drm_dbg_kms(&i915->drm, "Current mode = desired LSPCON mode\n"); + drm_dbg_kms(display->drm, "Current mode = desired LSPCON mode\n"); return 0; } err = drm_lspcon_set_mode(intel_dp->aux.drm_dev, ddc, mode); if (err < 0) { - drm_err(&i915->drm, "LSPCON mode change failed\n"); + drm_err(display->drm, "LSPCON mode change failed\n"); return err; } lspcon->mode = mode; - drm_dbg_kms(&i915->drm, "LSPCON mode changed done\n"); + drm_dbg_kms(display->drm, "LSPCON mode changed done\n"); return 0; } static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 rev; if (drm_dp_dpcd_readb(&lspcon_to_intel_dp(lspcon)->aux, DP_DPCD_REV, &rev) != 1) { - drm_dbg_kms(&i915->drm, "Native AUX CH down\n"); + drm_dbg_kms(display->drm, "Native AUX CH down\n"); return false; } - drm_dbg_kms(&i915->drm, "Native AUX CH up, DPCD version: %d.%d\n", + drm_dbg_kms(display->drm, "Native AUX CH up, DPCD version: %d.%d\n", rev >> 4, rev & 0xf); return true; @@ -242,12 +242,12 @@ static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) static bool lspcon_probe(struct intel_lspcon *lspcon) { - int retry; - enum drm_dp_dual_mode_type adaptor_type; struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct i2c_adapter *ddc = &intel_dp->aux.ddc; + enum drm_dp_dual_mode_type adaptor_type; enum drm_lspcon_mode expected_mode; + int retry; expected_mode = lspcon_wake_native_aux_ch(lspcon) ? DRM_LSPCON_MODE_PCON : DRM_LSPCON_MODE_LS; @@ -263,13 +263,13 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) } if (adaptor_type != DRM_DP_DUAL_MODE_LSPCON) { - drm_dbg_kms(&i915->drm, "No LSPCON detected, found %s\n", + drm_dbg_kms(display->drm, "No LSPCON detected, found %s\n", drm_dp_get_dual_mode_type_name(adaptor_type)); return false; } /* Yay ... got a LSPCON device */ - drm_dbg_kms(&i915->drm, "LSPCON detected\n"); + drm_dbg_kms(display->drm, "LSPCON detected\n"); lspcon->mode = lspcon_wait_mode(lspcon, expected_mode); /* @@ -279,7 +279,7 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) */ if (lspcon->mode != DRM_LSPCON_MODE_PCON) { if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON) < 0) { - drm_err(&i915->drm, "LSPCON mode change to PCON failed\n"); + drm_err(display->drm, "LSPCON mode change to PCON failed\n"); return false; } } @@ -289,13 +289,13 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); unsigned long start = jiffies; while (1) { if (intel_digital_port_connected(&dig_port->base)) { - drm_dbg_kms(&i915->drm, "LSPCON recovering in PCON mode after %u ms\n", + drm_dbg_kms(display->drm, "LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; } @@ -306,7 +306,7 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) usleep_range(10000, 15000); } - drm_dbg_kms(&i915->drm, "LSPCON DP descriptor mismatch after resume\n"); + drm_dbg_kms(display->drm, "LSPCON DP descriptor mismatch after resume\n"); } static bool lspcon_parade_fw_ready(struct drm_dp_aux *aux) @@ -477,10 +477,10 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, unsigned int type, const void *frame, ssize_t len) { - bool ret = true; + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); + bool ret = true; switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -492,7 +492,7 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, frame, len); break; case HDMI_PACKET_TYPE_GAMUT_METADATA: - drm_dbg_kms(&i915->drm, "Update HDR metadata for lspcon\n"); + drm_dbg_kms(display->drm, "Update HDR metadata for lspcon\n"); /* It uses the legacy hsw implementation for the same */ hsw_write_infoframe(encoder, crtc_state, type, frame, len); break; @@ -501,7 +501,7 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, } if (!ret) { - drm_err(&i915->drm, "Failed to write infoframes\n"); + drm_err(display->drm, "Failed to write infoframes\n"); return; } } @@ -522,17 +522,17 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - ssize_t ret; - union hdmi_infoframe frame; - u8 buf[VIDEO_DIP_DATA_SIZE]; + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_i915_private *i915 = to_i915(encoder->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + union hdmi_infoframe frame; + u8 buf[VIDEO_DIP_DATA_SIZE]; + ssize_t ret; if (!lspcon->active) { - drm_err(&i915->drm, "Writing infoframes while LSPCON disabled ?\n"); + drm_err(display->drm, "Writing infoframes while LSPCON disabled ?\n"); return; } @@ -542,7 +542,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, conn_state->connector, adjusted_mode); if (ret < 0) { - drm_err(&i915->drm, "couldn't fill AVI infoframe\n"); + drm_err(display->drm, "couldn't fill AVI infoframe\n"); return; } @@ -583,7 +583,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, ret = hdmi_infoframe_pack(&frame, buf, sizeof(buf)); if (ret < 0) { - drm_err(&i915->drm, "Failed to pack AVI IF\n"); + drm_err(display->drm, "Failed to pack AVI IF\n"); return; } @@ -624,9 +624,9 @@ static bool _lspcon_read_avi_infoframe_enabled_parade(struct drm_dp_aux *aux) u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); bool infoframes_enabled; u32 val = 0; u32 mask, tmp; @@ -640,8 +640,8 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, val |= intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_AVI); if (lspcon->hdr_supported) { - tmp = intel_de_read(dev_priv, - HSW_TVIDEO_DIP_CTL(dev_priv, pipe_config->cpu_transcoder)); + tmp = intel_de_read(display, + HSW_TVIDEO_DIP_CTL(display, pipe_config->cpu_transcoder)); mask = VIDEO_DIP_ENABLE_GMP_HSW; if (tmp & mask) @@ -658,32 +658,32 @@ void lspcon_wait_pcon_mode(struct intel_lspcon *lspcon) bool lspcon_init(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_dp *intel_dp = &dig_port->dp; struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct drm_connector *connector = &intel_dp->attached_connector->base; lspcon->active = false; lspcon->mode = DRM_LSPCON_MODE_INVALID; if (!lspcon_probe(lspcon)) { - drm_err(&i915->drm, "Failed to probe lspcon\n"); + drm_err(display->drm, "Failed to probe lspcon\n"); return false; } if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd) != 0) { - drm_err(&i915->drm, "LSPCON DPCD read failed\n"); + drm_err(display->drm, "LSPCON DPCD read failed\n"); return false; } if (!lspcon_detect_vendor(lspcon)) { - drm_err(&i915->drm, "LSPCON vendor detection failed\n"); + drm_err(display->drm, "LSPCON vendor detection failed\n"); return false; } connector->ycbcr_420_allowed = true; lspcon->active = true; - drm_dbg_kms(&i915->drm, "Success: LSPCON init\n"); + drm_dbg_kms(display->drm, "Success: LSPCON init\n"); return true; } @@ -697,9 +697,8 @@ u32 intel_lspcon_infoframes_enabled(struct intel_encoder *encoder, void lspcon_resume(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *i915 = to_i915(dev); enum drm_lspcon_mode expected_mode; if (!intel_bios_encoder_is_lspcon(dig_port->base.devdata)) @@ -707,7 +706,7 @@ void lspcon_resume(struct intel_digital_port *dig_port) if (!lspcon->active) { if (!lspcon_init(dig_port)) { - drm_err(&i915->drm, "LSPCON init failed on port %c\n", + drm_err(display->drm, "LSPCON init failed on port %c\n", port_name(dig_port->base.port)); return; } @@ -724,7 +723,7 @@ void lspcon_resume(struct intel_digital_port *dig_port) return; if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON)) - drm_err(&i915->drm, "LSPCON resume failed\n"); + drm_err(display->drm, "LSPCON resume failed\n"); else - drm_dbg_kms(&i915->drm, "LSPCON resume success\n"); + drm_dbg_kms(display->drm, "LSPCON resume success\n"); } From 6276706f33cc38ec59ebf03b680240ff74740c8a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:22 +0300 Subject: [PATCH 1093/1523] drm/i915/display: convert dp aux backlight to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_dp_aux_backlight.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-6-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_dp_aux_backlight.c | 70 +++++++++++-------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 8ce60d53dcde..33f72db99b58 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -109,7 +109,7 @@ static bool is_intel_tcon_cap(const u8 tcon_cap[4]) static bool intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct drm_dp_aux *aux = &intel_dp->aux; struct intel_panel *panel = &connector->panel; @@ -122,7 +122,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) if (ret != sizeof(tcon_cap)) return false; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Detected %s HDR backlight interface version %d\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Detected %s HDR backlight interface version %d\n", connector->base.base.id, connector->base.name, is_intel_tcon_cap(tcon_cap) ? "Intel" : "unsupported", tcon_cap[0]); @@ -141,10 +142,10 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) * HDR static metadata we need to start maintaining table of * ranges for such panels. */ - if (i915->display.params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && + if (display->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & BIT(HDMI_STATIC_METADATA_TYPE1))) { - drm_info(&i915->drm, + drm_info(display->drm, "[CONNECTOR:%d:%s] Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n", connector->base.base.id, connector->base.name, INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL); @@ -170,14 +171,15 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) static u32 intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 tmp; u8 buf[2] = {}; if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) != 1) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read current backlight mode from DPCD\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read current backlight mode from DPCD\n", connector->base.base.id, connector->base.name); return 0; } @@ -195,7 +197,8 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, sizeof(buf)) != sizeof(buf)) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read brightness from DPCD\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read brightness from DPCD\n", connector->base.base.id, connector->base.name); return 0; } @@ -253,8 +256,8 @@ static void intel_dp_aux_write_content_luminance(struct intel_connector *connector, struct hdr_output_metadata *hdr_metadata) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - struct drm_i915_private *i915 = to_i915(connector->base.dev); int ret; u8 buf[4]; @@ -270,7 +273,7 @@ intel_dp_aux_write_content_luminance(struct intel_connector *connector, INTEL_EDP_HDR_CONTENT_LUMINANCE, buf, sizeof(buf)); if (ret < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Content Luminance DPCD reg write failed, err:-%d\n", ret); } @@ -280,7 +283,7 @@ intel_dp_aux_fill_hdr_tcon_params(const struct drm_connector_state *conn_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); /* * According to spec segmented backlight needs to be set whenever panel is in @@ -291,7 +294,7 @@ intel_dp_aux_fill_hdr_tcon_params(const struct drm_connector_state *conn_state, *ctrl |= INTEL_EDP_HDR_TCON_2084_DECODE_ENABLE; } - if (DISPLAY_VER(i915) < 11) + if (DISPLAY_VER(display) < 11) *ctrl &= ~INTEL_EDP_HDR_TCON_TONE_MAPPING_ENABLE; if (panel->backlight.edp.intel_cap.supports_2020_gamut && @@ -311,9 +314,9 @@ static void intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state, u32 level) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct hdr_output_metadata *hdr_metadata; int ret; @@ -323,7 +326,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); if (ret != 1) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read current backlight control mode: %d\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read current backlight control mode: %d\n", connector->base.base.id, connector->base.name, ret); return; } @@ -346,7 +350,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, if (ctrl != old_ctrl && drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) != 1) - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to configure DPCD brightness controls\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to configure DPCD brightness controls\n", connector->base.base.id, connector->base.name); if (intel_dp_in_hdr_mode(conn_state)) { @@ -377,7 +382,7 @@ static const char *dpcd_vs_pwm_str(bool aux) static void intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); int ret; @@ -392,7 +397,7 @@ intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) INTEL_EDP_HDR_PANEL_LUMINANCE_OVERRIDE, buf, sizeof(buf)); if (ret < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Panel Luminance DPCD reg write failed, err:-%d\n", ret); } @@ -400,20 +405,21 @@ intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) static int intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct drm_luminance_range_info *luminance_range = &connector->base.display_info.luminance_range; int ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDR backlight is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] SDR backlight is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.intel_cap.sdr_uses_aux)); if (!panel->backlight.edp.intel_cap.sdr_uses_aux) { ret = panel->backlight.pwm_funcs->setup(connector, pipe); if (ret < 0) { - drm_err(&i915->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s] Failed to setup SDR backlight controls through PWM: %d\n", connector->base.base.id, connector->base.name, ret); return ret; @@ -430,7 +436,8 @@ intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pi intel_dp_aux_write_panel_luminance_override(connector); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Using AUX HDR interface for backlight control (range %d..%d)\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Using AUX HDR interface for backlight control (range %d..%d)\n", connector->base.base.id, connector->base.name, panel->backlight.min, panel->backlight.max); @@ -501,9 +508,9 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); u16 current_level; u8 current_mode; int ret; @@ -514,17 +521,19 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, if (ret < 0) return ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_enable)); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_set)); if (!panel->backlight.edp.vesa.info.aux_set || !panel->backlight.edp.vesa.info.aux_enable) { ret = panel->backlight.pwm_funcs->setup(connector, pipe); if (ret < 0) { - drm_err(&i915->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s] Failed to setup PWM backlight controls for eDP backlight: %d\n", connector->base.base.id, connector->base.name, ret); return ret; @@ -553,7 +562,8 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, } } - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Using AUX VESA interface for backlight control\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Using AUX VESA interface for backlight control\n", connector->base.base.id, connector->base.name); return 0; @@ -562,11 +572,12 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, static bool intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) { - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX Backlight Control Supported!\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX Backlight Control Supported!\n", connector->base.base.id, connector->base.name); return true; } @@ -591,16 +602,15 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = { int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); struct drm_device *dev = connector->base.dev; struct intel_panel *panel = &connector->panel; - struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); bool try_intel_interface = false, try_vesa_interface = false; /* Check the VBT and user's module parameters to figure out which * interfaces to probe */ - switch (i915->display.params.enable_dpcd_backlight) { + switch (display->params.enable_dpcd_backlight) { case INTEL_DP_AUX_BACKLIGHT_OFF: return -ENODEV; case INTEL_DP_AUX_BACKLIGHT_AUTO: From 7d085bb17e3cab501218a7a4604aff6c4a8b9585 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 19:41:23 +0300 Subject: [PATCH 1094/1523] drm/i915/hti: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_hti.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240813164123.2674462-7-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- .../drm/i915/display/intel_display_driver.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 3 ++- drivers/gpu/drm/i915/display/intel_hti.c | 20 +++++++++---------- drivers/gpu/drm/i915/display/intel_hti.h | 8 ++++---- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 926cf3751593..25ff3ff0ab95 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4900,7 +4900,7 @@ void intel_ddi_init(struct intel_display *display, * driver. In that case we should skip initializing the corresponding * outputs. */ - if (intel_hti_uses_phy(dev_priv, phy)) { + if (intel_hti_uses_phy(display, phy)) { drm_dbg_kms(&dev_priv->drm, "PORT %c / PHY %c reserved by HTI\n", port_name(port), phy_name(phy)); return; diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 328d8b5a6b66..eced20d2ce6e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -453,7 +453,7 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) if (i915->display.cdclk.max_cdclk_freq == 0) intel_update_max_cdclk(i915); - intel_hti_init(i915); + intel_hti_init(display); /* Just disable it once at startup */ intel_vga_disable(i915); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 292d163036b1..f490b2157828 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3339,6 +3339,7 @@ static int icl_get_combo_phy_dpll(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -3379,7 +3380,7 @@ static int icl_get_combo_phy_dpll(struct intel_atomic_state *state, } /* Eliminate DPLLs from consideration if reserved by HTI */ - dpll_mask &= ~intel_hti_dpll_mask(i915); + dpll_mask &= ~intel_hti_dpll_mask(display); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, diff --git a/drivers/gpu/drm/i915/display/intel_hti.c b/drivers/gpu/drm/i915/display/intel_hti.c index a92d008d4e6e..19d1f196d9fb 100644 --- a/drivers/gpu/drm/i915/display/intel_hti.c +++ b/drivers/gpu/drm/i915/display/intel_hti.c @@ -9,33 +9,33 @@ #include "intel_hti.h" #include "intel_hti_regs.h" -void intel_hti_init(struct drm_i915_private *i915) +void intel_hti_init(struct intel_display *display) { /* * If the platform has HTI, we need to find out whether it has reserved * any display resources before we create our display outputs. */ - if (DISPLAY_INFO(i915)->has_hti) - i915->display.hti.state = intel_de_read(i915, HDPORT_STATE); + if (DISPLAY_INFO(display)->has_hti) + display->hti.state = intel_de_read(display, HDPORT_STATE); } -bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy) +bool intel_hti_uses_phy(struct intel_display *display, enum phy phy) { - if (drm_WARN_ON(&i915->drm, phy == PHY_NONE)) + if (drm_WARN_ON(display->drm, phy == PHY_NONE)) return false; - return i915->display.hti.state & HDPORT_ENABLED && - i915->display.hti.state & HDPORT_DDI_USED(phy); + return display->hti.state & HDPORT_ENABLED && + display->hti.state & HDPORT_DDI_USED(phy); } -u32 intel_hti_dpll_mask(struct drm_i915_private *i915) +u32 intel_hti_dpll_mask(struct intel_display *display) { - if (!(i915->display.hti.state & HDPORT_ENABLED)) + if (!(display->hti.state & HDPORT_ENABLED)) return 0; /* * Note: This is subtle. The values must coincide with what's defined * for the platform. */ - return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->display.hti.state); + return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, display->hti.state); } diff --git a/drivers/gpu/drm/i915/display/intel_hti.h b/drivers/gpu/drm/i915/display/intel_hti.h index 2893d6668657..b692571c5558 100644 --- a/drivers/gpu/drm/i915/display/intel_hti.h +++ b/drivers/gpu/drm/i915/display/intel_hti.h @@ -8,11 +8,11 @@ #include -struct drm_i915_private; +struct intel_display; enum phy; -void intel_hti_init(struct drm_i915_private *i915); -bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy); -u32 intel_hti_dpll_mask(struct drm_i915_private *i915); +void intel_hti_init(struct intel_display *display); +bool intel_hti_uses_phy(struct intel_display *display, enum phy phy); +u32 intel_hti_dpll_mask(struct intel_display *display); #endif /* __INTEL_HTI_H__ */ From fbc64aafc99baabe4d4e4d5a58d6113aad7909c7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 15 Aug 2024 15:00:02 +0300 Subject: [PATCH 1095/1523] drm/i915: make intel_display_power_domain_str() static The function isn't used outside of intel_display_power.c. Make it static. Suggested-by: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240815120002.3472727-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/display/intel_display_power.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index cf8b38f2ebf5..39ab3117265c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -36,7 +36,7 @@ for_each_power_well_reverse(__dev_priv, __power_well) \ for_each_if(test_bit((__domain), (__power_well)->domains.bits)) -const char * +static const char * intel_display_power_domain_str(enum intel_display_power_domain domain) { switch (domain) { diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 0962f6aaeee6..425452c5a469 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -183,9 +183,6 @@ void intel_display_power_resume(struct drm_i915_private *i915); void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state); -const char * -intel_display_power_domain_str(enum intel_display_power_domain domain); - bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, From 8d019b15ddd55d6dc5685b1f51902c4aa8e01939 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:54 +0300 Subject: [PATCH 1096/1523] selftests: net: local_termination: refactor macvlan creation/deletion This will be used in other subtests as well; make new macvlan_create() and macvlan_destroy() functions. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..36f3d577d0be 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -247,19 +247,29 @@ bridge_destroy() ip link del br0 } +macvlan_create() +{ + local lower=$1 + + ip link add link $lower name macvlan0 type macvlan mode private + ip link set macvlan0 address $MACVLAN_ADDR + ip link set macvlan0 up +} + +macvlan_destroy() +{ + ip link del macvlan0 +} + standalone() { h1_create h2_create - - ip link add link $h2 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + macvlan_create $h2 run_test $h2 - ip link del macvlan0 - + macvlan_destroy h2_destroy h1_destroy } @@ -268,15 +278,11 @@ bridge() { h1_create bridge_create - - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + macvlan_create br0 run_test br0 - ip link del macvlan0 - + macvlan_destroy bridge_destroy h1_destroy } From 4261fa35185c0112acca0496d3732c8fcfe1dcf2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:55 +0300 Subject: [PATCH 1097/1523] selftests: net: local_termination: parameterize sending interface In future changes we will want to subject the DUT, $h2, to additional VLAN-tagged traffic. For that, we need to run the tests using $h1.100 as a sending interface, rather than the currently hardcoded $h1. Add a parameter to run_test() and modify its 2 callers to explicitly pass $h1, as was implicit before. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 36f3d577d0be..92f0e242d119 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -104,44 +104,45 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name sleep 1 @@ -267,7 +268,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h2 + run_test $h1 $h2 macvlan_destroy h2_destroy @@ -280,7 +281,7 @@ bridge() bridge_create macvlan_create br0 - run_test br0 + run_test $h1 br0 macvlan_destroy bridge_destroy From df7cf5cc551c7c0a92520e91e1184993784c6386 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:56 +0300 Subject: [PATCH 1098/1523] selftests: net: local_termination: parameterize test name There are upcoming tests which verify the RX filtering of a bridge (or bridge port), but under differing vlan_filtering conditions. Since we currently print $h2 (the DUT) in the log_test() output, it becomes necessary to make a further distinction between tests, to not give the user the impression that the exact same thing is run twice. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 92f0e242d119..af284edaf401 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -68,10 +68,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +82,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -106,6 +107,7 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local test_name="$1"; shift local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) @@ -150,61 +152,61 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name \ "Unicast IPv4 to unknown MAC address, allmulti" \ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" tcpdump_cleanup $rcv_if_name } From 5b8e74182ed3d4f1c38c626e6120275ca9d92bee Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:57 +0300 Subject: [PATCH 1099/1523] selftests: net: local_termination: add one more test for VLAN-aware bridges The current bridge() test is for packet reception on a VLAN-unaware bridge. Some things are different enough with VLAN-aware bridges that it's worth renaming this test into vlan_unaware_bridge(), and add a new vlan_aware_bridge() test. The two will share the same implementation: bridge() becomes a common function, which receives $vlan_filtering as an argument. Rename it to test_bridge() at the same time, because just bridge() pollutes the global namespace and we cannot invoke the binary with the same name from the iproute2 package currently. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index af284edaf401..5aa364b40e33 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -233,7 +233,9 @@ h2_destroy() bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up @@ -277,10 +279,12 @@ standalone() h1_destroy } -bridge() +test_bridge() { + local vlan_filtering=$1 + h1_create - bridge_create + bridge_create $vlan_filtering macvlan_create br0 run_test $h1 br0 @@ -290,6 +294,16 @@ bridge() h1_destroy } +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + cleanup() { pre_cleanup From 5fea8bb009744bbb90b3f6ca41c558429ee4c849 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:58 +0300 Subject: [PATCH 1100/1523] selftests: net: local_termination: introduce new tests which capture VLAN behavior Add more coverage to the local termination selftest as follows: - 8021q upper of $h2 - 8021q upper of $h2, where $h2 is a port of a VLAN-unaware bridge - 8021q upper of $h2, where $h2 is a port of a VLAN-aware bridge - 8021q upper of VLAN-unaware br0, which is the upper of $h2 - 8021q upper of VLAN-aware br0, which is the upper of $h2 Especially the cases with traffic sent through the VLAN upper of a VLAN-aware bridge port will be immediately relevant when we will start transmitting PTP packets as an additional kind of traffic. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 117 ++++++++++++++++-- 1 file changed, 110 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 5aa364b40e33..e22c6a693bef 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -231,6 +233,30 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { local vlan_filtering=$1 @@ -241,14 +267,10 @@ bridge_create() ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } @@ -272,7 +294,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h1 $h2 + run_test $h1 $h2 "$h2" macvlan_destroy h2_destroy @@ -285,11 +307,13 @@ test_bridge() h1_create bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 + run_test $h1 br0 "vlan_filtering=$vlan_filtering bridge" macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 bridge_destroy h1_destroy } @@ -304,6 +328,85 @@ vlan_aware_bridge() test_bridge 1 } +test_vlan() +{ + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local vlan_filtering=$1 + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy + bridge_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local vlan_filtering=$1 + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 +} + cleanup() { pre_cleanup From 9aa3749ca4a880c1a59720aab3eacf344ed8d68d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:59 +0300 Subject: [PATCH 1101/1523] selftests: net: local_termination: don't use xfail_on_veth() xfail_on_veth() for this test is an incorrect approximation which gives false positives and false negatives. When local_termination fails with "reception succeeded, but should have failed", it is because the DUT ($h2) accepts packets even when not configured as promiscuous. This is not something specific to veth; even the bridge behaves that way, but this is not captured by the xfail_on_veth test. The IFF_UNICAST_FLT flag is not explicitly exported to user space, but it can somewhat be determined from the interface's behavior. We have to create a macvlan upper with a different MAC address. This forces a dev_uc_add() call in the kernel. When the unicast filtering list is not empty, but the device doesn't support IFF_UNICAST_FLT, __dev_set_rx_mode() force-enables promiscuity on the interface, to ensure correct behavior (that the requested address is received). We can monitor the change in the promiscuity flag and infer from it whether the device supports unicast filtering. There is no equivalent thing for allmulti, unfortunately. We never know what's hiding behind a device which has allmulti=off. Whether it will actually perform RX multicast filtering of unknown traffic is a strong "maybe". The bridge driver, for example, completely ignores the flag. We'll have to keep the xfail behavior, but instead of XFAIL on just veth, always XFAIL. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 57 ++++++++++++++++++ .../net/forwarding/local_termination.sh | 58 ++++++++++++++----- 2 files changed, 99 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff..718d04a4f72d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -1113,6 +1118,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2229,3 +2267,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index e22c6a693bef..80ea4c10d764 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -109,9 +109,11 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local no_unicast_flt=$1; shift local test_name="$1"; shift local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name @@ -160,26 +162,26 @@ run_test() "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false "$test_name" + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false "$test_name" + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ @@ -197,7 +199,7 @@ run_test() "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ false "$test_name" @@ -290,11 +292,17 @@ macvlan_destroy() standalone() { + local no_unicast_flt=true + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_create h2_create macvlan_create $h2 - run_test $h1 $h2 "$h2" + run_test $h1 $h2 $no_unicast_flt "$h2" macvlan_destroy h2_destroy @@ -303,6 +311,7 @@ standalone() test_bridge() { + local no_unicast_flt=true local vlan_filtering=$1 h1_create @@ -310,7 +319,7 @@ test_bridge() simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 "vlan_filtering=$vlan_filtering bridge" + run_test $h1 br0 $no_unicast_flt "vlan_filtering=$vlan_filtering bridge" macvlan_destroy simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 @@ -330,11 +339,17 @@ vlan_aware_bridge() test_vlan() { + local no_unicast_flt=true + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_vlan_create h2_vlan_create macvlan_create $h2.100 - run_test $h1.100 $h2.100 "VLAN upper" + run_test $h1.100 $h2.100 $no_unicast_flt "VLAN upper" macvlan_destroy h2_vlan_destroy @@ -343,14 +358,23 @@ test_vlan() vlan_over_bridged_port() { + local no_unicast_flt=true local vlan_filtering=$1 + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + h1_vlan_create h2_vlan_create bridge_create $vlan_filtering macvlan_create $h2.100 - run_test $h1.100 $h2.100 "VLAN over vlan_filtering=$vlan_filtering bridged port" + run_test $h1.100 $h2.100 $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" macvlan_destroy bridge_destroy @@ -370,6 +394,7 @@ vlan_over_vlan_aware_bridged_port() vlan_over_bridge() { + local no_unicast_flt=true local vlan_filtering=$1 h1_vlan_create @@ -383,7 +408,8 @@ vlan_over_bridge() bridge vlan add dev br0 vid 100 self fi - run_test $h1.100 br0.100 "VLAN over vlan_filtering=$vlan_filtering bridge" + run_test $h1.100 br0.100 $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" if [ $vlan_filtering = 1 ]; then bridge vlan del dev br0 vid 100 self From 237979504264912a9797dabc0db35126e705fe0d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:00 +0300 Subject: [PATCH 1102/1523] selftests: net: local_termination: add PTP frames to the mix A breakage in the felix DSA driver shows we do not have enough test coverage. More generally, it is sufficiently special that it is likely drivers will treat it differently. This is not meant to be a full PTP test, it just makes sure that PTP packets sent to the different addresses corresponding to their profiles are received correctly. The local_termination selftest seemed like the most appropriate place for this addition. PTP RX/TX in some cases makes no sense (over a bridge) and this is why $skip_ptp exists. And in others - PTP over a bridge port - the IP stack needs convincing through the available bridge netfilter hooks to leave the PTP packets alone and not stolen by the bridge rx_handler. It is safe to assume that users have that figured out already. This is a driver level test, and by using tcpdump, all that extra setup is out of scope here. send_non_ip() was an unfinished idea; written but never used. Replace it with a more generic send_raw(), and send 3 PTP packet types times 3 transports. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/forwarding/local_termination.sh | 161 ++++++++++++++++-- 1 file changed, 148 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 80ea4c10d764..648868f74604 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -39,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -49,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + pkt="${pkt/00:00:de:ad:be:ef/$smac}" + + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -109,6 +170,7 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local skip_ptp=$1; shift local no_unicast_flt=$1; shift local test_name="$1"; shift local smac=$(mac_get $send_if_name) @@ -150,6 +212,35 @@ run_test() mc_route_destroy $rcv_if_name mc_route_destroy $send_if_name + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi + sleep 1 tcpdump_stop $rcv_if_name @@ -212,6 +303,44 @@ run_test() "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ true "$test_name" + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi + tcpdump_cleanup $rcv_if_name } @@ -293,6 +422,7 @@ macvlan_destroy() standalone() { local no_unicast_flt=true + local skip_ptp=false if [ $(has_unicast_flt $h2) = yes ]; then no_unicast_flt=false @@ -302,7 +432,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h1 $h2 $no_unicast_flt "$h2" + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" macvlan_destroy h2_destroy @@ -313,13 +443,15 @@ test_bridge() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=true h1_create bridge_create $vlan_filtering simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 $no_unicast_flt "vlan_filtering=$vlan_filtering bridge" + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" macvlan_destroy simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 @@ -340,6 +472,7 @@ vlan_aware_bridge() test_vlan() { local no_unicast_flt=true + local skip_ptp=false if [ $(has_unicast_flt $h2) = yes ]; then no_unicast_flt=false @@ -349,7 +482,7 @@ test_vlan() h2_vlan_create macvlan_create $h2.100 - run_test $h1.100 $h2.100 $no_unicast_flt "VLAN upper" + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" macvlan_destroy h2_vlan_destroy @@ -360,6 +493,7 @@ vlan_over_bridged_port() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=false # br_manage_promisc() will not force a single vlan_filtering port to # promiscuous mode, so we should still expect unicast filtering to take @@ -373,7 +507,7 @@ vlan_over_bridged_port() bridge_create $vlan_filtering macvlan_create $h2.100 - run_test $h1.100 $h2.100 $no_unicast_flt \ + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ "VLAN over vlan_filtering=$vlan_filtering bridged port" macvlan_destroy @@ -396,6 +530,7 @@ vlan_over_bridge() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=true h1_vlan_create bridge_create $vlan_filtering @@ -408,7 +543,7 @@ vlan_over_bridge() bridge vlan add dev br0 vid 100 self fi - run_test $h1.100 br0.100 $no_unicast_flt \ + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ "VLAN over vlan_filtering=$vlan_filtering bridge" if [ $vlan_filtering = 1 ]; then From e29b82ef27616777e21c07dc263a8769cbdaa358 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:01 +0300 Subject: [PATCH 1103/1523] selftests: net: bridge_vlan_aware: test that other TPIDs are seen as untagged The bridge VLAN implementation w.r.t. VLAN protocol is described in merge commit 1a0b20b25732 ("Merge branch 'bridge-next'"). We are only sensitive to those VLAN tags whose TPID is equal to the bridge's vlan_protocol. Thus, an 802.1ad VLAN should be treated as 802.1Q-untagged. Add 3 tests which validate that: - 802.1ad-tagged traffic is learned into the PVID of an 802.1Q-aware bridge - Double-tagged traffic is forwarded when just the PVID of the port is present in the VLAN group of the ports - Double-tagged traffic is not forwarded when the PVID of the port is absent from the VLAN group of the ports The test passes with both veth and ocelot. Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/forwarding/bridge_vlan_aware.sh | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..90f8a244ea90 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare From 67c3ca2c5cfe6a50772514e3349b5e7b3b0fac03 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:02 +0300 Subject: [PATCH 1104/1523] net: mscc: ocelot: use ocelot_xmit_get_vlan_info() also for FDMA and register injection Problem description ------------------- On an NXP LS1028A (felix DSA driver) with the following configuration: - ocelot-8021q tagging protocol - VLAN-aware bridge (with STP) spanning at least swp0 and swp1 - 8021q VLAN upper interfaces on swp0 and swp1: swp0.700, swp1.700 - ptp4l on swp0.700 and swp1.700 we see that the ptp4l instances do not see each other's traffic, and they all go to the grand master state due to the ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES condition. Jumping to the conclusion for the impatient ------------------------------------------- There is a zero-day bug in the ocelot switchdev driver in the way it handles VLAN-tagged packet injection. The correct logic already exists in the source code, in function ocelot_xmit_get_vlan_info() added by commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). But it is used only for normal NPI-based injection with the DSA "ocelot" tagging protocol. The other injection code paths (register-based and FDMA-based) roll their own wrong logic. This affects and was noticed on the DSA "ocelot-8021q" protocol because it uses register-based injection. By moving ocelot_xmit_get_vlan_info() to a place that's common for both the DSA tagger and the ocelot switch library, it can also be called from ocelot_port_inject_frame() in ocelot.c. We need to touch the lines with ocelot_ifh_port_set()'s prototype anyway, so let's rename it to something clearer regarding what it does, and add a kernel-doc. ocelot_ifh_set_basic() should do. Investigation notes ------------------- Debugging reveals that PTP event (aka those carrying timestamps, like Sync) frames injected into swp0.700 (but also swp1.700) hit the wire with two VLAN tags: 00000000: 01 1b 19 00 00 00 00 01 02 03 04 05 81 00 02 bc ~~~~~~~~~~~ 00000010: 81 00 02 bc 88 f7 00 12 00 2c 00 00 02 00 00 00 ~~~~~~~~~~~ 00000020: 00 00 00 00 00 00 00 00 00 00 00 01 02 ff fe 03 00000030: 04 05 00 01 00 04 00 00 00 00 00 00 00 00 00 00 00000040: 00 00 The second (unexpected) VLAN tag makes felix_check_xtr_pkt() -> ptp_classify_raw() fail to see these as PTP packets at the link partner's receiving end, and return PTP_CLASS_NONE (because the BPF classifier is not written to expect 2 VLAN tags). The reason why packets have 2 VLAN tags is because the transmission code treats VLAN incorrectly. Neither ocelot switchdev, nor felix DSA, declare the NETIF_F_HW_VLAN_CTAG_TX feature. Therefore, at xmit time, all VLANs should be in the skb head, and none should be in the hwaccel area. This is done by: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; } But ocelot_port_inject_frame() handles things incorrectly: ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); void ocelot_ifh_port_set(struct sk_buff *skb, void *ifh, int port, u32 rew_op) { (...) if (vlan_tag) ocelot_ifh_set_vlan_tci(ifh, vlan_tag); (...) } The way __vlan_hwaccel_push_inside() pushes the tag inside the skb head is by calling: static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { skb->vlan_present = 0; } which does _not_ zero out skb->vlan_tci as seen by skb_vlan_tag_get(). This means that ocelot, when it calls skb_vlan_tag_get(), sees (and uses) a residual skb->vlan_tci, while the same VLAN tag is _already_ in the skb head. The trivial fix for double VLAN headers is to replace the content of ocelot_ifh_port_set() with: if (skb_vlan_tag_present(skb)) ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb)); but this would not be correct either, because, as mentioned, vlan_hw_offload_capable() is false for us, so we'd be inserting dead code and we'd always transmit packets with VID=0 in the injection frame header. I can't actually test the ocelot switchdev driver and rely exclusively on code inspection, but I don't think traffic from 8021q uppers has ever been injected properly, and not double-tagged. Thus I'm blaming the introduction of VLAN fields in the injection header - early driver code. As hinted at in the early conclusion, what we _want_ to happen for VLAN transmission was already described once in commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). ocelot_xmit_get_vlan_info() intends to ensure that if the port through which we're transmitting is under a VLAN-aware bridge, the outer VLAN tag from the skb head is stripped from there and inserted into the injection frame header (so that the packet is processed in hardware through that actual VLAN). And in all other cases, the packet is sent with VID=0 in the injection frame header, since the port is VLAN-unaware and has logic to strip this VID on egress (making it invisible to the wire). Fixes: 08d02364b12f ("net: mscc: fix the injection header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 29 +++++++++++---- drivers/net/ethernet/mscc/ocelot_fdma.c | 2 +- include/linux/dsa/ocelot.h | 47 +++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 3 +- net/dsa/tag_ocelot.c | 37 ++----------------- 5 files changed, 75 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ed2fb44500b0..69a4e5a90475 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1193,17 +1193,34 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_can_inject); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) +/** + * ocelot_ifh_set_basic - Set basic information in Injection Frame Header + * @ifh: Pointer to Injection Frame Header memory + * @ocelot: Switch private data structure + * @port: Egress port number + * @rew_op: Egress rewriter operation for PTP + * @skb: Pointer to socket buffer (packet) + * + * Populate the Injection Frame Header with basic information for this skb: the + * analyzer bypass bit, destination port, VLAN info, egress rewriter info. + */ +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + u64 vlan_tci, tag_type; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + ocelot_ifh_set_bypass(ifh, 1); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); - ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); - if (vlan_tag) - ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) ocelot_ifh_set_rew_op(ifh, rew_op); } -EXPORT_SYMBOL(ocelot_ifh_port_set); +EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) @@ -1214,7 +1231,7 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); for (i = 0; i < OCELOT_TAG_LEN / 4; i++) ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 312a46832154..87b59cc5e441 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -666,7 +666,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); memset(ifh, 0, OCELOT_TAG_LEN); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; } diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d8..6fbfbde68a37 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include +#include #include #include #include @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c..ed18e6bafc8d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -969,7 +969,8 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index e0e4300bfbd3..bf6608fc6be7 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -8,40 +8,6 @@ #define OCELOT_NAME "ocelot" #define SEVILLE_NAME "seville" -/* If the port is under a VLAN-aware bridge, remove the VLAN header from the - * payload and move it into the DSA tag, which will make the switch classify - * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, - * which is the pvid of standalone and VLAN-unaware bridge ports. - */ -static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, - u64 *vlan_tci, u64 *tag_type) -{ - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct vlan_ethhdr *hdr; - u16 proto, tci; - - if (!br || !br_vlan_enabled(br)) { - *vlan_tci = 0; - *tag_type = IFH_TAG_TYPE_C; - return; - } - - hdr = skb_vlan_eth_hdr(skb); - br_vlan_get_proto(br, &proto); - - if (ntohs(hdr->h_vlan_proto) == proto) { - vlan_remove_tag(skb, &tci); - *vlan_tci = tci; - } else { - rcu_read_lock(); - br_vlan_get_pvid_rcu(br, &tci); - rcu_read_unlock(); - *vlan_tci = tci; - } - - *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; -} - static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { @@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, u32 rew_op = 0; u64 qos_class; - ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci, + &tag_type); qos_class = netdev_get_num_tc(netdev) ? netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; From e1b9e80236c540fa85d76e2d510d1b38e1968c5d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:03 +0300 Subject: [PATCH 1105/1523] net: mscc: ocelot: fix QoS class for injected packets with "ocelot-8021q" There are 2 distinct code paths (listed below) in the source code which set up an injection header for Ocelot(-like) switches. Code path (2) lacks the QoS class and source port being set correctly. Especially the improper QoS classification is a problem for the "ocelot-8021q" alternative DSA tagging protocol, because we support tc-taprio and each packet needs to be scheduled precisely through its time slot. This includes PTP, which is normally assigned to a traffic class other than 0, but would be sent through TC 0 nonetheless. The code paths are: (1) ocelot_xmit_common() from net/dsa/tag_ocelot.c - called only by the standard "ocelot" DSA tagging protocol which uses NPI-based injection - sets up bit fields in the tag manually to account for a small difference (destination port offset) between Ocelot and Seville. Namely, ocelot_ifh_set_dest() is omitted out of ocelot_xmit_common(), because there's also seville_ifh_set_dest(). (2) ocelot_ifh_set_basic(), called by: - ocelot_fdma_prepare_skb() for FDMA transmission of the ocelot switchdev driver - ocelot_port_xmit() -> ocelot_port_inject_frame() for register-based transmission of the ocelot switchdev driver - felix_port_deferred_xmit() -> ocelot_port_inject_frame() for the DSA tagger ocelot-8021q when it must transmit PTP frames (also through register-based injection). sets the bit fields according to its own logic. The problem is that (2) doesn't call ocelot_ifh_set_qos_class(). Copying that logic from ocelot_xmit_common() fixes that. Unfortunately, although desirable, it is not easily possible to de-duplicate code paths (1) and (2), and make net/dsa/tag_ocelot.c directly call ocelot_ifh_set_basic()), because of the ocelot/seville difference. This is the "minimal" fix with some logic duplicated (but at least more consolidated). Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 10 +++++++++- drivers/net/ethernet/mscc/ocelot_fdma.c | 1 - 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 69a4e5a90475..9301716e21d5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1208,13 +1208,21 @@ void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, u32 rew_op, struct sk_buff *skb) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *dev = skb->dev; u64 vlan_tci, tag_type; + int qos_class; ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, &tag_type); + qos_class = netdev_get_num_tc(dev) ? + netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; + + memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); + ocelot_ifh_set_qos_class(ifh, qos_class); ocelot_ifh_set_tag_type(ifh, tag_type); ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) @@ -1225,7 +1233,7 @@ EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) { - u32 ifh[OCELOT_TAG_LEN / 4] = {0}; + u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 87b59cc5e441..00326ae8c708 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -665,7 +665,6 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); - memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; From c5e12ac3beb0dd3a718296b2d8af5528e9ab728e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:04 +0300 Subject: [PATCH 1106/1523] net: mscc: ocelot: serialize access to the injection/extraction groups As explained by Horatiu Vultur in commit 603ead96582d ("net: sparx5: Add spinlock for frame transmission from CPU") which is for a similar hardware design, multiple CPUs can simultaneously perform injection or extraction. There are only 2 register groups for injection and 2 for extraction, and the driver only uses one of each. So we'd better serialize access using spin locks, otherwise frame corruption is possible. Note that unlike in sparx5, FDMA in ocelot does not have this issue because struct ocelot_fdma_tx_ring already contains an xmit_lock. I guess this is mostly a problem for NXP LS1028A, as that is dual core. I don't think VSC7514 is. So I'm blaming the commit where LS1028A (aka the felix DSA driver) started using register-based packet injection and extraction. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 11 +++++ drivers/net/ethernet/mscc/ocelot.c | 52 ++++++++++++++++++++++ drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 ++ include/soc/mscc/ocelot.h | 9 ++++ 4 files changed, 76 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index e554699f06d4..8d31ff18c5c7 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -528,7 +528,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) * so we need to be careful that there are no extra frames to be * dequeued over MMIO, since we would never know to discard them. */ + ocelot_lock_xtr_grp_bh(ocelot, 0); ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp_bh(ocelot, 0); return 0; } @@ -1518,6 +1520,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) int port = xmit_work->dp->index; int retries = 10; + ocelot_lock_inj_grp(ocelot, 0); + do { if (ocelot_can_inject(ocelot, 0)) break; @@ -1526,6 +1530,7 @@ static void felix_port_deferred_xmit(struct kthread_work *work) } while (--retries); if (!retries) { + ocelot_unlock_inj_grp(ocelot, 0); dev_err(ocelot->dev, "port %d failed to inject skb\n", port); ocelot_port_purge_txtstamp_skb(ocelot, port, skb); @@ -1535,6 +1540,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + ocelot_unlock_inj_grp(ocelot, 0); + consume_skb(skb); kfree(xmit_work); } @@ -1694,6 +1701,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) if (!felix->info->quirk_no_xtr_irq) return false; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; unsigned int type; @@ -1730,6 +1739,8 @@ out: ocelot_drain_cpu_queue(ocelot, 0); } + ocelot_unlock_xtr_grp(ocelot, grp); + return true; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9301716e21d5..f4e027a6fe95 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1099,6 +1099,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, } EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); + +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); + +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); + +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); + +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); + +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { u64 timestamp, src_port, len; @@ -1109,6 +1151,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) u32 val, *buf; int err; + lockdep_assert_held(&ocelot->xtr_lock); + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); if (err) return err; @@ -1184,6 +1228,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) { u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + lockdep_assert_held(&ocelot->inj_lock); + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) return false; if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) @@ -1236,6 +1282,8 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; + lockdep_assert_held(&ocelot->inj_lock); + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); @@ -1272,6 +1320,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { + lockdep_assert_held(&ocelot->xtr_lock); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) ocelot_read_rix(ocelot, QS_XTR_RD, grp); } @@ -2954,6 +3004,8 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->fwd_domain_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); + spin_lock_init(&ocelot->inj_lock); + spin_lock_init(&ocelot->xtr_lock); ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); if (!ocelot->owq) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 993212c3a7da..c09dd2e3343c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct ocelot *ocelot = arg; int grp = 0, err; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; @@ -69,6 +71,8 @@ out: if (err < 0) ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp(ocelot, grp); + return IRQ_HANDLED; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ed18e6bafc8d..462c653e1017 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,6 +969,12 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); From 93e4649efa964201c73b0a03c35c04a0d6fc809f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:05 +0300 Subject: [PATCH 1107/1523] net: dsa: provide a software untagging function on RX for VLAN-aware bridges Through code analysis, I realized that the ds->untag_bridge_pvid logic is contradictory - see the newly added FIXME above the kernel-doc for dsa_software_untag_vlan_unaware_bridge(). Moreover, for the Felix driver, I need something very similar, but which is actually _not_ contradictory: untag the bridge PVID on RX, but for VLAN-aware bridges. The existing logic does it for VLAN-unaware bridges. Since I don't want to change the functionality of drivers which were supposedly properly tested with the ds->untag_bridge_pvid flag, I have introduced a new one: ds->untag_vlan_aware_bridge_pvid, and I have refactored the DSA reception code into a common path for both flags. TODO: both flags should be unified under a single ds->software_vlan_untag, which users of both current flags should set. This is not something that can be carried out right away. It needs very careful examination of all drivers which make use of this functionality, since some of them actually get this wrong in the first place. For example, commit 9130c2d30c17 ("net: dsa: microchip: ksz8795: Use software untagging on CPU port") uses this in a driver which has ds->configure_vlan_while_not_filtering = true. The latter mechanism has been known for many years to be broken by design: https://lore.kernel.org/netdev/CABumfLzJmXDN_W-8Z=p9KyKUVi_HhS7o_poBkeKHS2BkAiyYpw@mail.gmail.com/ and we have the situation of 2 bugs canceling each other. There is no private VLAN, and the port follows the PVID of the VLAN-unaware bridge. So, it's kinda ok for that driver to use the ds->untag_bridge_pvid mechanism, in a broken way. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 16 ++++-- net/dsa/tag.c | 5 +- net/dsa/tag.h | 141 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 121 insertions(+), 41 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1..d7a6c2930277 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 6e402d49afd3..79ad105902d9 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -105,8 +105,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, p = netdev_priv(skb->dev); - if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { - nskb = dsa_untag_bridge_pvid(skb); + if (unlikely(cpu_dp->ds->untag_bridge_pvid || + cpu_dp->ds->untag_vlan_aware_bridge_pvid)) { + nskb = dsa_software_vlan_untag(skb); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/tag.h b/net/dsa/tag.h index f6b9c73718df..d5707870906b 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -44,46 +44,81 @@ static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, return NULL; } -/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged - * frames as untagged, since the bridge will not untag them. +/** + * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge can process tagged packets. Software like STP/PTP may not. The + * bridge can also process untagged packets, to the same effect as if they were + * tagged with the PVID of the ingress port. So packets tagged with the PVID of + * the bridge port must be software-untagged, to support both use cases. */ -static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) { - struct dsa_port *dp = dsa_user_to_port(skb->dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *dev = skb->dev; - struct net_device *upper_dev; - u16 vid, pvid, proto; + u16 pvid, proto; int err; - if (!br || br_vlan_enabled(br)) - return skb; - err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { - skb = skb_vlan_untag(skb); - if (!skb) - return NULL; - } - - if (!skb_vlan_tag_present(skb)) - return skb; - - vid = skb_vlan_tag_get_id(skb); - - /* We already run under an RCU read-side critical section since - * we are called from netif_receive_skb_list_internal(). - */ - err = br_vlan_get_pvid_rcu(dev, &pvid); + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); if (err) - return skb; + return; - if (vid != pvid) - return skb; + if (vid == pvid && skb->vlan_proto == htons(proto)) + __vlan_hwaccel_clear_tag(skb); +} + +/** + * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run + * on the plain port, or on a VLAN upper interface). Maybe packets are coming + * to software as tagged with a driver-defined VID which is NOT equal to the + * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration + * should NOT be committed to hardware). DSA needs a method for this private + * VID to be communicated by software to it, and if packets are tagged with it, + * software-untag them. Note: the private VID may be different per bridge, to + * support the FDB isolation use case. + * + * FIXME: this is currently implemented based on the broken assumption that + * the "private VID" used by the driver in VLAN-unaware mode is equal to the + * bridge PVID. It should not be, except for a coincidence; the bridge PVID is + * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that + * this function removes is wrong. + * + * All users of ds->untag_bridge_pvid should fix their drivers, if necessary, + * to make the two independent. Only then, if there still remains a need to + * strip the private VID from packets, then a new ds->ops->get_private_vid() + * API shall be introduced to communicate to DSA what this VID is, which needs + * to be stripped here. + */ +static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) +{ + struct net_device *upper_dev; + u16 pvid, proto; + int err; + + err = br_vlan_get_proto(br, &proto); + if (err) + return; + + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; + + if (vid != pvid || skb->vlan_proto != htons(proto)) + return; /* The sad part about attempting to untag from DSA is that we * don't know, unless we check, if the skb will end up in @@ -95,10 +130,50 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * definitely keep the tag, to make sure it keeps working. */ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); - if (upper_dev) + if (!upper_dev) + __vlan_hwaccel_clear_tag(skb); +} + +/** + * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path + * @skb: Pointer to socket buffer (packet) + * + * Receive path method for switches which cannot avoid tagging all packets + * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or + * ds->untag_vlan_aware_bridge_pvid is set to true. + * + * As a side effect of this method, any VLAN tag from the skb head is moved + * to hwaccel. + */ +static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_user_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u16 vid; + + /* software untagging for standalone ports not yet necessary */ + if (!br) return skb; - __vlan_hwaccel_clear_tag(skb); + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + if (br_vlan_enabled(br)) { + if (dp->ds->untag_vlan_aware_bridge_pvid) + dsa_software_untag_vlan_aware_bridge(skb, br, vid); + } else { + if (dp->ds->untag_bridge_pvid) + dsa_software_untag_vlan_unaware_bridge(skb, br, vid); + } return skb; } From f1288fd7293b91442ad7420394c252a252ecaa30 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:06 +0300 Subject: [PATCH 1108/1523] net: dsa: felix: fix VLAN tag loss on CPU reception with ocelot-8021q There is a major design bug with ocelot-8021q, which is that it expects more of the hardware than the hardware can actually do. The short summary of the issue is that when a port is under a VLAN-aware bridge and we use this tagging protocol, VLAN upper interfaces of this port do not see RX traffic. We use VCAP ES0 (egress rewriter) rules towards the tag_8021q CPU port to encapsulate packets with an outer tag, later stripped by software, that depends on the source user port. We do this so that packets can be identified in ocelot_rcv(). To be precise, we create rules with push_outer_tag = OCELOT_ES0_TAG and push_inner_tag = 0. With this configuration, we expect the switch to keep the inner tag configuration as found in the packet (if it was untagged on user port ingress, keep it untagged, otherwise preserve the VLAN tag unmodified as the inner tag towards the tag_8021q CPU port). But this is not what happens. Instead, table "Tagging Combinations" from the user manual suggests that when the ES0 action is "PUSH_OUTER_TAG=1 and PUSH_INNER_TAG=0", there will be "no inner tag". Experimentation further clarifies what this means. It appears that this "inner tag" which is not pushed into the packet on its egress towards the CPU is none other than the classified VLAN. When the ingress user port is standalone or under a VLAN-unaware bridge, the classified VLAN is a discardable quantity: it is a fixed value - the result of ocelot_vlan_unaware_pvid()'s configuration, and actually independent of the VID from any 802.1Q header that may be in the frame. It is actually preferable to discard the "inner tag" in this case. The problem is when the ingress port is under a VLAN-aware bridge. Then, the classified VLAN is taken from the frame's 802.1Q header, with a fallback on the bridge port's PVID. It would be very good to not discard the "inner tag" here, because if we do, we break communication with any 8021q VLAN uppers that the port might have. These have a processing path outside the bridge. There seems to be nothing else we can do except to change the configuration for VCAP ES0 rules, to actually push the inner VLAN into the frame. There are 2 options for that, first is to push a fixed value specified in the rule, and second is to push a fixed value, plus (aka arithmetic +) the classified VLAN. We choose the second option, and we select that fixed value as 0. Thus, what is pushed in the inner tag is just the classified VLAN. From there, we need to perform software untagging, in the receive path, of stuff that was untagged on the wire. Fixes: 7c83a7c539ab ("net: dsa: add a second tagger for Ocelot switches based on tag_8021q") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 115 +++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 8d31ff18c5c7..4a705f7333f4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -61,11 +61,46 @@ static int felix_cpu_port_for_conduit(struct dsa_switch *ds, return cpu_dp->index; } +/** + * felix_update_tag_8021q_rx_rule - Update VCAP ES0 tag_8021q rule after + * vlan_filtering change + * @outer_tagging_rule: Pointer to VCAP filter on which the update is performed + * @vlan_filtering: Current bridge VLAN filtering setting + * + * Source port identification for tag_8021q is done using VCAP ES0 rules on the + * CPU port(s). The ES0 tag B (inner tag from the packet) can be configured as + * either: + * - push_inner_tag=0: the inner tag is never pushed into the frame + * (and we lose info about the classified VLAN). This is + * good when the classified VLAN is a discardable quantity + * for the software RX path: it is either set to + * OCELOT_STANDALONE_PVID, or to + * ocelot_vlan_unaware_pvid(bridge). + * - push_inner_tag=1: the inner tag is always pushed. This is good when the + * classified VLAN is not a discardable quantity (the port + * is under a VLAN-aware bridge, and software needs to + * continue processing the packet in the same VLAN as the + * hardware). + * The point is that what is good for a VLAN-unaware port is not good for a + * VLAN-aware port, and vice versa. Thus, the RX tagging rules must be kept in + * sync with the VLAN filtering state of the port. + */ +static void +felix_update_tag_8021q_rx_rule(struct ocelot_vcap_filter *outer_tagging_rule, + bool vlan_filtering) +{ + if (vlan_filtering) + outer_tagging_rule->action.push_inner_tag = OCELOT_ES0_TAG; + else + outer_tagging_rule->action.push_inner_tag = OCELOT_NO_ES0_TAG; +} + /* Set up VCAP ES0 rules for pushing a tag_8021q VLAN towards the CPU such that * the tagger can perform RX source port identification. */ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, - int upstream, u16 vid) + int upstream, u16 vid, + bool vlan_filtering) { struct ocelot_vcap_filter *outer_tagging_rule; struct ocelot *ocelot = ds->priv; @@ -96,6 +131,14 @@ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD; outer_tagging_rule->action.tag_a_vid_sel = 1; outer_tagging_rule->action.vid_a_val = vid; + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + outer_tagging_rule->action.tag_b_tpid_sel = OCELOT_TAG_TPID_SEL_8021Q; + /* Leave TAG_B_VID_SEL at 0 (Classified VID + VID_B_VAL). Since we also + * leave VID_B_VAL at 0, this makes ES0 tag B (the inner tag) equal to + * the classified VID, which we need to see in the DSA tagger's receive + * path. Note: the inner tag is only visible in the packet when pushed + * (push_inner_tag == OCELOT_ES0_TAG). + */ err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL); if (err) @@ -227,6 +270,7 @@ static int felix_tag_8021q_vlan_del_tx(struct dsa_switch *ds, int port, u16 vid) static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, u16 flags) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; @@ -234,11 +278,12 @@ static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, * membership, which we aren't. So we don't need to add any VCAP filter * for the CPU port. */ - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); if (err) return err; } @@ -258,10 +303,11 @@ add_tx_failed: static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { @@ -278,11 +324,41 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) del_tx_failed: dsa_switch_for_each_cpu_port(cpu_dp, ds) - felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); return err; } +static int felix_update_tag_8021q_rx_rules(struct dsa_switch *ds, int port, + bool vlan_filtering) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = ds->priv; + struct dsa_port *cpu_dp; + unsigned long cookie; + int err; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + cookie = OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, + cpu_dp->index); + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + cookie, false); + + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + + err = ocelot_vcap_filter_replace(ocelot, outer_tagging_rule); + if (err) + return err; + } + + return 0; +} + static int felix_trap_get_cpu_port(struct dsa_switch *ds, const struct ocelot_vcap_filter *trap) { @@ -532,6 +608,16 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) ocelot_drain_cpu_queue(ocelot, 0); ocelot_unlock_xtr_grp_bh(ocelot, 0); + /* Problem: when using push_inner_tag=1 for ES0 tag B, we lose info + * about whether the received packets were VLAN-tagged on the wire, + * since they are always tagged on egress towards the CPU port. + * + * Since using push_inner_tag=1 is unavoidable for VLAN-aware bridges, + * we must work around the fallout by untagging in software to make + * untagged reception work more or less as expected. + */ + ds->untag_vlan_aware_bridge_pvid = true; + return 0; } @@ -556,6 +642,8 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds) ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index); dsa_tag_8021q_unregister(ds); + + ds->untag_vlan_aware_bridge_pvid = false; } static unsigned long felix_tag_8021q_get_host_fwd_mask(struct dsa_switch *ds) @@ -1010,8 +1098,23 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; + bool using_tag_8021q; + struct felix *felix; + int err; - return ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + err = ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + if (err) + return err; + + felix = ocelot_to_felix(ocelot); + using_tag_8021q = felix->tag_proto == DSA_TAG_PROTO_OCELOT_8021Q; + if (using_tag_8021q) { + err = felix_update_tag_8021q_rx_rules(ds, port, enabled); + if (err) + return err; + } + + return 0; } static int felix_vlan_add(struct dsa_switch *ds, int port, From 36dd1141be70b5966906919714dc504a24c65ddf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: [PATCH 1109/1523] net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 188 ++++++++++++++++++++++-- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 + include/soc/mscc/ocelot_vcap.h | 2 + 3 files changed, 180 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f4e027a6fe95..3d72aa7b1305 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -2607,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { From b9b6ee6fe258ce4d89592593efcd3d798c418859 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:25:19 +0200 Subject: [PATCH 1110/1523] thermal: gov_bang_bang: Call __thermal_cdev_update() directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of clearing the "updated" flag for each cooling device affected by the trip point crossing in bang_bang_control() and walking all thermal instances to run thermal_cdev_update() for all of the affected cooling devices, call __thermal_cdev_update() directly for each of them. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Cc: 6.10+ # 6.10+ Link: https://patch.msgid.link/13583081.uLZWGnKmhe@rjwysocki.net --- drivers/thermal/gov_bang_bang.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 4a2e869b9538..b9474c6af72b 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -71,12 +71,9 @@ static void bang_bang_control(struct thermal_zone_device *tz, dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ + __thermal_cdev_update(instance->cdev); mutex_unlock(&instance->cdev->lock); } - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) - thermal_cdev_update(instance->cdev); } static struct thermal_governor thermal_gov_bang_bang = { From 84248e35d9b60e03df7276627e4e91fbaf80f73d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:26:42 +0200 Subject: [PATCH 1111/1523] thermal: gov_bang_bang: Split bang_bang_control() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the setting of the thermal instance target state from bang_bang_control() into a separate function that will be also called in a different place going forward. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Cc: 6.10+ # 6.10+ Link: https://patch.msgid.link/3313587.aeNJFYEL58@rjwysocki.net --- drivers/thermal/gov_bang_bang.c | 42 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index b9474c6af72b..87cff3ea77a9 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,6 +13,27 @@ #include "thermal_core.h" +static void bang_bang_set_instance_target(struct thermal_instance *instance, + unsigned int target) +{ + if (instance->target != 0 && instance->target != 1 && + instance->target != THERMAL_NO_TARGET) + pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", + instance->target, instance->name); + + /* + * Enable the fan when the trip is crossed on the way up and disable it + * when the trip is crossed on the way down. + */ + instance->target = target; + + dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + + mutex_lock(&instance->cdev->lock); + __thermal_cdev_update(instance->cdev); + mutex_unlock(&instance->cdev->lock); +} + /** * bang_bang_control - controls devices associated with the given zone * @tz: thermal_zone_device @@ -54,25 +75,8 @@ static void bang_bang_control(struct thermal_zone_device *tz, tz->temperature, trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; - - if (instance->target != 0 && instance->target != 1 && - instance->target != THERMAL_NO_TARGET) - pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", - instance->target, instance->name); - - /* - * Enable the fan when the trip is crossed on the way up and - * disable it when the trip is crossed on the way down. - */ - instance->target = crossed_up; - - dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); - - mutex_lock(&instance->cdev->lock); - __thermal_cdev_update(instance->cdev); - mutex_unlock(&instance->cdev->lock); + if (instance->trip == trip) + bang_bang_set_instance_target(instance, crossed_up); } } From 5f64b4a1ab1b0412446d42e1fc2964c2cdb60b27 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:27:33 +0200 Subject: [PATCH 1112/1523] thermal: gov_bang_bang: Add .manage() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After recent changes, the Bang-bang governor may not adjust the initial configuration of cooling devices to the actual situation. Namely, if a cooling device bound to a certain trip point starts in the "on" state and the thermal zone temperature is below the threshold of that trip point, the trip point may never be crossed on the way up in which case the state of the cooling device will never be adjusted because the thermal core will never invoke the governor's .trip_crossed() callback. [Note that there is no issue if the zone temperature is at the trip threshold or above it to start with because .trip_crossed() will be invoked then to indicate the start of thermal mitigation for the given trip.] To address this, add a .manage() callback to the Bang-bang governor and use it to ensure that all of the thermal instances managed by the governor have been initialized properly and the states of all of the cooling devices involved have been adjusted to the current zone temperature as appropriate. Fixes: 530c932bdf75 ("thermal: gov_bang_bang: Use .trip_crossed() instead of .throttle()") Link: https://lore.kernel.org/linux-pm/1bfbbae5-42b0-4c7d-9544-e98855715294@piie.net/ Cc: 6.10+ # 6.10+ Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Link: https://patch.msgid.link/8419356.T7Z3S40VBb@rjwysocki.net --- drivers/thermal/gov_bang_bang.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 87cff3ea77a9..bc55e0698bfa 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -26,6 +26,7 @@ static void bang_bang_set_instance_target(struct thermal_instance *instance, * when the trip is crossed on the way down. */ instance->target = target; + instance->initialized = true; dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); @@ -80,8 +81,37 @@ static void bang_bang_control(struct thermal_zone_device *tz, } } +static void bang_bang_manage(struct thermal_zone_device *tz) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; + + if (tz->temperature >= td->threshold || + trip->temperature == THERMAL_TEMP_INVALID || + trip->type == THERMAL_TRIP_CRITICAL || + trip->type == THERMAL_TRIP_HOT) + continue; + + /* + * If the initial cooling device state is "on", but the zone + * temperature is not above the trip point, the core will not + * call bang_bang_control() until the zone temperature reaches + * the trip point temperature which may be never. In those + * cases, set the initial state of the cooling device to 0. + */ + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (!instance->initialized && instance->trip == trip) + bang_bang_set_instance_target(instance, 0); + } + } +} + static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, + .manage = bang_bang_manage, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); From 6e6f58a170ea98e44075b761f2da42a5aec47dfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:29:11 +0200 Subject: [PATCH 1113/1523] thermal: gov_bang_bang: Use governor_data to reduce overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After running once, the for_each_trip_desc() loop in bang_bang_manage() is pure needless overhead because it is not going to make any changes unless a new cooling device has been bound to one of the trips in the thermal zone or the system is resuming from sleep. For this reason, make bang_bang_manage() set governor_data for the thermal zone and check it upfront to decide whether or not it needs to do anything. However, governor_data needs to be reset in some cases to let bang_bang_manage() know that it should walk the trips again, so add an .update_tz() callback to the governor and make the core additionally invoke it during system resume. To avoid affecting the other users of that callback unnecessarily, add a special notification reason for system resume, THERMAL_TZ_RESUME, and also pass it to __thermal_zone_device_update() called during system resume for consistency. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Cc: 6.10+ # 6.10+ Link: https://patch.msgid.link/2285575.iZASKD2KPV@rjwysocki.net --- drivers/thermal/gov_bang_bang.c | 18 ++++++++++++++++++ drivers/thermal/thermal_core.c | 3 ++- include/linux/thermal.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index bc55e0698bfa..daed67d19efb 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -86,6 +86,10 @@ static void bang_bang_manage(struct thermal_zone_device *tz) const struct thermal_trip_desc *td; struct thermal_instance *instance; + /* If the code below has run already, nothing needs to be done. */ + if (tz->governor_data) + return; + for_each_trip_desc(tz, td) { const struct thermal_trip *trip = &td->trip; @@ -107,11 +111,25 @@ static void bang_bang_manage(struct thermal_zone_device *tz) bang_bang_set_instance_target(instance, 0); } } + + tz->governor_data = (void *)true; +} + +static void bang_bang_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + /* + * Let bang_bang_manage() know that it needs to walk trips after binding + * a new cdev and after system resume. + */ + if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME) + tz->governor_data = NULL; } static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, .manage = bang_bang_manage, + .update_tz = bang_bang_update_tz, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 95c399f94744..e6669aeda1ff 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1728,7 +1728,8 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_debug_tz_resume(tz); thermal_zone_device_init(tz); - __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + thermal_governor_update_tz(tz, THERMAL_TZ_RESUME); + __thermal_zone_device_update(tz, THERMAL_TZ_RESUME); complete(&tz->resume); tz->resuming = false; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..b86ddca46b9e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** From 145082ebfcf08f4fd254c467abf4aa58b4d38505 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 12 Aug 2024 14:17:21 +0200 Subject: [PATCH 1114/1523] Documentation/llvm: turn make command for ccache into code block The command provided to use ccache with clang is not a literal code block. Once built, the documentation displays the '' symbols as a " character, which is wrong, and the command can not be applied as provided. Turn the command into a literal code block. Signed-off-by: Javier Carrasco Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index bb5c44f8bd1c..6dc66b4f31a7 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -126,7 +126,7 @@ Ccache ``ccache`` can be used with ``clang`` to improve subsequent builds, (though KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds -in order to avoid 100% cache misses, see Reproducible_builds_ for more info): +in order to avoid 100% cache misses, see Reproducible_builds_ for more info):: KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang" From 2e5d47fe7839298fa096970e184aac9bf82c3bd3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:39 -0700 Subject: [PATCH 1115/1523] drm/xe/uc: Use managed bo for HuC and GSC objects Drmm actions are not the right ones to clean up BOs and we should use devm instead. However, we can also instead just allocate the objects using the managed_bo function, which will internally register the correct cleanup call and therefore allows us to simplify the code. While at it, switch to drmm_kzalloc for the GSC proxy allocation to further simplify the cleanup. Cc: John Harrison Cc: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 12 +++-------- drivers/gpu/drm/xe/xe_gsc_proxy.c | 36 ++++++------------------------- drivers/gpu/drm/xe/xe_huc.c | 19 +++++----------- 3 files changed, 14 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 77ce44e845c5..8a9b3c50a588 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -450,11 +450,6 @@ static void free_resources(struct drm_device *drm, void *arg) xe_exec_queue_put(gsc->q); gsc->q = NULL; } - - if (gsc->private) { - xe_bo_unpin_map_no_vm(gsc->private); - gsc->private = NULL; - } } int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) @@ -474,10 +469,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) if (!hwe) return -ENODEV; - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 28e6a7a1d282..2d6ea8c01445 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -371,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = { .unbind = xe_gsc_proxy_component_unbind, }; -static void proxy_channel_free(struct drm_device *drm, void *arg) -{ - struct xe_gsc *gsc = arg; - - if (!gsc->proxy.bo) - return; - - if (gsc->proxy.to_csme) { - kfree(gsc->proxy.to_csme); - gsc->proxy.to_csme = NULL; - gsc->proxy.from_csme = NULL; - } - - if (gsc->proxy.bo) { - iosys_map_clear(&gsc->proxy.to_gsc); - iosys_map_clear(&gsc->proxy.from_gsc); - xe_bo_unpin_map_no_vm(gsc->proxy.bo); - gsc->proxy.bo = NULL; - } -} - static int proxy_channel_alloc(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -400,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_bo *bo; void *csme; - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) return -ENOMEM; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - kfree(csme); + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) return PTR_ERR(bo); - } gsc->proxy.bo = bo; gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); @@ -419,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index bec4366e5513..f5459f97af23 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } -static void free_gsc_pkt(struct drm_device *drm, void *arg) -{ - struct xe_huc *huc = arg; - - xe_bo_unpin_map_no_vm(huc->gsc_pkt); - huc->gsc_pkt = NULL; -} - #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K static int huc_alloc_gsc_pkt(struct xe_huc *huc) { @@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_bo *bo; /* we use a single object for both input and output */ - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, - PXP43_HUC_AUTH_INOUT_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), + PXP43_HUC_AUTH_INOUT_SIZE * 2, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); huc->gsc_pkt = bo; - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + return 0; } int xe_huc_init(struct xe_huc *huc) From 5a891a0e69f134f53cc91b409f38e5ea1cafaf0a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:40 -0700 Subject: [PATCH 1116/1523] drm/xe/uc: Use devm to register cleanup that includes exec_queues Exec_queue cleanup requires HW access, so we need to use devm instead of drmm for it. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 8a9b3c50a588..8a137cb83318 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -495,7 +495,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index da63be550d4d..be5b9d8cfa5f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); From db3461a7743817ad7c73553902231b096616813a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 21:02:08 -0700 Subject: [PATCH 1117/1523] drm/xe: Use for_each_remote_tile rather than manual check Replace for_each_tile plus a check against primary tile with for_each_remote_tile in tiles_fini. The latter macro does this for us. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240816040208.62695-1-matthew.brost@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f5bdb540e823..3fd462fda625 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -29,9 +29,8 @@ static void tiles_fini(void *arg) struct xe_tile *tile; int id; - for_each_tile(tile, xe, id) - if (tile != xe_device_get_root_tile(xe)) - tile->mmio.regs = NULL; + for_each_remote_tile(tile, xe, id) + tile->mmio.regs = NULL; } /* From c2f6e16a6771eaefba6bb35f6803fe7217822d41 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 15 Aug 2024 13:02:55 -0400 Subject: [PATCH 1118/1523] bcachefs: Increase size of cuckoo hash table on too many rehashes Also, improve the calculation of the new table size, so that it can shrink when needed. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d..f70eb2127d32 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; From 075cabf324c3fd790d6ba39ff9db33a30b954fe2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:31:29 -0400 Subject: [PATCH 1119/1523] bcachefs: Fix forgetting to pass trans to fsck_err() Reported-by: syzbot+e3938cd6d761b78750e6@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index b69ef4b3de6e..ad517ef744e5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -925,7 +925,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", From 9482f3b05332a624508a91c2ab2cf3527328a6a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:41:46 -0400 Subject: [PATCH 1120/1523] bcachefs: avoid overflowing LRU_TIME_BITS for cached data lru Reported-by: syzbot+510b0b28f8e6de64d307@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 260e7fa83d05..fd790b03fbe1 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -150,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ From 99c87fe0f584f8d778a323141504d1ba5c89a4a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:44:49 -0400 Subject: [PATCH 1121/1523] bcachefs: fix incorrect i_state usage Reported-by: syzbot+95e40eae71609e40d851@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 15fc41e63b6c..94c392abef65 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new From 437ad4534a15ccf538ddb440cb29d3f445251605 Mon Sep 17 00:00:00 2001 From: Jesus Narvaez Date: Thu, 8 Aug 2024 13:49:43 -0700 Subject: [PATCH 1122/1523] drm/i915/guc: Change GEM_WARN_ON to guc_err to prevent taints in CI This warning was supposed to catch a harmless issue, but changing to guc_error should prevent kernel taints in CI runs. Signed-off-by: Jesus Narvaez Reviewed-by: Jonathan Cavitt Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240808204943.911727-1-jesus.narvaez@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9400d0eb682b..c3a5d9e1288e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2014,11 +2014,12 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) /* * Technically possible for either of these values to be non-zero here, - * but very unlikely + harmless. Regardless let's add a warn so we can + * but very unlikely + harmless. Regardless let's add an error so we can * see in CI if this happens frequently / a precursor to taking down the * machine. */ - GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + if (atomic_read(&guc->outstanding_submission_g2h)) + guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n"); atomic_set(&guc->outstanding_submission_g2h, 0); intel_guc_global_policies_update(guc); From 8a0f58ec4728ec04c654645bf8ed4070821ebbf3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 15 Aug 2024 10:26:03 -0700 Subject: [PATCH 1123/1523] drm/xe: Add debugfs to dump GuC's hwconfig Although the query uapi is the official way to get at the GuC's hwconfig table contents, it's still useful to have a quick debugfs interface to dump the table in a human-readable format while debugging the driver. Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240815172602.2729146-3-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 ++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 57 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.h | 2 + 3 files changed, 70 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5125d76ccfac..8f95d3a5949b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -19,6 +19,7 @@ #include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_stats.h" #include "xe_gt_topology.h" +#include "xe_guc_hwconfig.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -270,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int hwconfig(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_guc_hwconfig_dump(>->uc.guc, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, @@ -288,6 +298,7 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index d9b570a154a2..025bad701556 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -6,6 +6,7 @@ #include "xe_guc_hwconfig.h" #include +#include #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -103,3 +104,59 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); } + +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) +{ + size_t size = xe_guc_hwconfig_size(guc); + u32 *hwconfig; + u64 num_dw; + u32 extra_bytes; + int i = 0; + + if (size == 0) { + drm_printf(p, "No hwconfig available\n"); + return; + } + + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) { + drm_printf(p, "Error: could not allocate hwconfig memory\n"); + return; + } + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 attribute = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (i + len_dw > num_dw) { + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", + attribute, len_dw, num_dw - i); + len_dw = num_dw - i; + } + + /* + * If it's a single dword (as most hwconfig attributes are), + * then it's probably a number that makes sense to display + * in decimal form. In the rare cases where it's more than + * one dword, just print it in hex form and let the user + * figure out how to interpret it. + */ + if (len_dw == 1) + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); + else + drm_printf(p, "[%2u] = { %*ph }\n", attribute, + (int)(len_dw * sizeof(u32)), &hwconfig[i]); + i += len_dw; + } + + if (i < num_dw || extra_bytes) + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", + (num_dw - i) * sizeof(u32) + extra_bytes); + + kfree(hwconfig); +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index b5794d641900..7df315900e1c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -8,10 +8,12 @@ #include +struct drm_printer; struct xe_guc; int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); #endif From fc7c7498db3da52efe874029a9c2ccc0ad2646ee Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 15 Aug 2024 10:26:04 -0700 Subject: [PATCH 1124/1523] drm/xe/mcr: Try to derive dss_per_grp from hwconfig attributes When steering MCR register ranges of type "DSS," the group_id and instance_id values are calculated by dividing the DSS pool according to the size of a gslice or cslice, depending on the platform. These values haven't changed much on past platforms, so we've been able to hardcode the proper divisor so far. However the layout may not be so fixed on future platforms so the proper, future-proof way to determine this is by using some of the attributes from the GuC's hwconfig table. The hwconfig has two attributes reflecting the architectural maximum slice and subslice counts (i.e., before any fusing is considered) that can be used for the purposes of calculating MCR steering targets. If the hwconfig is lacking the necessary values (which should only be possible on older platforms before these attributes were added), we can still fall back to the old hardcoded values. Going forward the hwconfig is expected to always provide the information we need on newer platforms, and any failure to do so will be considered a bug in the firmware that will prevent us from switching to the buggy firmware release. It's worth noting that over time GuC's hwconfig has provided a couple different keys with similar-sounding descriptions. For our purposes here, we only trust the newer key "70" which has supplanted the similarly-named key "2" that existed on older platforms. Bspec: 73210 Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240815172602.2729146-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_gt_mcr.c | 40 +++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_gt_types.h | 6 +++++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 40 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.h | 1 + 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 6d948a469126..7d7bd0be6233 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -8,8 +8,10 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" +#include "xe_guc_hwconfig.h" #include "xe_mmio.h" #include "xe_sriov.h" @@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { + struct xe_guc *guc = >->uc.guc; + u32 max_slices = 0, max_subslices = 0; + int ret; + + /* + * Try to query the GuC's hwconfig table for the maximum number of + * slices and subslices. These don't reflect the platform's actual + * slice/DSS counts, just the physical layout by which we should + * determine the steering targets. On older platforms with older GuC + * firmware releases it's possible that these attributes may not be + * included in the table, so we can always fall back to the old + * hardcoded layouts. + */ +#define HWCONFIG_ATTR_MAX_SLICES 1 +#define HWCONFIG_ATTR_MAX_SUBSLICES 70 + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, + &max_slices); + if (ret < 0 || max_slices == 0) + goto fallback; + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, + &max_subslices); + if (ret < 0 || max_subslices == 0) + goto fallback; + + return DIV_ROUND_UP(max_subslices, max_slices); + +fallback: + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt) */ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - int dss_per_grp = dss_per_group(gt); - xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); - *group = dss / dss_per_grp; - *instance = dss % dss_per_grp; + *group = dss / gt->steering_dss_per_grp; + *instance = dss % gt->steering_dss_per_grp; } static void init_steering_dss(struct xe_gt *gt) { + gt->steering_dss_per_grp = dss_per_group(gt); + xe_gt_mcr_get_dss_steering(gt, min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8e46c0969bc7..82cf9f631736 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -376,6 +376,12 @@ struct xe_gt { u16 instance_target; } steering[NUM_STEERING_TYPES]; + /** + * @steering_dss_per_grp: number of DSS per steering group (gslice, + * cslice, etc.). + */ + unsigned int steering_dss_per_grp; + /** * @mcr_lock: protects the MCR_SELECTOR register for the duration * of a steered operation diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 025bad701556..af2c817d552c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -160,3 +160,43 @@ void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) kfree(hwconfig); } + +/* + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. + */ +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) +{ + size_t size = xe_guc_hwconfig_size(guc); + u64 num_dw = div_u64(size, sizeof(u32)); + u32 *hwconfig; + bool found = false; + int i = 0; + + if (num_dw == 0) + return -EINVAL; + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) + return -ENOMEM; + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 key = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (key != attribute) { + i += len_dw; + continue; + } + + *val = hwconfig[i]; + found = true; + break; + } + + kfree(hwconfig); + + return found ? 0 : -ENOENT; +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index 7df315900e1c..ab4e5038236e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -15,5 +15,6 @@ int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); #endif From f9e491c8633277d5398f384cf1fd1a477e04363f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 14:09:06 +0530 Subject: [PATCH 1125/1523] drm/amdgpu: add cp queue registers for gfx9_4_3 ipdump Add gfx9 support of CP queue registers for all queues to be used by devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 98 ++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 59417feac9a5..5af4abca759d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -151,6 +151,47 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) }; +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -976,7 +1017,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) { uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); - uint32_t *ptr, num_xcc; + uint32_t *ptr, num_xcc, inst; num_xcc = NUM_XCC(adev->gfx.xcc_mask); @@ -987,6 +1028,19 @@ static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) } else { adev->gfx.ip_dump_core = ptr; } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } } static int gfx_v9_4_3_sw_init(void *handle) @@ -1117,6 +1171,7 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); return 0; } @@ -4329,8 +4384,9 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) static void gfx_v9_4_3_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t i, j, k; + uint32_t num_xcc, reg, num_inst; + uint32_t xcc_id, xcc_offset, inst_offset; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4347,6 +4403,42 @@ static void gfx_v9_4_3_ip_dump(void *handle) GET_INST(GC, xcc_id))); } amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, + GET_INST(GC, xcc_id)); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); + } + inst_offset += reg_count; + } + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); } static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { From 37ee1456239cc5680f672f37417e52db2349965b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 8 Aug 2024 14:23:07 +0530 Subject: [PATCH 1126/1523] drm/amdgpu: add cp queue registers print for gfx9_4_3 Add gfx9_4_3 print support of CP queue registers for all queues to be used by devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 42 +++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5af4abca759d..7b4ae197eb49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4362,8 +4362,9 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + uint32_t i, j, k; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t num_xcc, reg, num_inst; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4379,6 +4380,43 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) gc_reg_list_9_4_3[i].reg_name, adev->gfx.ip_dump_core[xcc_offset + i]); } + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n", + num_xcc, + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, + "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", + xcc_id, i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + } + inst_offset += reg_count; + } + } + } + } } static void gfx_v9_4_3_ip_dump(void *handle) From 406792dc2a5c82e2f312e10c3c2c887de6ef80a4 Mon Sep 17 00:00:00 2001 From: Soham Dandapat Date: Mon, 29 Jul 2024 11:59:11 +0530 Subject: [PATCH 1127/1523] drm/amdgpu: Return earlier in amdgpu_sw_ring_ib_end if mcbp is off As we don't trigger preemption is sw ring muxer when mcbp is disabled,so return earlier in amdgpu_sw_ring_ib_end function if mcbp is disabled ,not required to call amdgpu_ring_mux_end_ib Signed-off-by: Soham Dandapat Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index d234b7ccfaaf..1c66da1c3fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -410,7 +410,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) return; amdgpu_ring_mux_end_ib(mux, ring); } From 57a372f67688dc7aee23a2a00bcaf6188f592934 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Apr 2024 13:11:53 -0400 Subject: [PATCH 1128/1523] drm/amdgpu: add new ring reset callback Use this to reset just a single ring. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 582053f1cd56..c7f15edeb367 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -235,6 +235,7 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); }; struct amdgpu_ring { @@ -334,6 +335,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); From 15789fa0f0e29cf802f30d0e308da9c6b18c116a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 14:38:20 -0400 Subject: [PATCH 1129/1523] drm/amdgpu: add per ring reset support (v5) If a specific job is hung, try and reset just the ring associated with the job. v2: move to amdgpu_job.c v3: fix drm_sched_stop() handling when ring reset fails v4: drop unnecessary amdgpu_fence_driver_clear_job_fences() and drm_sched_increase_karma() v5: rework sched_stop handling Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 908e13455152..c2de3fd17245 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,6 +72,25 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset. + */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched, true); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); From fb0a5834a338329bc665c7ce2b89f3e376557565 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 12 Jun 2024 15:49:38 +0800 Subject: [PATCH 1130/1523] drm/amdgpu: increase the reset counter for the queue reset Update the reset counter for the amdgpu_cs_query_reset_state() Acked-by: Vitaly Prosyak Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c2de3fd17245..c6a1783fc9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -84,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (!r) { if (amdgpu_ring_sched_ready(ring)) drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); amdgpu_fence_driver_force_completion(ring); if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched, true); From 5fb4d2a77113d3ebaa5c9dcdbef8b7bdfdeeffb2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 17:23:14 -0400 Subject: [PATCH 1131/1523] drm/amdgpu/gfx9: add ring reset callback Add ring reset callback for compute. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 38 +++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ab10a05c7885..b70cdb59c384 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7118,6 +7118,43 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } +static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* reset the ring */ + ring->wptr = 0; + *ring->wptr_cpu_addr = 0; + amdgpu_ring_clear_ring(ring); + + return amdgpu_ring_test_ring(ring); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7364,6 +7401,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, + .reset = gfx_v9_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { From b5e1a3874fe3cde73a4b02870bf3e8fa43777c5c Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Tue, 11 Jun 2024 18:06:44 +0800 Subject: [PATCH 1132/1523] drm/amdgpu/gfx9: remap queue after reset successfully Kiq command unmap_queues only does the dequeueing action. We have to map the queue back with clean mqd. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 ++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b70cdb59c384..cd18c10a290d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3742,7 +3742,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -3754,8 +3754,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3819,7 +3819,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kcq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -7147,11 +7147,29 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - /* reset the ring */ - ring->wptr = 0; - *ring->wptr_cpu_addr = 0; - amdgpu_ring_clear_ring(ring); - + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r){ + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + r = amdgpu_ring_test_ring(kiq_ring); + if (r){ + DRM_ERROR("fail to remap queue\n"); + return r; + } return amdgpu_ring_test_ring(ring); } From fdbd69486b468e4963b4ef9f76901d3788252dd5 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Tue, 2 Jul 2024 09:03:49 +0800 Subject: [PATCH 1133/1523] drm/amdgpu/gfx9: wait for reset done before remap There is a racing condition that cp firmware modifies MQD in reset sequence after driver updates it for remapping. We have to wait till CP_HQD_ACTIVE becoming false then remap the queue. v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cd18c10a290d..f87e6e9c7d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int i, r; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; + /* make sure dequeue is complete*/ + gfx_v9_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_0_unset_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)){ - DRM_ERROR("fail to resv mqd_obj\n"); + dev_err(adev->dev, "fail to resv mqd_obj\n"); return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); @@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); - if (r){ - DRM_ERROR("fail to unresv mqd_obj\n"); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); - if (r){ + if (r) { DRM_ERROR("fail to remap queue\n"); return r; } From 5d0112f77793c0351faee6c723a6fb9191c12be6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 17:24:03 -0400 Subject: [PATCH 1134/1523] drm/amdgpu/gfx9.4.3: add ring reset callback Add ring reset callback for compute. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7b4ae197eb49..f1c73bc1bd95 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3426,6 +3426,43 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* reset the ring */ + ring->wptr = 0; + *ring->wptr_cpu_addr = 0; + amdgpu_ring_clear_ring(ring); + + return amdgpu_ring_test_ring(ring); +} + enum amdgpu_gfx_cp_ras_mem_id { AMDGPU_GFX_CP_MEM1 = 1, AMDGPU_GFX_CP_MEM2, @@ -4536,6 +4573,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, + .reset = gfx_v9_4_3_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { From 6f38589e170f7649bb4b5efbff6b681c31433440 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 14 Jun 2024 13:05:32 +0800 Subject: [PATCH 1135/1523] drm/amdgpu/gfx9.4.3: remap queue after reset successfully Kiq command unmap_queues only does the dequeueing action. We have to map the queue back with clean mqd. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 36 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f1c73bc1bd95..44c6e2d44722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2080,7 +2080,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -2092,8 +2092,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2178,7 +2178,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3455,11 +3455,29 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - /* reset the ring */ - ring->wptr = 0; - *ring->wptr_cpu_addr = 0; - amdgpu_ring_clear_ring(ring); - + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r){ + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + r = amdgpu_ring_test_ring(kiq_ring); + if (r){ + DRM_ERROR("fail to remap queue\n"); + return r; + } return amdgpu_ring_test_ring(ring); } From 4c953e53cc34f8601b7b7c6286c65322452d35fe Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 28 Jun 2024 11:48:22 +0800 Subject: [PATCH 1136/1523] drm/amdgpu/gfx_9.4.3: wait for reset done before remap There is a racing condition that cp firmware modifies MQD in reset sequence after driver updates it for remapping. We have to wait till CP_HQD_ACTIVE becoming false then remap the queue. v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 39 +++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 44c6e2d44722..9a740020243d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3433,7 +3433,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int r, i; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -3455,9 +3455,28 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, if (r) return r; + /* make sure dequeue is complete*/ + gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)){ - DRM_ERROR("fail to resv mqd_obj\n"); + dev_err(adev->dev, "fail to resv mqd_obj\n"); return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); @@ -3467,15 +3486,23 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); - if (r){ - DRM_ERROR("fail to unresv mqd_obj\n"); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + r = amdgpu_ring_test_ring(kiq_ring); - if (r){ - DRM_ERROR("fail to remap queue\n"); + if (r) { + dev_err(adev->dev, "fail to remap queue\n"); return r; } return amdgpu_ring_test_ring(ring); From 186020c16650d6f7a05774ef318ae9056aae4f21 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 12:12:42 +0800 Subject: [PATCH 1137/1523] drm/amdgpu/gfx: add a new kiq_pm4_funcs callback for reset_hw_queue Add reset_hw_queue in kiq_pm4_funcs callbacks. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 86d3fa7eef90..6fe77e483bb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -138,6 +138,10 @@ struct kiq_pm4_funcs { void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, uint16_t pasid, uint32_t flush_type, bool all_hub); + void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring, + uint32_t queue_type, uint32_t me_id, + uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid); /* Packet sizes */ int set_resources_size; int map_queues_size; From 2e9bbdd7b7cb5f364a917e7c6eff2a2c36f11895 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 12:24:31 +0800 Subject: [PATCH 1138/1523] drm/amdgpu/gfx9: implement reset_hw_queue for gfx9 Using mmio to do queue reset. Enter safe mode when writing registers. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f87e6e9c7d6b..02ff70f4b416 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -893,6 +893,8 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) @@ -1004,12 +1006,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } + +static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + gfx_v9_0_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + gfx_v9_0_unset_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, From 4dc4422f11b041a9cb7a86f2f9bc310e48620fa2 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 4 Jul 2024 14:51:58 +0800 Subject: [PATCH 1139/1523] drm/amdgpu/gfx9.4.3: implement reset_hw_queue for gfx9.4.3 Using mmio to do queue reset. Enter safe mode before writing mmio registers. v2: set register instance offset according to xcc id. Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9a740020243d..18cb6d45d54f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -200,6 +200,8 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) @@ -311,12 +313,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } +static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, .kiq_query_status = gfx_v9_4_3_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, From 31ef969301e58daaaed1728690e16192b77b9028 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 10:20:56 -0400 Subject: [PATCH 1140/1523] drm/amdgpu/gfx9: per queue reset only on bare metal It's not supported under SR-IOV at the moment. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 02ff70f4b416..cad13e01dd3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7164,6 +7164,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 18cb6d45d54f..092e229f4097 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3471,6 +3471,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r, i; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; From c4f503551fd65e50e657219c9bfc2987c51805ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 17 Jul 2024 19:02:50 -0400 Subject: [PATCH 1141/1523] drm/amdgpu/gfx9: add ring reset callback for gfx Add ring reset callback for gfx. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cad13e01dd3a..6373c8caaabc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7155,6 +7155,51 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) + return -ENOMEM; + gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v9_0_ring_emit_reg_wait(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); + gfx_v9_0_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + + return amdgpu_ring_test_ring(ring); +} + static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { @@ -7391,6 +7436,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .reset = gfx_v9_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { From 27ef61f9617478c432bc477d4eed8963deb89f24 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 17:59:47 -0400 Subject: [PATCH 1142/1523] drm/amdgpu/gfx9: use proper rlc safe mode helpers Rather than open coding it for the queue reset. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6373c8caaabc..f8f5bb96d486 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1015,7 +1015,7 @@ static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t q unsigned i; /* enter save mode */ - gfx_v9_0_set_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); @@ -1037,7 +1037,7 @@ static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t q soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* exit safe mode */ - gfx_v9_0_unset_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { @@ -7233,7 +7233,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, return r; /* make sure dequeue is complete*/ - gfx_v9_0_set_safe_mode(adev, 0); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); for (i = 0; i < adev->usec_timeout; i++) { @@ -7245,7 +7245,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, r = -ETIMEDOUT; soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - gfx_v9_0_unset_safe_mode(adev, 0); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (r) { dev_err(adev->dev, "fail to wait on hqd deactive\n"); return r; From a48f31fb78265d992c75d45bea215998367d4956 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:04:44 -0400 Subject: [PATCH 1143/1523] drm/amdgpu/gfx9.4.3: use proper rlc safe mode helpers Rather than open coding it for the queue reset. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 092e229f4097..9215666a6318 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -321,7 +321,7 @@ static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t unsigned i; /* enter save mode */ - gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); @@ -343,7 +343,7 @@ static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* exit safe mode */ - gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { @@ -3495,7 +3495,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, return r; /* make sure dequeue is complete*/ - gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); for (i = 0; i < adev->usec_timeout; i++) { @@ -3507,7 +3507,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, r = -ETIMEDOUT; soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); mutex_unlock(&adev->srbm_mutex); - gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); if (r) { dev_err(adev->dev, "fail to wait on hqd deactive\n"); return r; From d082e5cde489caf7b29e966a9dbbc3fb43fb9164 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:20:44 -0400 Subject: [PATCH 1144/1523] drm/amdgpu/gfx9.4.3: use rlc safe mode for soft recovery Protect the MMIO access with safe mode. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9215666a6318..dd146322f209 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3054,7 +3054,9 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); } static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( From 3ec2ad7c34c412bd9264cd1ff235d0812be90e82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jul 2024 18:20:57 -0400 Subject: [PATCH 1145/1523] drm/amdgpu/gfx9: use rlc safe mode for soft recovery Protect the MMIO access with safe mode. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f8f5bb96d486..db21fb951e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5907,7 +5907,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, From 0f2c243dbfa008cec2dad03ea074156b6b176a03 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 13 Aug 2024 22:34:26 +0530 Subject: [PATCH 1146/1523] drm/amdgpu: remove ME0 registers from mi300 dump Remove ME0 registers from MI300 gfx_9_4_3 ipdump MI300 does not have gfx ME and hence those register are just empty one and could be dropped. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 37 ------------------------- 1 file changed, 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index dd146322f209..619ff3ec2c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -75,42 +75,11 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), @@ -122,11 +91,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), @@ -139,11 +105,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), From 89ec85d16eb8110d88c273d1d34f1fe5a70ba8cc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 13 Aug 2024 13:51:48 +0800 Subject: [PATCH 1147/1523] drm/amdgpu: fixing rlc firmware loading failure issue Skip rlc firmware validation to ignore firmware header size mismatch issues. This restores the workaround added in commit 849e133c973c ("drm/amdgpu: Fix the null pointer when load rlc firmware") Fixes: 3af2c80ae2f5 ("drm/amdgpu: refine gfx10 firmware loading") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3551 Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 75a6ca645964..ca983a014ba0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; From 3fbaf475a5b8361ebee7da18964db809e37518b7 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 6 Aug 2024 10:19:04 -0700 Subject: [PATCH 1148/1523] drm/radeon/evergreen_cs: fix int overflow errors in cs track offsets Several cs track offsets (such as 'track->db_s_read_offset') either are initialized with or plainly take big enough values that, once shifted 8 bits left, may be hit with integer overflow if the resulting values end up going over u32 limit. Same goes for a few instances of 'surf.layer_size * mslice' multiplications that are added to 'offset' variable - they may potentially overflow as well and need to be validated properly. While some debug prints in this code section take possible overflow issues into account, simply casting to (unsigned long) may be erroneous in its own way, as depending on CPU architecture one is liable to get different results. Fix said problems by: - casting 'offset' to fixed u64 data type instead of ambiguous unsigned long. - casting one of the operands in vulnerable to integer overflow cases to u64. - adjust format specifiers in debug prints to properly represent 'offset' values. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 285484e2d55e ("drm/radeon: add support for evergreen/ni tiling informations v11") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 62 +++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 1fe6e0d883c7..675a649fa7ab 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -395,7 +395,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -433,14 +433,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch @@ -448,14 +448,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i */ if (!surf.mode) { uint32_t *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -467,7 +467,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -476,9 +476,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -562,7 +562,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -608,18 +608,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -627,18 +627,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -659,7 +659,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -706,34 +706,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } From 20588d5afce3992ff4fc9b61085e3e1affbac620 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 12 Aug 2024 20:24:15 +0800 Subject: [PATCH 1149/1523] drm/amd: Remove unused declarations amdgpu_gart_table_vram_pin() and amdgpu_gart_table_vram_unpin() has been removed since commit 575e55ee4fbc ("drm/amdgpu: recover gart table at resume") remain the declarations untouched in the header files. Besides, amdgpu_dm_display_resume() has also beed removed since commit a80aa93de1a0 ("drm/amd/display: Unify dm resume sequence into a single call"). So, let's remove this unused declarations. Signed-off-by: Zhang Zekun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 2 -- 2 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f3980b40f2ce..937de21a7142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1575,13 +1575,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif -#if defined(CONFIG_DRM_AMD_DC) -int amdgpu_dm_display_resume(struct amdgpu_device *adev ); -#else -static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } -#endif - - void amdgpu_register_gpu_instance(struct amdgpu_device *adev); void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8283d682f543..7cc980bf4725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, From 27a74c125d029d0606b81ef865bb68dd975ca2f7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 23 Jul 2024 13:08:55 +0530 Subject: [PATCH 1150/1523] drm/amdgpu: add vcn ip dump ptr in vcn global struct Add pointer to the vcn ip dump in the vcn global structure to be accessible for all vcn version via global adev. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c87d68d4be53..2a1f3dbb14d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { From ab10f7748789fa9247949b530d8ee7d56eafe9a3 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:35:41 +0530 Subject: [PATCH 1151/1523] drm/amdgpu: add vcn_v3_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v3_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 78 ++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..693eb676c01d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +162,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -246,6 +284,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +331,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2251,34 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,7 +2297,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, .print_ip_state = NULL, }; From 6d88c0f94ac07ffc9f08e459cca036f4af08617d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Jul 2024 16:48:28 +0530 Subject: [PATCH 1152/1523] drm/amdgpu: add print support for vcn_v3_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v3_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 35 ++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 693eb676c01d..65dd68b32280 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2251,6 +2251,39 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + bool is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v3_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2298,7 +2331,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { From f685b38455b0c990a0f018a17b238d8ffb5acccc Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:17:11 +0530 Subject: [PATCH 1153/1523] drm/amdgpu: add vcn_v5_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v5_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 76 ++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..a30a42b1ba03 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,40 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +117,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +173,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +217,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1343,34 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,7 +1389,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, .print_ip_state = NULL, }; From 4af8071b654dbd9b8bd003ab8e49eb16b4cb9fcd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 15:50:23 -0400 Subject: [PATCH 1154/1523] drm/amdgpu/gfx8: add ring reset callback for gfx Add ring reset callback for gfx. v2: fix operator precedence (kernel test robot) Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 75 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/vid.h | 1 + 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a1963e6c5cab..bc8295812cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6149,6 +6149,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. @@ -6172,7 +6173,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EOP_TC_ACTION_EN | EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -6380,6 +6382,34 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6856,6 +6886,48 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } +static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6923,6 +6995,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, + .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 80ce42aacc0c..b61f6b838ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -246,6 +246,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data From d479158f6502a3698b91829fa03bd3f2ea38efe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2024 15:59:20 -0400 Subject: [PATCH 1155/1523] drm/amdgpu/gfx7: add ring reset callback for gfx Add ring reset callback for gfx. v2: fix operator precedence (kernel test robot) Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cikd.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 76 ++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 55982c0064b5..06088d52d81c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -364,6 +364,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5fbdef04c9aa..f146806c4633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2114,6 +2114,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2133,7 +2135,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4921,6 +4924,76 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4972,6 +5045,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, + .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { From b5be054c585110b2c5c1b180136800e8c41c7bb4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 15:36:19 -0400 Subject: [PATCH 1156/1523] drm/amdgpu/gfx11: enter safe mode before touching CP_INT_CNTL Need to enter safe mode before touching GC MMIO. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 22bb35278691..98261000e022 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4781,6 +4781,8 @@ static int gfx_v11_0_soft_reset(void *handle) int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfx_v11_0_set_safe_mode(adev, 0); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); @@ -4788,8 +4790,6 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { From 76acba7b7f12517990f326fabfecb6f55e334233 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 16:37:33 -0400 Subject: [PATCH 1157/1523] drm/amdgpu/gfx11: add a mutex for the gfx semaphore This will be used in more places in the future so add a mutex. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a6b8d0ba4758..482db4ebcc4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4059,6 +4059,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6fe77e483bb7..17b945b545b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -444,6 +444,8 @@ struct amdgpu_gfx { uint32_t *ip_dump_core; uint32_t *ip_dump_compute_queues; uint32_t *ip_dump_gfx_queues; + + struct mutex reset_sem_mutex; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 98261000e022..01f220ee4561 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4743,10 +4743,12 @@ static int gfx_v11_0_wait_for_idle(void *handle) } static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - int req) + bool req) { u32 i, tmp, val; + if (req) + mutex_lock(&adev->gfx.reset_sem_mutex); for (i = 0; i < adev->usec_timeout; i++) { /* Request with MeId=2, PipeId=0 */ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); @@ -4767,6 +4769,8 @@ static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, } udelay(1); } + if (!req) + mutex_unlock(&adev->gfx.reset_sem_mutex); if (i >= adev->usec_timeout) return -EINVAL; @@ -4814,7 +4818,7 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; @@ -4829,7 +4833,7 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); /* release the gfx mutex */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + r = gfx_v11_0_request_gfx_index_mutex(adev, false); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; From 478efcb90b074f0fdd18e62b30ce09140bd69022 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2024 16:39:30 -0400 Subject: [PATCH 1158/1523] drm/amdgpu/gfx11: export gfx_v11_0_request_gfx_index_mutex() It will be used by the queue reset code. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 01f220ee4561..5685aee479df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4742,8 +4742,8 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - bool req) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) { u32 i, tmp, val; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h index 10cfc29c27c9..157a5c812259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h @@ -26,4 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block; +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); + #endif From 5b7a59de4845460a313d93d4839258bfb982357c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:35:05 -0400 Subject: [PATCH 1159/1523] drm/amdgpu/mes: add API for user queue reset Add API for resetting user queues. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 43 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 ++++++ 2 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index c598c3edff7e..04a4f0dfec15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -793,6 +793,49 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) +{ + unsigned long flags; + struct amdgpu_mes_queue *queue; + struct amdgpu_mes_gang *gang; + struct mes_reset_queue_input queue_input; + int r; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); + + /* remove the mes gang from idr list */ + spin_lock_irqsave(&adev->mes.queue_id_lock, flags); + + queue = idr_find(&adev->mes.queue_id_idr, queue_id); + if (!queue) { + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + amdgpu_mes_unlock(&adev->mes); + DRM_ERROR("queue id %d doesn't exist\n", queue_id); + return -EINVAL; + } + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + + DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", + queue->doorbell_off); + + gang = queue->gang; + queue_input.doorbell_offset = queue->doorbell_off; + queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; + + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue, queue id = %d\n", + queue_id); + + amdgpu_mes_unlock(&adev->mes); + + return 0; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 548e724e3a75..5c8867d2380a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -248,6 +248,11 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_reset_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -360,6 +365,9 @@ struct amdgpu_mes_funcs { int (*reset_legacy_queue)(struct amdgpu_mes *mes, struct mes_reset_legacy_queue_input *input); + + int (*reset_hw_queue)(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -387,6 +395,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties *qprops, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); From d4f1fde734eb73767015272dd2e8af1440b30a9b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:48:07 -0400 Subject: [PATCH 1160/1523] drm/amdgpu/mes11: add API for user queue reset Add API for resetting user queues. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c0340ee3dec0..6f5a80519af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -360,6 +360,26 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -636,6 +656,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, .reset_legacy_queue = mes_v11_0_reset_legacy_queue, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, From 32aada4d0a35ee613ffed23090bc23e1b40da419 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 13:48:40 -0400 Subject: [PATCH 1161/1523] drm/amdgpu/mes12: add API for user queue reset Add API for resetting user queues. Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 35cd6ad73912..47a73f6ae4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -350,6 +350,32 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -723,6 +749,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, .reset_legacy_queue = mes_v12_0_reset_legacy_queue, + .reset_hw_queue = mes_v12_0_reset_hw_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, From f3c958ab857927e1ef2cc6806fcb0eb3f36c923a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 1 Aug 2024 19:19:27 +0530 Subject: [PATCH 1162/1523] drm/amdgpu: add print support for vcn_v5_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v5_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index a30a42b1ba03..c305386358b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1343,6 +1343,38 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v5_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1390,7 +1422,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, .dump_ip_state = vcn_v5_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v5_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { From 8962915044364bb7c36b3018f74371a798aee46d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:49:20 +0530 Subject: [PATCH 1163/1523] drm/amdgpu: add vcn_v4_0_3 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0_3. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 81 ++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b62..77cc6807d119 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,42 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + #define NORMALIZE_VCN_REG_OFFSET(offset) \ (offset & 0x1FFFF) @@ -92,6 +128,8 @@ static int vcn_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, r, vcn_inst; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -159,6 +197,15 @@ static int vcn_v4_0_3_sw_init(void *handle) } } + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -194,6 +241,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1684,6 +1733,36 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off, inst_id; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(VCN, i); + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], + inst_id)); + } +} + static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, @@ -1702,7 +1781,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_3_dump_ip_state, .print_ip_state = NULL, }; From 9d87dac3f9adbe30d545c577aab483dfce71143d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:31:21 +0530 Subject: [PATCH 1164/1523] drm/amdgpu: add vcn_v4_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 80 ++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 776c539bfdda..abd5a0793e58 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -137,6 +173,8 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -200,6 +238,15 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -239,6 +286,8 @@ static int vcn_v4_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -2109,6 +2158,35 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2127,7 +2205,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_dump_ip_state, .print_ip_state = NULL, }; From 46553db49cf7b7dce95879ee0725f7d95de3c184 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:57:09 +0530 Subject: [PATCH 1165/1523] drm/amdgpu: add vcn_v4_0_5 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v4_0_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 79 ++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8d75061f9f38..b05bfe1dad75 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -97,6 +133,8 @@ static int vcn_v4_0_5_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1733,6 +1781,35 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1751,7 +1828,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v4_0_5_dump_ip_state, .print_ip_state = NULL, }; From dc57edda816df4fb43bfc2809675e91d15994195 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:52:27 +0530 Subject: [PATCH 1166/1523] drm/amdgpu: add print support for vcn_v4_0_3 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0_3. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 77cc6807d119..0fda70336300 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1733,6 +1733,38 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_3_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1782,7 +1814,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, .dump_ip_state = vcn_v4_0_3_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_3_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { From 3a50a51d04d2ca5066949073274e70191104f8e5 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:43:07 +0530 Subject: [PATCH 1167/1523] drm/amdgpu: add print support for vcn_v4_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index abd5a0793e58..26c6f10a8c8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2158,6 +2158,38 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2206,7 +2238,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, .dump_ip_state = vcn_v4_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { From 439c3b124e9ee704766040d5182ccdaeb4d45499 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 12:58:46 +0530 Subject: [PATCH 1168/1523] drm/amdgpu: add print support for vcn_v4_0_5 ip dump Add support for logging the registers in devcoredump buffer for vcn_v4_0_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index b05bfe1dad75..b1fd226b7efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1781,6 +1781,38 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v4_0_5_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1829,7 +1861,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, .dump_ip_state = vcn_v4_0_5_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v4_0_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { From 837cc7f1bf2bbebb05781efb6bc1b10d5c2e9308 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:06:24 +0530 Subject: [PATCH 1169/1523] drm/amdgpu: add vcn_v1_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v1_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 78 ++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a280b9fecb77..f0c4b705c4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -45,6 +45,42 @@ #define mmUVD_REG_XX_MASK_1_0 0x05ac #define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -90,6 +126,8 @@ static int vcn_v1_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ @@ -161,6 +199,14 @@ static int vcn_v1_0_sw_init(void *handle) r = jpeg_v1_0_sw_init(handle); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return r; } @@ -184,6 +230,8 @@ static int vcn_v1_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1877,6 +1925,34 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1895,7 +1971,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v1_0_dump_ip_state, .print_ip_state = NULL, }; From ef9f3b5fd9d2594766c60b1e12b0e72e4918512c Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:08:22 +0530 Subject: [PATCH 1170/1523] drm/amdgpu: add print support for vcn_v1_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v1_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index f0c4b705c4e7..ecdfbfefd66a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1925,6 +1925,38 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v1_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1972,7 +2004,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, .dump_ip_state = vcn_v1_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v1_0_print_ip_state, }; /* From 2239aaa204f1c5002018a02903df7e45a0e0e503 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:18:09 +0530 Subject: [PATCH 1171/1523] drm/amdgpu: add vcn_v2_0 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v2_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 79 ++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d3d096909a7f..710d054e96f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -53,6 +53,42 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); @@ -96,6 +132,8 @@ static int vcn_v2_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared; @@ -184,6 +222,15 @@ static int vcn_v2_0_sw_init(void *handle) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -213,6 +260,8 @@ static int vcn_v2_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1985,6 +2034,34 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2003,7 +2080,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v2_0_dump_ip_state, .print_ip_state = NULL, }; From b910cacb4e70066238feafaf3f2430ef2c8c3b12 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:19:42 +0530 Subject: [PATCH 1172/1523] drm/amdgpu: add print support for vcn_v2_0 ip dump Add support for logging the registers in devcoredump buffer for vcn_v2_0. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 710d054e96f8..bfd067e2d2f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2034,6 +2034,38 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v2_0_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2081,7 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, .dump_ip_state = vcn_v2_0_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v2_0_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { From 0eea81ee2e12900bd5276558434b675b52ab2d5d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:23:55 +0530 Subject: [PATCH 1173/1523] drm/amdgpu: add vcn_v2_5 ip dump support Add support of vcn ip dump in the devcoredump for vcn_v2_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 80 ++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 96f60c303161..343a9667e03a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,43 @@ #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); @@ -122,6 +159,8 @@ static int vcn_v2_5_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { @@ -241,6 +280,15 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -277,6 +325,8 @@ static int vcn_v2_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1876,6 +1926,34 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -1894,7 +1972,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, .print_ip_state = NULL, }; From bc62abe1b92db4e027a92a6799f2193bb93970ea Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:26:31 +0530 Subject: [PATCH 1174/1523] drm/amdgpu: add print support for vcn_v2_5 ip dump Add support for logging the registers in devcoredump buffer for vcn_v2_5. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 343a9667e03a..661eef38aec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1926,6 +1926,38 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + static void vcn_v2_5_dump_ip_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1973,7 +2005,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = NULL, + .print_ip_state = vcn_v2_5_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { From 1a2103d68547407a098c237d7cade79b4ad2b88f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 5 Aug 2024 17:27:58 +0530 Subject: [PATCH 1175/1523] drm/amdgpu: add vcn ip dump support for vcn_v2_6 Add support for logging the registers in devcoredump buffer for vcn_v2_6. Signed-off-by: Sunil Khatri Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 661eef38aec9..04e9e806e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -2026,8 +2026,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = From 2dc3851ef7d9c5439ea8e9623fc36878f3b40649 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 10:28:24 -0400 Subject: [PATCH 1176/1523] drm/amdgpu/sdma5.2: limit wptr workaround to sdma 5.2.1 The workaround seems to cause stability issues on other SDMA 5.2.x IPs. Fixes: a03ebf116303 ("drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3556 Acked-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index d740255edf5a..bc9b240a3488 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -225,14 +225,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " From f49280ffd254e718ee01ef515fe91854fdf005cf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 19:06:36 -0400 Subject: [PATCH 1177/1523] drm/amdgpu: handle enforce isolation on non-0 gfxhub Some chips have more than one gfxhub so check if we are a gfxhub rather than just gfxhub 0. Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b6a8bddada4c..6608eeb61e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -484,7 +484,7 @@ error: bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); + (enforce_isolation && AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, From aec773a1fb0253494b85b073f46a0ba1d798b726 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 6 Jun 2024 13:12:40 +0530 Subject: [PATCH 1178/1523] drm/amdgpu: Add infrastructure for Cleaner Shader feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleaner shader is used by the CP firmware to clean LDS and GPRs between processes on the CUs. This adds an internal API for GFX IP code to allocate and initialize the cleaner shader. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 35 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 14 ++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9be8cafdcecc..4ed69fcfe9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1416,3 +1416,38 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); } + +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size) +{ + if (!adev->gfx.enable_cleaner_shader) + return -EOPNOTSUPP; + + return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) + memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, + cleaner_shader_size); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 17b945b545b4..09379ef7388f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -446,6 +446,14 @@ struct amdgpu_gfx { uint32_t *ip_dump_gfx_queues; struct mutex reset_sem_mutex; + + /* cleaner shader */ + struct amdgpu_bo *cleaner_shader_obj; + unsigned int cleaner_shader_size; + u64 cleaner_shader_gpu_addr; + void *cleaner_shader_cpu_ptr; + const void *cleaner_shader_ptr; + bool enable_cleaner_shader; }; struct amdgpu_gfx_ras_reg_entry { @@ -547,6 +555,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, void *ras_error_status, void (*func)(struct amdgpu_device *adev, void *ras_error_status, int xcc_id)); +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size); +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From ee7a846ea27bcbef5182d15923339a7bf182ec65 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Mar 2024 14:22:26 -0400 Subject: [PATCH 1179/1523] drm/amdgpu: Emit cleaner shader at end of IB submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the emission of a cleaner shader at the end of the IB submission process. This is achieved by adding a new function pointer, `emit_cleaner_shader`, to the `amdgpu_ring_funcs` structure. If the `emit_cleaner_shader` function is set in the ring functions, it is called during the VM flush process. The cleaner shader is only emitted if the `enable_cleaner_shader` flag is set in the `amdgpu_device` structure. This allows the cleaner shader emission to be controlled on a per-device basis. By emitting a cleaner shader at the end of the IB submission, we can ensure that the VM state is properly cleaned up after each submission. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index c7f15edeb367..f93f51002201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -236,6 +236,7 @@ struct amdgpu_ring_funcs { void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + void (*emit_cleaner_shader)(struct amdgpu_ring *ring); }; struct amdgpu_ring { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcb729094521..71ef3308be92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -681,6 +681,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader) + ring->funcs->emit_cleaner_shader(ring); + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -742,6 +746,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } From 96595204195d7e13736a84295e217316610d4cdb Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 21:35:26 +0530 Subject: [PATCH 1180/1523] drm/amdgpu: Make enforce_isolation setting per GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit makes enforce_isolation setting to be per GPU and per partition by adding the enforce_isolation array to the adev structure. The adev variable is set based on the global enforce_isolation module parameter during device initialization. In amdgpu_ids.c, the adev->enforce_isolation value for the current GPU is used to determine whether to enforce isolation between graphics and compute processes on that GPU. In amdgpu_ids.c, the adev->enforce_isolation value for the current GPU and partition is used to determine whether to enforce isolation between graphics and compute processes on that GPU and partition. This allows the enforce_isolation setting to be controlled individually for each GPU and each partition, which is useful in a system with multiple GPUs and partitions where different isolation settings might be desired for different GPUs and partitions. v2: fix loop in amdgpu_vmid_mgr_init() (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 3 ++- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 937de21a7142..0dceeea235cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1162,6 +1162,8 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + + bool enforce_isolation[MAX_XCP]; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 78b3c067fea7..5d5ba1e3d90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1110,7 +1110,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 482db4ebcc4b..e623af740aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + for (i = 0; i < MAX_XCP; i++) + adev->enforce_isolation[i] = !!enforce_isolation; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6608eeb61e5a..92d27d32de41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -476,15 +476,19 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && AMDGPU_IS_GFXHUB(vmhub)); + (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? + vm->root.bo->xcp_id : 0] && + AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, @@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } } /* alloc a default reserved vmid to enforce isolation */ - if (enforce_isolation) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..4012fb2dd08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, From 19cff16559a4f2d763faf4f8392bf86d3a21b93c Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 2 Aug 2024 14:22:26 -0400 Subject: [PATCH 1181/1523] drm/amdgpu: abort KIQ waits when there is a pending reset Stop waiting for the KIQ to return back when there is a reset pending. It's quite likely that the KIQ will never response. Signed-off-by: Koenig Christian Suggested-by: Lazar Lijo Tested-by: Victor Skvortsov Signed-off-by: Victor Skvortsov Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b49b3650fd62..17a19d49d30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4ae581f3fcb5..1cb920abc2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); From 1fc2ac428ef7d2ab9e8e19efe7ec3e58aea51bf3 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 16 Aug 2024 12:15:23 -0600 Subject: [PATCH 1182/1523] io_uring: fix user_data field name in comment io_uring_cqe's user_data field refers to `sqe->data`, but io_uring_sqe does not have a data field. Fix the comment to say `sqe->user_data`. Signed-off-by: Caleb Sander Mateos Link: https://github.com/axboe/liburing/pull/1206 Link: https://lore.kernel.org/r/20240816181526.3642732-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac..adc2524fd8e3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; From 534f7eff9239c1b0af852fc33f5af2b62c00eddf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 16 Aug 2024 10:40:38 +0930 Subject: [PATCH 1183/1523] btrfs: only enable extent map shrinker for DEBUG builds Although there are several patches improving the extent map shrinker, there are still reports of too frequent shrinker behavior, taking too much CPU for the kswapd process. So let's only enable extent shrinker for now, until we got more comprehensive understanding and a better solution. Link: https://lore.kernel.org/linux-btrfs/3df4acd616a07ef4d2dc6bad668701504b412ffc.camel@intelfx.name/ Link: https://lore.kernel.org/linux-btrfs/c30fd6b3-ca7a-4759-8a53-d42878bf84f7@gmail.com/ Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps") CC: stable@vger.kernel.org # 6.10+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 11044e9e2cb1..98fa0f382480 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2402,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) From f232de7cdb4b99adb2c7f2bc5e0b7e4e1292873b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Aug 2024 10:16:08 +0300 Subject: [PATCH 1184/1523] net/mlx5e: SHAMPO, Fix page leak When SHAMPO is used, a receive queue currently almost always leaks one page on shutdown. A page has MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (8) headers. These headers are tracked in the SHAMPO bitmap. Each page is released when the last header index in the group is processed. During header allocation, there can be leftovers from a page that will be used in a subsequent allocation. This is normally fine, except for the following scenario (simplified a bit): 1) Allocate N new page fragments, showing only the relevant last 4 fragments: 0: new page 1: new page 2: new page 3: new page 4: page from previous allocation 5: page from previous allocation 6: page from previous allocation 7: page from previous allocation 2) NAPI processes header indices 4-7 because they are the oldest allocated. Bit 7 will be set to 0. 3) Receive queue shutdown occurs. All the remaining bits are being iterated on to release the pages. But the page assigned to header indices 0-3 will not be freed due to what happened in step 2. This patch fixes the issue by making sure that on allocation, header fragments are always allocated in groups of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE so that there is never a partial page left over between allocations. A more appropriate fix would be a refactoring of mlx5e_alloc_rx_hd_mpwqe() and mlx5e_build_shampo_hd_umr(). But this refactoring is too big for net. It will be targeted for net-next. Fixes: e839ac9a89cb ("net/mlx5e: SHAMPO, Simplify header page release in teardown") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 225da8d691fc..23aa555ca0ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -735,6 +735,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); + ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); if (!ksm_entries) return 0; From 94e521937839475b83bac46e4d3ccba332e12064 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Aug 2024 10:16:09 +0300 Subject: [PATCH 1185/1523] net/mlx5e: SHAMPO, Release in progress headers The change in the fixes tag cleaned up too much: it removed the part that was releasing header pages that were posted via UMR but haven't been acknowledged yet on the ICOSQ. This patch corrects this omission by setting the bits between pi and ci to on when shutting down a queue with SHAMPO. To be consistent with the Striding RQ code, this action is done in mlx5e_free_rx_missing_descs(). Fixes: e839ac9a89cb ("net/mlx5e: SHAMPO, Simplify header page release in teardown") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 25 +++++++++++-------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index bb5da42edc23..d9e241423bc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -998,6 +998,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5df904639b0c..583fa24a7ae9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1236,6 +1236,14 @@ void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) rq->mpwqe.actual_wq_head = wq->head; rq->mpwqe.umr_in_progress = 0; rq->mpwqe.umr_completed = 0; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 len; + + len = (shampo->pi - shampo->ci) & shampo->hd_per_wq; + mlx5e_shampo_fill_umr(rq, len); + } } void mlx5e_free_rx_descs(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 23aa555ca0ae..de9d01036c28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -963,26 +963,31 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) sq->cc = sqcc; } -static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, - struct mlx5e_icosq *sq) +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len) { - struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); - struct mlx5e_shampo_hd *shampo; - /* assume 1:1 relationship between RQ and icosq */ - struct mlx5e_rq *rq = &c->rq; - int end, from, len = umr.len; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int end, from, full_len = len; - shampo = rq->mpwqe.shampo; end = shampo->hd_per_wq; from = shampo->ci; - if (from + len > shampo->hd_per_wq) { + if (from + len > end) { len -= end - from; bitmap_set(shampo->bitmap, from, end - from); from = 0; } bitmap_set(shampo->bitmap, from, len); - shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); + shampo->ci = (shampo->ci + full_len) & (shampo->hd_per_wq - 1); +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + + mlx5e_shampo_fill_umr(rq, umr.len); } int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) From a07e953dafe5ebd88942dc861dfb06eaf055fb07 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 15 Aug 2024 10:16:10 +0300 Subject: [PATCH 1186/1523] net/mlx5e: XPS, Fix oversight of Multi-PF Netdev changes The offending commit overlooked the Multi-PF Netdev changes. Revert mlx5e_set_default_xps_cpumasks to incorporate Multi-PF Netdev changes. Fixes: bcee093751f8 ("net/mlx5e: Modifying channels number and updating TX queues") Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 583fa24a7ae9..16b67c457b60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3028,15 +3028,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = priv->mdev; - int num_comp_vectors, ix, irq; - - num_comp_vectors = mlx5_comp_vectors_max(mdev); + int ix; for (ix = 0; ix < params->num_channels; ix++) { - cpumask_clear(priv->scratchpad.cpumask); + int num_comp_vectors, irq, vec_ix; + struct mlx5_core_dev *mdev; - for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + num_comp_vectors = mlx5_comp_vectors_max(mdev); + cpumask_clear(priv->scratchpad.cpumask); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); + + for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); From 607e1df7bd47fe91cab85a97f57870a26d066137 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 15 Aug 2024 10:16:11 +0300 Subject: [PATCH 1187/1523] net/mlx5: Fix IPsec RoCE MPV trace call Prevent the call trace below from happening, by not allowing IPsec creation over a slave, if master device doesn't support IPsec. WARNING: CPU: 44 PID: 16136 at kernel/locking/rwsem.c:240 down_read+0x75/0x94 Modules linked in: esp4_offload esp4 act_mirred act_vlan cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa mst_pciconf(OE) nfsv3 nfs_acl nfs lockd grace fscache netfs xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill cuse fuse rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd kvm_amd kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel sha1_ssse3 dell_smbios ib_uverbs aesni_intel crypto_simd dcdbas wmi_bmof dell_wmi_descriptor cryptd pcspkr ib_core acpi_ipmi sp5100_tco ccp i2c_piix4 ipmi_si ptdma k10temp ipmi_devintf ipmi_msghandler acpi_power_meter acpi_cpufreq ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 drm_kms_helper syscopyarea sysfillrect mlx5_core sysimgblt fb_sys_fops cec ahci libahci mlxfw drm pci_hyperv_intf libata tg3 sha256_ssse3 tls megaraid_sas i2c_algo_bit psample wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: mst_pci] CPU: 44 PID: 16136 Comm: kworker/44:3 Kdump: loaded Tainted: GOE 5.15.0-20240509.el8uek.uek7_u3_update_v6.6_ipsec_bf.x86_64 #2 Hardware name: Dell Inc. PowerEdge R7525/074H08, BIOS 2.0.3 01/15/2021 Workqueue: events xfrm_state_gc_task RIP: 0010:down_read+0x75/0x94 Code: 00 48 8b 45 08 65 48 8b 14 25 80 fc 01 00 83 e0 02 48 09 d0 48 83 c8 01 48 89 45 08 5d 31 c0 89 c2 89 c6 89 c7 e9 cb 88 3b 00 <0f> 0b 48 8b 45 08 a8 01 74 b2 a8 02 75 ae 48 89 c2 48 83 ca 02 f0 RSP: 0018:ffffb26387773da8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffa08b658af900 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ff886bc5e1366f2f RDI: 0000000000000000 RBP: ffffa08b658af940 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffa0a9bfb31540 R13: ffffa0a9bfb37900 R14: 0000000000000000 R15: ffffa0a9bfb37905 FS: 0000000000000000(0000) GS:ffffa0a9bfb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a45ed814e8 CR3: 000000109038a000 CR4: 0000000000350ee0 Call Trace: ? show_trace_log_lvl+0x1d6/0x2f9 ? show_trace_log_lvl+0x1d6/0x2f9 ? mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] ? down_read+0x75/0x94 ? __warn+0x80/0x113 ? down_read+0x75/0x94 ? report_bug+0xa4/0x11d ? handle_bug+0x35/0x8b ? exc_invalid_op+0x14/0x75 ? asm_exc_invalid_op+0x16/0x1b ? down_read+0x75/0x94 ? down_read+0xe/0x94 mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] mlx5_ipsec_fs_roce_tx_destroy+0xb1/0x130 [mlx5_core] tx_destroy+0x1b/0xc0 [mlx5_core] tx_ft_put+0x53/0xc0 [mlx5_core] mlx5e_xfrm_free_state+0x45/0x90 [mlx5_core] ___xfrm_state_destroy+0x10f/0x1a2 xfrm_state_gc_task+0x81/0xa9 process_one_work+0x1f1/0x3c6 worker_thread+0x53/0x3e4 ? process_one_work.cold+0x46/0x3c kthread+0x127/0x144 ? set_kthread_struct+0x60/0x52 ret_from_fork+0x22/0x2d ---[ end trace 5ef7896144d398e1 ]--- Fixes: dfbd229abeee ("net/mlx5: Configure IPsec steering for egress RoCEv2 MPV traffic") Reviewed-by: Leon Romanovsky Signed-off-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 234cd00f71a1..b7d4b1a2baf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } @@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } From 0e49d3ff12501adaafaf6fdb19699f021d1eda1c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 23:48:58 -0400 Subject: [PATCH 1188/1523] bcachefs: Fix locking in __bch2_trans_mark_dev_sb() We run this in full RW mode now, so we have to guard against the superblock buffer being reallocated. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 5 +---- fs/bcachefs/buckets.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a947..eb3002c4eae7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ad517ef744e5..be2bbd248631 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -915,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -1046,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1063,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; From 2fa62ce91a52e704716d08f9a8eb3f9e7e04710d Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 14 Aug 2024 12:01:24 +0800 Subject: [PATCH 1189/1523] scsi: MAINTAINERS: Update HiSilicon SAS controller driver maintainer Add Yihang Li as the maintainer of the HiSilicon SAS controller driver, replacing Xiang Chen. Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20240814040124.1376195-1-liyihang9@huawei.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..f96bc870a664 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10173,7 +10173,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: Xiang Chen +M: Yihang Li S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt From f03e94f23b04c2b71c0044c1534921b3975ef10c Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 13 Aug 2024 13:34:10 +0800 Subject: [PATCH 1190/1523] scsi: core: Fix the return value of scsi_logical_block_count() scsi_logical_block_count() should return the block count of a given SCSI command. The original implementation ended up shifting twice, leading to an incorrect count being returned. Fix the conversion between bytes and logical blocks. Cc: stable@vger.kernel.org Fixes: 6a20e21ae1e2 ("scsi: core: Add helper to return number of logical blocks in a request") Signed-off-by: Chaotian Jing Link: https://lore.kernel.org/r/20240813053534.7720-1-chaotian.jing@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 45c40d200154..8ecfb94049db 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -234,7 +234,7 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd) { - unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT; + unsigned int shift = ilog2(scmd->device->sector_size); return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift; } From a0c9fe5eecc97680323ee83780ea3eaf440ba1b7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 15 Aug 2024 16:37:13 +0100 Subject: [PATCH 1191/1523] tc-testing: don't access non-existent variable on exception Since commit 255c1c7279ab ("tc-testing: Allow test cases to be skipped") the variable test_ordinal doesn't exist in call_pre_case(). So it should not be accessed when an exception occurs. This resolves the following splat: ... During handling of the above exception, another exception occurred: Traceback (most recent call last): File ".../tdc.py", line 1028, in main() File ".../tdc.py", line 1022, in main set_operation_mode(pm, parser, args, remaining) File ".../tdc.py", line 966, in set_operation_mode catresults = test_runner_serial(pm, args, alltests) File ".../tdc.py", line 642, in test_runner_serial (index, tsr) = test_runner(pm, args, alltests) File ".../tdc.py", line 536, in test_runner res = run_one_test(pm, args, index, tidx) File ".../tdc.py", line 419, in run_one_test pm.call_pre_case(tidx) File ".../tdc.py", line 146, in call_pre_case print('test_ordinal is {}'.format(test_ordinal)) NameError: name 'test_ordinal' is not defined Fixes: 255c1c7279ab ("tc-testing: Allow test cases to be skipped") Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20240815-tdc-test-ordinal-v1-1-0255c122a427@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636f..4f255cec0c22 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise From cd06b713a6880997ca5aecac8e33d5f9c541749e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:10 +0530 Subject: [PATCH 1192/1523] scsi: ufs: core: Add a quirk for handling broken LSDBS field in controller capabilities register 'Legacy Queue & Single Doorbell Support (LSDBS)' field in the controller capabilities register is supposed to report whether the legacy single doorbell mode is supported in the controller or not. But some controllers report '1' in this field which corresponds to 'LSDB not supported', but they indeed support LSDB. So let's add a quirk to handle those controllers. If the quirk is enabled by the controller driver, then LSDBS register field will be ignored and legacy single doorbell mode is assumed to be enabled always. Tested-by: Amit Pundir Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-1-e6fe0e18e2a3@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +++++- include/ufs/ufshcd.h | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0b3d0c8e0dda..a6f818cdef0e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2426,7 +2426,11 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) * 0h: legacy single doorbell support is available * 1h: indicate that legacy single doorbell support has been removed */ - hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); + if (!(hba->quirks & UFSHCD_QUIRK_BROKEN_LSDBS_CAP)) + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); + else + hba->lsdb_sup = true; + if (!hba->mcq_sup) return 0; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cac0cdb9a916..0fd2aebac728 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -676,6 +676,14 @@ enum ufshcd_quirks { * the standard best practice for managing keys). */ UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, + + /* + * This quirk indicates that the controller reports the value 1 (not + * supported) in the Legacy Single DoorBell Support (LSDBS) bit of the + * Controller Capabilities register although it supports the legacy + * single doorbell mode. + */ + UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, }; enum ufshcd_caps { From ea593e028a9cc523557b4084a61d87ae69e2f270 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:11 +0530 Subject: [PATCH 1193/1523] scsi: ufs: qcom: Add UFSHCD_QUIRK_BROKEN_LSDBS_CAP for SM8550 SoC SM8550 SoC has the UFSHCI 4.0 compliant UFS controller and only supports legacy single doorbell mode without MCQ. But due to a hardware bug, it reports 1 in the 'Legacy Queue & Single Doorbell Support (LSDBS)' field of the Controller Capabilities register. This field is supposed to read as 0 if legacy single doorbell mode is supported and 1 otherwise. Starting with commit 0c60eb0cc320 ("scsi: ufs: core: Check LSDBS cap when !mcq"), ufshcd driver is now relying on the LSDBS field to decide when to use the legacy doorbell mode if MCQ is not supported. And this ends up breaking UFS on SM8550: ufshcd-qcom 1d84000.ufs: ufshcd_init: failed to initialize (legacy doorbell mode not supported) ufshcd-qcom 1d84000.ufs: error -EINVAL: Initialization failed with error -22 So use the UFSHCD_QUIRK_BROKEN_LSDBS_CAP quirk for SM8550 SoC so that the ufshcd driver could use legacy doorbell mode correctly. Fixes: 0c60eb0cc320 ("scsi: ufs: core: Check LSDBS cap when !mcq") Tested-by: Amit Pundir Reviewed-by: Bart Van Assche Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-2-e6fe0e18e2a3@linaro.org Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 810e637047d0..c87fdc849c62 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -857,6 +857,9 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) if (host->hw_ver.major > 0x3) hba->quirks |= UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH; + + if (of_device_is_compatible(hba->dev->of_node, "qcom,sm8550-ufshc")) + hba->quirks |= UFSHCD_QUIRK_BROKEN_LSDBS_CAP; } static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host) @@ -1847,7 +1850,8 @@ static void ufs_qcom_remove(struct platform_device *pdev) } static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = { - { .compatible = "qcom,ufshc"}, + { .compatible = "qcom,ufshc" }, + { .compatible = "qcom,sm8550-ufshc" }, {}, }; MODULE_DEVICE_TABLE(of, ufs_qcom_of_match); From cd612b57c3672487ae8565855eaf9e83862eccc5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 16 Aug 2024 13:59:56 +0100 Subject: [PATCH 1194/1523] scsi: MAINTAINERS: Add header files to SCSI SUBSYSTEM This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking [1]. In this case the files with "net" in their name. [1] https://lore.kernel.org/netdev/20240816-net-mnt-v1-0-ef946b47ced4@kernel.org/ As part of that effort these files came up: * include/uapi/scsi/scsi_netlink_fc.h * include/uapi/scsi/scsi_netlink.h Unlike all the other matching files, these one seem to relate more closely to SCSI than Networking, so I have added them to the SCSI SUBSYSTEM section. In order to simplify things, and for consistency, I have added the entire include/uapi/scsi rather than the individual files. Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240816-scsi-mnt-v1-1-439af8b1c28b@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f96bc870a664..9a33ab69abab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20351,6 +20351,7 @@ F: Documentation/devicetree/bindings/scsi/ F: drivers/scsi/ F: drivers/ufs/ F: include/scsi/ +F: include/uapi/scsi/ SCSI TAPE DRIVER M: Kai Mäkisara From cbaac68987b8699397df29413b33bd51f5255255 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 16 Aug 2024 20:53:10 -0400 Subject: [PATCH 1195/1523] scsi: sd: Do not attempt to configure discard unless LBPME is set Commit f874d7210d88 ("scsi: sd: Keep the discard mode stable") attempted to address an issue where one mode of discard operation got configured prior to the device completing full discovery. Unfortunately this change assumed discard was always enabled on the device. Do not attempt to configure discard unless LBPME is enabled. Link: https://lore.kernel.org/r/20240817005325.3319384-1-martin.petersen@oracle.com Fixes: f874d7210d88 ("scsi: sd: Keep the discard mode stable") Reported-by: Chris Bainbridge Tested-by: Chris Bainbridge Tested-by: Shin'ichiro Kawasaki Tested-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 699f4f9674d9..dad3991397cf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3308,6 +3308,9 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) static unsigned int sd_discard_mode(struct scsi_disk *sdkp) { + if (!sdkp->lbpme) + return SD_LBP_FULL; + if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ if (sdkp->max_unmap_blocks) From 0523374e303051c94457df9170f12a752d8b09cf Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 13 Aug 2024 17:06:18 +0300 Subject: [PATCH 1196/1523] drm/i915/gt: remove stray declaration of intel_gt_release_all() When intel_gt_release_all() was removed from the code in commit e89950553385 ("drm/i915: do not clean GT table on error path"), its declaration in the header file remained. Remove it. Signed-off-by: Luca Coelho Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240813140618.387553-1-luciano.coelho@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index b73555889d50..998ca029b73a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -174,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) int intel_gt_probe_all(struct drm_i915_private *i915); int intel_gt_tiles_init(struct drm_i915_private *i915); -void intel_gt_release_all(struct drm_i915_private *i915); #define for_each_gt(gt__, i915__, id__) \ for ((id__) = 0; \ From 852856e3b6f679c694dd5ec41e5a3c11aa46640b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 20:40:33 -0700 Subject: [PATCH 1197/1523] drm/xe: Use reserved copy engine for user binds on faulting devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User binds map to engines with can fault, faults depend on user binds completion, thus we can deadlock. Avoid this by using reserved copy engine for user binds on faulting devices. While we are here, normalize bind queue creation with a helper. v2: - Pass in extensions to bind queue creation (CI) v3: - s/resevered/reserved (Lucas) - Fix NULL hwe check (Jonathan) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240816034033.53837-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 122 +++++++++++++++-------------- drivers/gpu/drm/xe/xe_exec_queue.h | 6 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 8 +- 4 files changed, 70 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7d170d37fdbe..7f0b33098182 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -166,7 +166,8 @@ err_post_alloc: struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -186,7 +187,54 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On a + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used. + * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) + return ERR_PTR(-EINVAL); + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -418,34 +466,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == XE_ENGINE_CLASS_COPY) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -507,8 +527,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -533,37 +554,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; + + for_each_tile(tile, xe, id) { struct xe_exec_queue *new; - u32 flags; + u32 flags = EXEC_QUEUE_FLAG_VM; - if (xe_gt_is_media_type(gt)) - continue; + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = xe_hw_engine_lookup(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); - - xe_pm_runtime_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ded77b0f3b90..99139368ba6e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags); + enum xe_engine_class class, + u32 flags, u64 extensions); +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a2d0ce3c59bf..cbf54be224c9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -442,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f54594db643e..b15348d4809a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1482,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; struct xe_exec_queue *q; u32 create_flags = EXEC_QUEUE_FLAG_VM; if (!vm->pt_root[id]) continue; - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); + q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_close; From f784750c670f7c5ac572590ddad77a89b4c997bf Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:26 +0200 Subject: [PATCH 1198/1523] drm/xe/hw_engine_group: Introduce xe_hw_engine_group A xe_hw_engine_group is a group of hw engines. Two hw engines belong to the same xe_hw_engine_group if one hw engine cannot make progress while the other is stuck on a page fault. Typically, hw engines of the same group share some resources such as EUs, but this really depends on the hardware configuration of the platforms. The simple engines partitioning proposed here might be too conservative but is intended to work for existing platforms. It can be optimized later if more sets of independent engines are identified. The hw engine groups are intended to be used in the context of faulting long-running jobs submissions. v2: Move to own files, improve error handling (Matt Brost) v3: Fix build issue reported by CI, improve commit message (Matt Roper) v4: Fix kernel doc v5: Add switch case for XE_ENGINE_CLASS_OTHER Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 4 + drivers/gpu/drm/xe/xe_hw_engine_group.c | 102 ++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 16 +++ drivers/gpu/drm/xe/xe_hw_engine_group_types.h | 51 +++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + 6 files changed, 176 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.c create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e11392b5dd3d..b9670ae09a9e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -66,6 +66,7 @@ xe-y += xe_bb.o \ xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ + xe_hw_engine_group.o \ xe_hw_fence.o \ xe_huc.o \ xe_irq.o \ diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index e195022ca836..aa9b2b16a6e0 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -23,6 +23,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -790,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt) } hw_engine_setup_logical_mapping(gt); + err = xe_hw_engine_setup_groups(gt); + if (err) + return err; return 0; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c new file mode 100644 index 000000000000..1d109c08c7a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine_group.h" + +static void +hw_engine_group_free(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine_group *group = arg; + + kfree(group); +} + +static struct xe_hw_engine_group * +hw_engine_group_alloc(struct xe_device *xe) +{ + struct xe_hw_engine_group *group; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + init_rwsem(&group->mode_sem); + INIT_LIST_HEAD(&group->exec_queue_list); + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); + if (err) + return ERR_PTR(err); + + return group; +} + +/** + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt + * @gt: The gt for which groups are setup + * + * Return: 0 on success, negative error code on error. + */ +int xe_hw_engine_setup_groups(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; + struct xe_device *xe = gt_to_xe(gt); + int err; + + group_rcs_ccs = hw_engine_group_alloc(xe); + if (IS_ERR(group_rcs_ccs)) { + err = PTR_ERR(group_rcs_ccs); + goto err_group_rcs_ccs; + } + + group_bcs = hw_engine_group_alloc(xe); + if (IS_ERR(group_bcs)) { + err = PTR_ERR(group_bcs); + goto err_group_bcs; + } + + group_vcs_vecs = hw_engine_group_alloc(xe); + if (IS_ERR(group_vcs_vecs)) { + err = PTR_ERR(group_vcs_vecs); + goto err_group_vcs_vecs; + } + + for_each_hw_engine(hwe, gt, id) { + switch (hwe->class) { + case XE_ENGINE_CLASS_COPY: + hwe->hw_engine_group = group_bcs; + break; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + hwe->hw_engine_group = group_rcs_ccs; + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + hwe->hw_engine_group = group_vcs_vecs; + break; + case XE_ENGINE_CLASS_OTHER: + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } + } + + return 0; + +err_group_vcs_vecs: + kfree(group_vcs_vecs); +err_group_bcs: + kfree(group_bcs); +err_group_rcs_ccs: + kfree(group_rcs_ccs); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h new file mode 100644 index 000000000000..c2648f87f7ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_H_ +#define _XE_HW_ENGINE_GROUP_H_ + +#include "xe_hw_engine_group_types.h" + +struct drm_device; +struct xe_gt; + +int xe_hw_engine_setup_groups(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h new file mode 100644 index 000000000000..92b6e0712c03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ +#define _XE_HW_ENGINE_GROUP_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/** + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw + * engine group + * + * @EXEC_MODE_LR: execution in long-running mode + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode + */ +enum xe_hw_engine_group_execution_mode { + EXEC_MODE_LR, + EXEC_MODE_DMA_FENCE, +}; + +/** + * struct xe_hw_engine_group - Hardware engine group + * + * hw engines belong to the same group if they share hardware resources in a way + * that prevents them from making progress when one is stuck on a page fault. + */ +struct xe_hw_engine_group { + /** + * @exec_queue_list: list of exec queues attached to this + * xe_hw_engine_group + */ + struct list_head exec_queue_list; + /** @resume_work: worker to resume faulting LR exec queues */ + struct work_struct resume_work; + /** @resume_wq: workqueue to resume faulting LR exec queues */ + struct workqueue_struct *resume_wq; + /** + * @mode_sem: used to protect this group's hardware resources and ensure + * mutual exclusion between execution only in faulting LR mode and + * execution only in DMA_FENCE mode + */ + struct rw_semaphore mode_sem; + /** @cur_mode: current execution mode of this hw engine group */ + enum xe_hw_engine_group_execution_mode cur_mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 70e6434f150d..39f24012d0f4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -150,6 +150,8 @@ struct xe_hw_engine { struct xe_hw_engine_class_intf *eclass; /** @oa_unit: oa unit for this hw engine */ struct xe_oa_unit *oa_unit; + /** @hw_engine_group: the group of hw engines this one belongs to */ + struct xe_hw_engine_group *hw_engine_group; }; /** From 3dc6da76ae55d667fca2c9f9497e8ea1a27497b1 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:27 +0200 Subject: [PATCH 1199/1523] drm/xe/guc_submit: Make suspend_wait interruptible Rely on wait_event_interruptible_timeout() to put the process to sleep with TASK_INTERRUPTIBLE. It allows using this function in interruptible context. v2: Propagate error on wait_event_interruptible_timeout (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-3-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index be5b9d8cfa5f..fbbe6a487bbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1628,11 +1628,11 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - guc_read_stopped(guc), - HZ * 5); + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + !READ_ONCE(q->guc->suspend_pending) || + exec_queue_killed(q) || + guc_read_stopped(guc), + HZ * 5); if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1642,7 +1642,7 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) return -ETIME; } - return 0; + return ret < 0 ? ret : 0; } static void guc_exec_queue_resume(struct xe_exec_queue *q) From 7970cb36966c9b9183255dc097ae0446300eebcf Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:28 +0200 Subject: [PATCH 1200/1523] 'drm/xe/hw_engine_group: Register hw engine group's exec queues Add helpers to safely add and delete the exec queues attached to a hw engine group, and make use them at the time of creation and destruction of the exec queues. Keeping track of them is required to control the execution mode of the hw engine group. v2: Improve error handling and robustness, suspend exec queues created in fault mode if group in dma-fence mode, init queue link (Matt Brost) v3: Delete queue from hw engine group when it is destroyed by the user, also clean up at the time of closing the file in case the user did not destroy the queue v4: Use correct list when checking if empty, do not add the queue if VM is in xe_vm_in_preempt_fence_mode (Matt Brost) v5: Remove unrelated newline, add checks and asserts for group, unwind on suspend failure (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-4-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_device.c | 3 ++ drivers/gpu/drm/xe/xe_exec_queue.c | 11 ++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 + drivers/gpu/drm/xe/xe_hw_engine_group.c | 66 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++ 5 files changed, 86 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 206328387150..b6db7e082d88 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" #include "xe_memirq.h" @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) * vm->lock taken during xe_exec_queue_kill(). */ xa_for_each(&xef->exec_queue.xa, idx, q) { + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); xe_exec_queue_kill(q); xe_exec_queue_put(q); } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7f0b33098182..ebc80add10ac 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -624,6 +626,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } mutex_lock(&xef->exec_queue.lock); @@ -815,6 +823,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1408b02eea53..315f874426bd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -142,6 +142,8 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ + struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 1d109c08c7a6..b362af7ffab5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -5,9 +5,12 @@ #include +#include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_engine_group.h" +#include "xe_vm.h" static void hw_engine_group_free(struct drm_device *drm, void *arg) @@ -100,3 +103,66 @@ err_group_rcs_ccs: return err; } + +/** + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group + * @group: The hw engine group + * @q: The exec_queue + * + * Return: 0 on success, + * -EINTR if the lock could not be acquired + */ +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + int err; + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, q->vm); + + if (xe_vm_in_preempt_fence_mode(q->vm)) + return 0; + + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { + q->ops->suspend(q); + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + + queue_work(group->resume_wq, &group->resume_work); + } + + list_add(&q->hw_engine_group_link, &group->exec_queue_list); + up_write(&group->mode_sem); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group + * @group: The hw engine group + * @q: The exec_queue + */ +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, q->vm); + + down_write(&group->mode_sem); + + if (!list_empty(&q->hw_engine_group_link)) + list_del(&q->hw_engine_group_link); + + up_write(&group->mode_sem); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index c2648f87f7ef..857a83787504 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -9,8 +9,12 @@ #include "xe_hw_engine_group_types.h" struct drm_device; +struct xe_exec_queue; struct xe_gt; int xe_hw_engine_setup_groups(struct xe_gt *gt); +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); + #endif From 53fdfa19e6a9220a14c0bb21273880774ea70dbd Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:29 +0200 Subject: [PATCH 1201/1523] drm/xe/hw_engine_group: Add helper to suspend faulting LR jobs This is a required feature for dma fence jobs to preempt faulting long running jobs in order to ensure mutual exclusion on a given hw engine group. v2: Pipeline calls to suspend(q) and suspend_wait(q) to improve efficiency, switch to lockdep_assert_held_write (Matt Brost) v3: Return error on suspend_wait failure to propagate on the call stack up to IOCTL (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-5-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index b362af7ffab5..8659332012dd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -166,3 +166,39 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct up_write(&group->mode_sem); } + +/** + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group + * @group: The hw engine group + * + * Return: 0 on success, negative error code on error. + */ +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + int err; + struct xe_exec_queue *q; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->suspend(q); + } + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + } + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} From 7f0d7bee2079fc899c8280e177b0c0feb8b9debe Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:30 +0200 Subject: [PATCH 1202/1523] drm/xe/exec_queue: Remove duplicated code This code section is the same as the body of xe_exec_queue_last_fence_put_unlocked() so call the function instead and remove duplicated code to make maintenance easier. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-6-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ebc80add10ac..3ce4582504f9 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -852,10 +852,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** From 0d92cd8935a3fffbbfee0fd59cdc89ac5167b14a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:31 +0200 Subject: [PATCH 1203/1523] drm/xe/exec_queue: Prepare last fence for hw engine group resume context Ensure we can safely take a ref of the exec queue's last fence from the context of resuming jobs from the hw engine group. The locking requirements differ from the general case, hence the introduction of this new function. v2: Add kernel doc, rework the code to prevent code duplication v3: Fix kernel doc, remove now unnecessary lockdep variants (Matt Brost) v4: Remove new put function (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-7-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 33 ++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 3ce4582504f9..6d3b44cbc4c7 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -837,10 +837,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -894,6 +896,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, return fence; } +/** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + /** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 99139368ba6e..90c7f73eab88 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -77,6 +77,8 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, + struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, From 2750ff97ee385b85195c5579ee911a551fbb0dd9 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:32 +0200 Subject: [PATCH 1204/1523] drm/xe/hw_engine_group: Add helper to wait for dma fence jobs This is a required feature for faulting long running jobs not to be submitted while dma fence jobs are running on the hw engine group. v2: Switch to lockdep_assert_held_write in worker, get a proper reference for the last fence (Matt Brost) v3: Directly call dma_fence_put with the fence ref (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-8-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 8659332012dd..8d3ddfc4ee82 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -202,3 +202,36 @@ err_suspend: up_write(&group->mode_sem); return err; } + +/** + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete + * @group: The hw engine group + * + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() + * is not interruptible. + * + * Return: 0 on success, + * -ETIME if waiting for one job failed + */ +static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) +{ + long timeout; + struct xe_exec_queue *q; + struct dma_fence *fence; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (xe_vm_in_lr_mode(q->vm)) + continue; + + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); + timeout = dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (timeout < 0) + return -ETIME; + } + + return 0; +} From 770bd1d341130ff38feda169177159cd78389cfc Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:33 +0200 Subject: [PATCH 1205/1523] drm/xe/hw_engine_group: Ensure safe transition between execution modes Provide a way to safely transition execution modes of the hw engine group ahead of the actual execution. When necessary, either wait for running jobs to complete or preempt them, thus ensuring mutual exclusion between execution modes. Unlike a mutex, the rw_semaphore used in this context allows multiple submissions in the same mode. v2: Use lockdep_assert_held_write, add annotations (Matt Brost) v3: Fix kernel doc, remove redundant code (Matt Brost) v4: Now that xe_hw_engine_group_suspend_faulting_lr_jobs can fail, propagate the error to the caller (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-9-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 75 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 5 ++ 2 files changed, 80 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 8d3ddfc4ee82..e6c235119351 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -235,3 +235,78 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group return 0; } + +static int switch_mode(struct xe_hw_engine_group *group) +{ + int err = 0; + enum xe_hw_engine_group_execution_mode new_mode; + + lockdep_assert_held_write(&group->mode_sem); + + switch (group->cur_mode) { + case EXEC_MODE_LR: + new_mode = EXEC_MODE_DMA_FENCE; + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + break; + case EXEC_MODE_DMA_FENCE: + new_mode = EXEC_MODE_LR; + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); + break; + } + + if (err) + return err; + + group->cur_mode = new_mode; + + return 0; +} + +/** + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode + * @group: The hw engine group + * @new_mode: The new execution mode + * @previous_mode: Pointer to the previous mode provided for use by caller + * + * Return: 0 if successful, -EINTR if locking failed. + */ +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode) +__acquires(&group->mode_sem) +{ + int err = down_read_interruptible(&group->mode_sem); + + if (err) + return err; + + *previous_mode = group->cur_mode; + + if (new_mode != group->cur_mode) { + up_read(&group->mode_sem); + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (new_mode != group->cur_mode) { + err = switch_mode(group); + if (err) { + up_write(&group->mode_sem); + return err; + } + } + downgrade_write(&group->mode_sem); + } + + return err; +} + +/** + * xe_hw_engine_group_put() - Put the group + * @group: The hw engine group + */ +void xe_hw_engine_group_put(struct xe_hw_engine_group *group) +__releases(&group->mode_sem) +{ + up_read(&group->mode_sem); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index 857a83787504..e0deb7c7bb5b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -17,4 +17,9 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt); int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode); +void xe_hw_engine_group_put(struct xe_hw_engine_group *group); + #endif From d16ef1a18e39a5086a419d8b3c71adb30273881a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:34 +0200 Subject: [PATCH 1206/1523] drm/xe/exec: Switch hw engine group execution mode upon job submission If the job about to be submitted is a dma-fence job, update the current execution mode of the hw engine group. This triggers an immediate suspend of the exec queues running faulting long-running jobs. If the job about to be submitted is a long-running job, kick a new worker used to resume the exec queues running faulting long-running jobs once the dma-fence jobs have completed. v2: Kick the resume worker from exec IOCTL, switch to unordered workqueue, destroy it after use (Matt Brost) v3: Do not resume if no exec queue was suspended (Matt Brost) v4: Squash commits (Matt Brost) v5: Do not kick the worker when xe_vm_in_preempt_fence_mode (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-10-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 20 +++++++- drivers/gpu/drm/xe/xe_hw_engine_group.c | 62 ++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f36980aa26e6..484acfbe0e61 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -199,7 +211,7 @@ retry: downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_unlock_list; + goto err_hw_exec_mode; } if (!args->num_batch_buffer) { @@ -312,6 +324,9 @@ retry: spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -324,6 +339,9 @@ err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index e6c235119351..82750520a90a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -17,9 +17,36 @@ hw_engine_group_free(struct drm_device *drm, void *arg) { struct xe_hw_engine_group *group = arg; + destroy_workqueue(group->resume_wq); kfree(group); } +static void +hw_engine_group_resume_lr_jobs_func(struct work_struct *w) +{ + struct xe_exec_queue *q; + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); + int err; + enum xe_hw_engine_group_execution_mode previous_mode; + + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + if (err) + return; + + if (previous_mode == EXEC_MODE_LR) + goto put; + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->resume(q); + } + +put: + xe_hw_engine_group_put(group); +} + static struct xe_hw_engine_group * hw_engine_group_alloc(struct xe_device *xe) { @@ -30,7 +57,12 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group) return ERR_PTR(-ENOMEM); + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + if (!group->resume_wq) + return ERR_PTR(-ENOMEM); + init_rwsem(&group->mode_sem); + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); @@ -134,7 +166,7 @@ int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct x if (err) goto err_suspend; - queue_work(group->resume_wq, &group->resume_work); + xe_hw_engine_group_resume_faulting_lr_jobs(group); } list_add(&q->hw_engine_group_link, &group->exec_queue_list); @@ -167,6 +199,16 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct up_write(&group->mode_sem); } +/** + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's + * faulting LR jobs + * @group: The hw engine group + */ +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + queue_work(group->resume_wq, &group->resume_work); +} + /** * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group * @group: The hw engine group @@ -177,6 +219,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group { int err; struct xe_exec_queue *q; + bool need_resume = false; lockdep_assert_held_write(&group->mode_sem); @@ -184,6 +227,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group if (!xe_vm_in_fault_mode(q->vm)) continue; + need_resume = true; q->ops->suspend(q); } @@ -196,6 +240,9 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group goto err_suspend; } + if (need_resume) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + return 0; err_suspend: @@ -310,3 +357,16 @@ __releases(&group->mode_sem) { up_read(&group->mode_sem); } + +/** + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue + * @q: The exec_queue + */ +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) +{ + if (xe_vm_in_fault_mode(q->vm)) + return EXEC_MODE_LR; + else + return EXEC_MODE_DMA_FENCE; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index e0deb7c7bb5b..797ee81acbf2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -22,4 +22,8 @@ int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, enum xe_hw_engine_group_execution_mode *previous_mode); void xe_hw_engine_group_put(struct xe_hw_engine_group *group); +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q); +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group); + #endif From 226d92e49a7e7a7f5a3a37a34df7c319b72009fc Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:35 +0200 Subject: [PATCH 1207/1523] drm/xe/vm: Remove restriction that all VMs must be faulting if one is With this restriction, all VMs on the device must be faulting VMs if there is already one faulting VM, in which case the device is considered in fault mode. This prevents for example an application from running 3D jobs for the compositor while submitting a SVM compute job on the same device. Now that mutual exclusion of faulting LR jobs and dma fence jobs is ensured on the hw engine group, remove this restriction to allow running faulting and non-faulting VMs on the same device. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-11-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b15348d4809a..cd542de23234 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1762,14 +1762,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; From 4099cfda9dd856222d7cab6970a65896375616d5 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:36 +0200 Subject: [PATCH 1208/1523] drm/xe/device: Remove unused xe_device::usm::num_vm_in_* Those counters were used to keep track of the numbers VMs in fault mode and in non-fault mode, to determine if the whole device was in fault mode or not. This is no longer needed so remove those variables and their usages. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-12-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_device.h | 10 ---------- drivers/gpu/drm/xe/xe_device_types.h | 4 ---- drivers/gpu/drm/xe/xe_vm.c | 12 ------------ 3 files changed, 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 2c96f1b2aafd..f052c06a2d2f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -139,16 +139,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) void xe_device_assert_mem_access(struct xe_device *xe); -static inline bool xe_device_in_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_fault_mode != 0; -} - -static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_non_fault_mode != 0; -} - static inline bool xe_device_has_flat_ccs(struct xe_device *xe) { return xe->info.has_flat_ccs; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 16a24eadd94b..fc89420d0ba6 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -368,10 +368,6 @@ struct xe_device { struct xarray asid_to_vm; /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ - u32 num_vm_in_fault_mode; - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ - u32 num_vm_in_non_fault_mode; /** @usm.lock: protects UM state */ struct mutex lock; } usm; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cd542de23234..3cff3ae57643 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1501,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - trace_xe_vm_create(vm); return vm; @@ -1621,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - if (vm->usm.asid) { void *lookup; From 3c0da3d163eb32f1f91891efaade027fa9b245b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Aug 2024 21:51:42 +0200 Subject: [PATCH 1209/1523] fuse: Initialize beyond-EOF page contents before setting uptodate fuse_notify_store(), unlike fuse_do_readpage(), does not enable page zeroing (because it can be used to change partial page contents). So fuse_notify_store() must be more careful to fully initialize page contents (including parts of the page that are beyond end-of-file) before marking the page uptodate. The current code can leave beyond-EOF page contents uninitialized, which makes these uninitialized page contents visible to userspace via mmap(). This is an information leak, but only affects systems which do not enable init-on-alloc (via CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y or the corresponding kernel command line parameter). Link: https://bugs.chromium.org/p/project-zero/issues/detail?id=2574 Cc: stable@kernel.org Fixes: a1d75f258230 ("fuse: add store request") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de..7146038b2fe7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); From 47ac09b91befbb6a235ab620c32af719f8208399 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Aug 2024 13:17:27 -0700 Subject: [PATCH 1210/1523] Linux 6.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5f417808213f..68ebd6d6b444 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* From e4be320eeca842a3d7648258ee3673f1755a5a59 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 Aug 2024 18:31:36 -0500 Subject: [PATCH 1211/1523] smb3: fix broken cached reads when posix locks Mandatory locking is enforced for cached reads, which violates default posix semantics, and also it is enforced inconsistently. This affected recent versions of libreoffice, and can be demonstrated by opening a file twice from the same client, locking it from handle one and trying to read from it from handle two (which fails, returning EACCES). There is already a mount option "forcemandatorylock" (which defaults to off), so with this change only when the user intentionally specifies "forcemandatorylock" on mount will we break posix semantics on read to a locked range (ie we will only fail in this case, if the user mounts with "forcemandatorylock"). An earlier patch fixed the write path. Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks") Cc: stable@vger.kernel.org Cc: Pavel Shilovsky Reviewed-by: David Howells Reported-by: abartlet@samba.org Reported-by: Kevin Ottens Signed-off-by: Steve French --- fs/smb/client/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 1fc66bcf49eb..f9b302cb8233 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2912,9 +2912,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) if (!CIFS_CACHE_READ(cinode)) return netfs_unbuffered_read_iter(iocb, to); - if (cap_unix(tcon->ses) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) { if (iocb->ki_flags & IOCB_DIRECT) return netfs_unbuffered_read_iter(iocb, to); return netfs_buffered_read_iter(iocb, to); From dfd046d0ced19b6ff5f11ec4ceab0a83de924771 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 15 Aug 2024 08:56:35 +0900 Subject: [PATCH 1212/1523] ksmbd: Use unsafe_memcpy() for ntlm_negotiate rsp buffer is allocated larger than spnego_blob from smb2_allocate_rsp_buf(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2df1354288e6..3f4c56a10a86 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } From 76e98a158b207771a6c9a0de0a60522a446a3447 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 17 Aug 2024 14:03:49 +0900 Subject: [PATCH 1213/1523] ksmbd: fix race condition between destroy_previous_session() and smb2 operations() If there is ->PreviousSessionId field in the session setup request, The session of the previous connection should be destroyed. During this, if the smb2 operation requests in the previous session are being processed, a racy issue could happen with ksmbd_destroy_file_table(). This patch sets conn->status to KSMBD_SESS_NEED_RECONNECT to block incoming operations and waits until on-going operations are complete (i.e. idle) before desctorying the previous session. Fixes: c8efcc786146 ("ksmbd: add support for durable handles v1/v2") Cc: stable@vger.kernel.org # v6.6+ Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-25040 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 34 ++++++++++++++++++++++++++++++- fs/smb/server/connection.h | 3 ++- fs/smb/server/mgmt/user_session.c | 9 ++++++++ fs/smb/server/smb2pdu.c | 2 +- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592..7889df8112b4 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2..5b947175c048 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c..99416ce9f501 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 3f4c56a10a86..cb7f487c96af 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2213,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted From 4fdd8664c8a94411a01d11d5ed2f083f105f570a Mon Sep 17 00:00:00 2001 From: Victor Timofei Date: Fri, 16 Aug 2024 22:24:52 +0300 Subject: [PATCH 1214/1523] ksmbd: fix spelling mistakes in documentation There are a couple of spelling mistakes in the documentation. This patch fixes them. Signed-off-by: Victor Timofei Acked-by: Namjae Jeon Signed-off-by: Steve French --- Documentation/filesystems/smb/ksmbd.rst | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11..67cb68ea6e68 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. From 7c525dddbee71880e654ad44f3917787a4f6042c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 16 Aug 2024 19:33:39 +0200 Subject: [PATCH 1215/1523] ksmbd: Replace one-element arrays with flexible-array members Replace the deprecated one-element arrays with flexible-array members in the structs filesystem_attribute_info and filesystem_device_info. There are no binary differences after this conversion. Link: https://github.com/KSPP/linux/issues/79 Signed-off-by: Thorsten Blum Reviewed-by: Gustavo A. R. Silva Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++-- fs/smb/server/smb_common.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cb7f487c96af..0bc9edf22ba4 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5360,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5386,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index 4a3148b0167f..cc1d6dfe29d5 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { From d6d539c9a7ad0655e5ad46b5e869f1b20bce8953 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 19:51:13 -0400 Subject: [PATCH 1216/1523] bcachefs: Reallocate table when we're increasing size Fixes: c2f6e16a6771 ("bcachefs: Increase size of cuckoo hash table on too many rehashes") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f70eb2127d32..f9fb150eda70 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -107,7 +107,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, nr_elements += t->d[i].journal_seq > flushed_seq; new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); - +realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; @@ -118,6 +118,8 @@ retry_rehash: if (nr_rehashes_this_size == 3) { new_bits++; nr_rehashes_this_size = 0; + kvfree(n); + goto realloc; } nr_rehashes++; From d9f49c3106e404776afcf6c5682357f4fe088beb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 16:41:39 -0400 Subject: [PATCH 1217/1523] bcachefs: fix field-spanning write warning attempts to retrofit memory safety onto C are increasingly annoying ------------[ cut here ]------------ memcpy: detected field-spanning write (size 4) of single field "&k.replicas" at fs/bcachefs/replicas.c:454 (size 3) WARNING: CPU: 5 PID: 6525 at fs/bcachefs/replicas.c:454 bch2_replicas_gc2+0x2cb/0x400 [bcachefs] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) Modules linked in: dm_mod tun nf_conntrack_netlink nfnetlink xt_addrtype br_netfilter overlay msr sctp bcachefs lz4hc_compress lz4_compress libcrc32c xor raid6_pq lz4_decompress pps_ldisc pps_core wireguard libchacha20poly1305 chacha_x86_64 poly1305_x86_64 ip6_udp_tunnel udp_tunnel curve25519_x86_64 libcurve25519_generic libchacha sit tunnel4 ip_tunnel af_packet bridge stp llc ip6table_nat ip6table_filter ip6_tables xt_MASQUERADE xt_conntrack iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter ip_tables x_tables tcp_bbr sch_fq_codel efivarfs nls_iso8859_1 nls_cp437 vfat fat cdc_mbim cdc_wdm cdc_ncm cdc_ether usbnet r8152 input_leds joydev mii amdgpu mousedev hid_generic usbhid hid ath10k_pci amd_atl edac_mce_amd ath10k_core kvm_amd ath kvm mac80211 bfq crc32_pclmul crc32c_intel polyval_clmulni polyval_generic sha512_ssse3 sha256_ssse3 sha1_ssse3 snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg i2c_algo_bit drm_exec snd_hda_codec r8169 drm_suballoc_helper aesni_intel gf128mul crypto_simd amdxcp realtek mfd_core tpm_crb drm_buddy snd_hwdep mdio_devres libarc4 cryptd tpm_tis wmi_bmof cfg80211 evdev libphy snd_hda_core tpm_tis_core gpu_sched rapl xhci_pci xhci_hcd snd_pcm drm_display_helper snd_timer tpm sp5100_tco rfkill efi_pstore mpt3sas drm_ttm_helper ahci usbcore libaescfb ccp snd ttm 8250 libahci watchdog soundcore raid_class sha1_generic acpi_cpufreq k10temp 8250_base usb_common scsi_transport_sas i2c_piix4 hwmon video serial_mctrl_gpio serial_base ecdh_generic wmi rtc_cmos backlight ecc gpio_amdpt rng_core gpio_generic button CPU: 5 UID: 0 PID: 6525 Comm: bcachefs Tainted: G W 6.11.0-rc1-ojab-00058-g224bc118aec9 #6 6d5debde398d2a84851f42ab300dae32c2992027 Tainted: [W]=WARN RIP: 0010:bch2_replicas_gc2+0x2cb/0x400 [bcachefs] Code: c7 c2 60 91 d1 c1 48 89 c6 48 c7 c7 98 91 d1 c1 4c 89 14 24 44 89 5c 24 08 48 89 44 24 20 c6 05 fa 68 04 00 01 e8 05 a3 40 e4 <0f> 0b 4c 8b 14 24 44 8b 5c 24 08 48 8b 44 24 20 e9 55 fe ff ff 8b RSP: 0018:ffffb434c9263d60 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9a8efa79cc00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb434c9263de0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000005 R13: ffff9a8efa73c300 R14: ffff9a8d9e880000 R15: ffff9a8d9e8806f8 FS: 0000000000000000(0000) GS:ffff9a9410c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565423373090 CR3: 0000000164e30000 CR4: 00000000003506f0 Call Trace: ? __warn+0x97/0x150 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? report_bug+0x196/0x1c0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x17/0x80 ? __wake_up_klogd.part.0+0x4c/0x80 ? asm_exc_invalid_op+0x16/0x20 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? bch2_dev_usage_read+0xa0/0xa0 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_dev_usage_read+0xa0/0xa0: discard_in_flight_remove at /home/ojab/src/bcachefs/fs/bcachefs/alloc_background.c:1712 Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 1223b710755d..12b1d28b7eb4 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -451,7 +451,8 @@ retry: .type = BCH_DISK_ACCOUNTING_replicas, }; - memcpy(&k.replicas, e, replicas_entry_bytes(e)); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); struct bpos p = disk_accounting_pos_to_bpos(&k); From 47cdc7b14417a40af6a5d5909f1d28a5a23fc11d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 17:38:43 -0400 Subject: [PATCH 1218/1523] bcachefs: Fix incorrect gfp flags fixes: 00488 WARNING: CPU: 9 PID: 194 at mm/page_alloc.c:4410 __alloc_pages_noprof+0x1818/0x1888 00488 Modules linked in: 00488 CPU: 9 UID: 0 PID: 194 Comm: kworker/u66:1 Not tainted 6.11.0-rc1-ktest-g18fa10d6495f #2931 00488 Hardware name: linux,dummy-virt (DT) 00488 Workqueue: writeback wb_workfn (flush-bcachefs-2) 00488 pstate: 20001005 (nzCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00488 pc : __alloc_pages_noprof+0x1818/0x1888 00488 lr : __alloc_pages_noprof+0x5f4/0x1888 00488 sp : ffffff80ccd8ed00 00488 x29: ffffff80ccd8ed00 x28: 0000000000000000 x27: dfffffc000000000 00488 x26: 0000000000000010 x25: 0000000000000002 x24: 0000000000000000 00488 x23: 0000000000000000 x22: 1ffffff0199b1dbe x21: ffffff80cc680900 00488 x20: 0000000000000000 x19: ffffff80ccd8eed0 x18: 0000000000000000 00488 x17: ffffff80cc58a010 x16: dfffffc000000000 x15: 1ffffff00474e518 00488 x14: 1ffffff00474e518 x13: 1ffffff00474e518 x12: ffffffb8104701b9 00488 x11: 1ffffff8104701b8 x10: ffffffb8104701b8 x9 : ffffffc08043cde8 00488 x8 : 00000047efb8fe48 x7 : ffffff80ccd8ee20 x6 : 0000000000048000 00488 x5 : 1ffffff810470138 x4 : 0000000000000050 x3 : 1ffffff0199b1d94 00488 x2 : ffffffb0199b1d94 x1 : 0000000000000001 x0 : ffffffc082387448 00488 Call trace: 00488 __alloc_pages_noprof+0x1818/0x1888 00488 new_slab+0x284/0x2f0 00488 ___slab_alloc+0x208/0x8e0 00488 __kmalloc_noprof+0x328/0x340 00488 __bch2_writepage+0x106c/0x1830 00488 write_cache_pages+0xa0/0xe8 due to __GFP_NOFAIL without allowing reclaim Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index cc33d763f722..184d03851676 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -534,7 +534,7 @@ do_io: if (f_sectors > w->tmp_sectors) { kfree(w->tmp); - w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL); w->tmp_sectors = f_sectors; } From 5b5c96c63d5b6e91c622611e04b2b156bbae53f5 Mon Sep 17 00:00:00 2001 From: Hongzhen Luo Date: Thu, 1 Aug 2024 19:26:22 +0800 Subject: [PATCH 1219/1523] erofs: simplify readdir operation - Use i_size instead of i_size_read() due to immutable fses; - Get rid of an unneeded goto since erofs_fill_dentries() also works; - Remove unnecessary lines. Signed-off-by: Hongzhen Luo Link: https://lore.kernel.org/r/20240801112622.2164029-1-hongzhen@linux.alibaba.com Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang --- fs/erofs/dir.c | 35 ++++++++++++----------------------- fs/erofs/internal.h | 2 +- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f..c3b90abdee37 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396..45dc15ebd870 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) From 2c534624ae70100aeea0b5800b0f3768b2fd3cf0 Mon Sep 17 00:00:00 2001 From: Hongzhen Luo Date: Tue, 6 Aug 2024 19:22:08 +0800 Subject: [PATCH 1220/1523] erofs: get rid of check_layout_compatibility() Simple enough to just open-code it. Signed-off-by: Hongzhen Luo Reviewed-by: Sandeep Dhavale Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240806112208.150323-1-hongzhen@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 32ce5b35e1df..6cb5c8916174 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { From 7ce7c2283fa6843ab3c2adfeb83dcc504a107858 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Aug 2024 12:06:19 +0200 Subject: [PATCH 1221/1523] Input: i8042 - add Fujitsu Lifebook E756 to i8042 quirk table Yet another quirk entry for Fujitsu laptop. Lifebook E756 requires i8041.nomux for keeping the touchpad working after suspend/resume. Link: https://bugzilla.suse.com/show_bug.cgi?id=1229056 Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240814100630.2048-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 5b50475ec414..78e5c9c60b8b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -626,6 +626,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook E756 */ + /* https://bugzilla.suse.com/show_bug.cgi?id=1229056 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E756"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Fujitsu Lifebook E5411 */ .matches = { From 5d41eeb6725e3e24853629e5d7635e4bc45d736e Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 9 Aug 2024 17:11:28 +0530 Subject: [PATCH 1222/1523] drm/i915/hdcp: Use correct cp_irq_count We are checking cp_irq_count from the wrong hdcp structure which ends up giving timed out errors. We only increment the cp_irq_count of the primary connector's hdcp structure but here in case of multidisplay setup we end up checking the secondary connector's hdcp structure, which will not have its cp_irq_count incremented. This leads to a timed out at CP_IRQ error even though a CP_IRQ was raised. Extract it from the correct intel_hdcp structure. --v2 -Explain why it was the wrong hdcp structure [Jani] Fixes: 8c9e4f68b861 ("drm/i915/hdcp: Use per-device debugs") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240809114127.3940699-2-suraj.kandpal@intel.com (cherry picked from commit dd925902634def895690426bf10e0a8b3e56f56d) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360..b0101d72b9c1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) From 3d765ae2daccc570b3f4fbcb57eb321b12cdded2 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 4 Jan 2024 19:31:17 +0100 Subject: [PATCH 1223/1523] Input: i8042 - add forcenorestore quirk to leave controller untouched even on s3 On s3 resume the i8042 driver tries to restore the controller to a known state by reinitializing things, however this can confuse the controller with different effects. Mostly occasionally unresponsive keyboards after resume. These issues do not rise on s0ix resume as here the controller is assumed to preserved its state from before suspend. This patch adds a quirk for devices where the reinitialization on s3 resume is not needed and might be harmful as described above. It does this by using the s0ix resume code path at selected locations. This new quirk goes beyond what the preexisting reset=never quirk does, which only skips some reinitialization steps. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240104183118.779778-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +++++++--- drivers/input/serio/i8042.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 78e5c9c60b8b..00e804301088 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -83,6 +83,7 @@ static inline void i8042_write_command(int val) #define SERIO_QUIRK_KBDRESET BIT(12) #define SERIO_QUIRK_DRITEK BIT(13) #define SERIO_QUIRK_NOPNP BIT(14) +#define SERIO_QUIRK_FORCENORESTORE BIT(15) /* Quirk table for different mainboards. Options similar or identical to i8042 * module parameters. @@ -1694,6 +1695,8 @@ static void __init i8042_check_quirks(void) if (quirks & SERIO_QUIRK_NOPNP) i8042_nopnp = true; #endif + if (quirks & SERIO_QUIRK_FORCENORESTORE) + i8042_forcenorestore = true; } #else static inline void i8042_check_quirks(void) {} @@ -1727,7 +1730,7 @@ static int __init i8042_platform_init(void) i8042_check_quirks(); - pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", i8042_nokbd ? " nokbd" : "", i8042_noaux ? " noaux" : "", i8042_nomux ? " nomux" : "", @@ -1747,10 +1750,11 @@ static int __init i8042_platform_init(void) "", #endif #ifdef CONFIG_PNP - i8042_nopnp ? " nopnp" : ""); + i8042_nopnp ? " nopnp" : "", #else - ""); + "", #endif + i8042_forcenorestore ? " forcenorestore" : ""); retval = i8042_pnp_init(); if (retval) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index e0fb1db653b7..8ec4872b4471 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -115,6 +115,10 @@ module_param_named(nopnp, i8042_nopnp, bool, 0); MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings"); #endif +static bool i8042_forcenorestore; +module_param_named(forcenorestore, i8042_forcenorestore, bool, 0); +MODULE_PARM_DESC(forcenorestore, "Force no restore on s3 resume, copying s2idle behaviour"); + #define DEBUG #ifdef DEBUG static bool i8042_debug; @@ -1232,7 +1236,7 @@ static int i8042_pm_suspend(struct device *dev) { int i; - if (pm_suspend_via_firmware()) + if (!i8042_forcenorestore && pm_suspend_via_firmware()) i8042_controller_reset(true); /* Set up serio interrupts for system wakeup. */ @@ -1248,7 +1252,7 @@ static int i8042_pm_suspend(struct device *dev) static int i8042_pm_resume_noirq(struct device *dev) { - if (!pm_resume_via_firmware()) + if (i8042_forcenorestore || !pm_resume_via_firmware()) i8042_interrupt(0, NULL); return 0; @@ -1271,7 +1275,7 @@ static int i8042_pm_resume(struct device *dev) * not restore the controller state to whatever it had been at boot * time, so we do not need to do anything. */ - if (!pm_suspend_via_firmware()) + if (i8042_forcenorestore || !pm_suspend_via_firmware()) return 0; /* From aaa4ca873d3da768896ffc909795359a01e853ef Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 4 Jan 2024 19:31:18 +0100 Subject: [PATCH 1224/1523] Input: i8042 - use new forcenorestore quirk to replace old buggy quirk combination The old quirk combination sometimes cause a laggy keyboard after boot. With the new quirk the initial issue of an unresponsive keyboard after s3 resume is also fixed, but it doesn't have the negative side effect of the sometimes laggy keyboard. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240104183118.779778-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 00e804301088..bad238f69a7a 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1159,18 +1159,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "N150CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { From 6b77dab5da721f2fa9d3c3611e1cbaead8030706 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 12:22:48 +0200 Subject: [PATCH 1225/1523] drm/xe: Remove redundant param from xe_bo_create_user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BO from xe_bo_create_user() will always be of type, ttm_bo_type_device. So remove that redundant parameter. Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240816102248.25628-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_bo.c | 5 ++--- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 12 +++++++----- drivers/gpu/drm/xe/xe_bo.c | 6 ++---- drivers/gpu/drm/xe/xe_bo.h | 1 - 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index df9fd907edd4..8dac069483e8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -125,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Testing system memory\n"); bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -206,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc xe_vm_lock(vm, false); bo = xe_bo_create_user(xe, NULL, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { @@ -216,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc external = xe_bo_create_user(xe, NULL, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c24c8509227e..13db6c0530b3 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 47ae9d0b8864..1a192a2a941b 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -641,7 +641,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til long ret; sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, - DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, + DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(sys_bo)) { @@ -664,8 +664,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -687,8 +688,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 800119c8fc8d..ce8282e67e84 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1499,11 +1499,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, type, + cpu_caching, ttm_bo_type_device, flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -2027,7 +2026,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, } bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - ttm_bo_type_device, bo_flags); + bo_flags); if (vm) xe_vm_unlock(vm); @@ -2333,7 +2332,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_NEEDS_CPU_ACCESS); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 1c9dc8adaaa3..935a94279026 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -87,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, From a9aaf1ff88a8cb99a1335c9eb76de637f0cf8c10 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 13 Aug 2024 21:07:50 +0200 Subject: [PATCH 1226/1523] power: sequencing: request the WLAN enable GPIO as-is If the WCN module is powered up before linux boots and the ath11k driver probes at the same time as the power sequencing driver, we may end up driving the wlan-enable GPIO low in the latter, breaking the start-up of the WLAN module. Request the wlan-enable GPIO as-is so that if the WLAN module is already starting/started, we leave it alone. Fixes: 2f1630f437df ("power: pwrseq: add a driver for the PMU module on the QCom WCN chipsets") Reported-by: Stephan Gerhold Link: https://lore.kernel.org/r/20240813190751.155035-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/power/sequencing/pwrseq-qcom-wcn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/sequencing/pwrseq-qcom-wcn.c b/drivers/power/sequencing/pwrseq-qcom-wcn.c index 42dacfda745e..d786cbf1b2cd 100644 --- a/drivers/power/sequencing/pwrseq-qcom-wcn.c +++ b/drivers/power/sequencing/pwrseq-qcom-wcn.c @@ -283,7 +283,7 @@ static int pwrseq_qcom_wcn_probe(struct platform_device *pdev) "Failed to get the Bluetooth enable GPIO\n"); ctx->wlan_gpio = devm_gpiod_get_optional(dev, "wlan-enable", - GPIOD_OUT_LOW); + GPIOD_ASIS); if (IS_ERR(ctx->wlan_gpio)) return dev_err_probe(dev, PTR_ERR(ctx->wlan_gpio), "Failed to get the WLAN enable GPIO\n"); From e080a26725fb36f535f22ea42694c60ab005fb2e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 19 Aug 2024 10:52:07 +0800 Subject: [PATCH 1227/1523] erofs: allow large folios for compressed files As commit 2e6506e1c4ee ("mm/migrate: fix deadlock in migrate_pages_batch() on large folios") has landed upstream, large folios can be safely enabled for compressed inodes since all prerequisites have already landed in 6.11-rc1. Stress tests has been running on my fleet for over 20 days without any regression. Additionally, users [1] have requested it for months. Let's allow large folios for EROFS full cases upstream now for wider testing. [1] https://lore.kernel.org/r/CAGsJ_4wtE8OcpinuqVwG4jtdx6Qh5f+TON6wz+4HMCq=A2qFcA@mail.gmail.com Cc: Barry Song <21cnbao@gmail.com> Cc: Matthew Wilcox (Oracle) [ Gao Xiang: minor commit typo fixes. ] Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240819025207.3808649-1-hsiangkao@linux.alibaba.com --- Documentation/filesystems/erofs.rst | 2 +- fs/erofs/inode.c | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f..c293f8e37468 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 43c09aae2afc..419432be3223 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); -#ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; +#endif + } out_unlock: erofs_put_metabuf(&buf); return err; From 7167395a4be7930ecac6a33b4e54d7e3dd9ee209 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Aug 2024 15:59:50 +0800 Subject: [PATCH 1228/1523] selftests: udpgro: report error when receive failed Currently, we only check the latest senders's exit code. If the receiver report failed, it is not recoreded. Fix it by checking the exit code of all the involved processes. Before: bad GRO lookup ok multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 failed $ echo $? 0 After: bad GRO lookup ok multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 failed $ echo $? 1 Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") Suggested-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 44 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..4659cf01e438 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -46,17 +46,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +75,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +96,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +115,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } From d7818402b1d80347c764001583f6d63fa68c2e1a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Aug 2024 15:59:51 +0800 Subject: [PATCH 1229/1523] selftests: udpgro: no need to load xdp for gro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d7db7775ea2e ("net: veth: do not manipulate GRO when using XDP"), there is no need to load XDP program to enable GRO. On the other hand, the current test is failed due to loading the XDP program. e.g. # selftests: net: udpgro.sh # ipv4 # no GRO ok # no GRO chk cmsg ok # GRO ./udpgso_bench_rx: recv: bad packet len, got 1472, expected 14720 # # failed [...] # bad GRO lookup ok # multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 # # ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 # # failed ok 1 selftests: net: udpgro.sh After fix, all the test passed. # ./udpgro.sh ipv4 no GRO ok [...] multiple GRO socks ok Fixes: d7db7775ea2e ("net: veth: do not manipulate GRO when using XDP") Reported-by: Yi Chen Closes: https://issues.redhat.com/browse/RHEL-53858 Reviewed-by: Toke Høiland-Jørgensen Acked-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 4659cf01e438..d5ffd8c9172e 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -206,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then From 7116c35aacedc38be6d15bd21b2fc936eed0008b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 Aug 2024 12:01:30 +0100 Subject: [PATCH 1230/1523] drm/xe: prevent UAF around preempt fence The fence lock is part of the queue, therefore in the current design anything locking the fence should then also hold a ref to the queue to prevent the queue from being freed. However, currently it looks like we signal the fence and then drop the queue ref, but if something is waiting on the fence, the waiter is kicked to wake up at some later point, where upon waking up it first grabs the lock before checking the fence state. But if we have already dropped the queue ref, then the lock might already be freed as part of the queue, leading to uaf. To prevent this, move the fence lock into the fence itself so we don't run into lifetime issues. Alternative might be to have device level lock, or only release the queue in the fence release callback, however that might require pushing to another worker to avoid locking issues. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2454 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2342 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2020 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814110129.825847-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 - drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 ++- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 6d3b44cbc4c7..e53937fafd14 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -620,7 +620,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 315f874426bd..7deb480e26af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 56e709d2fb30..83fbeea5aa20 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -134,8 +134,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; From cf1e515c9a40caa8bddb920970d3257bb01c1421 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 19 Aug 2024 20:00:07 +0800 Subject: [PATCH 1231/1523] iommufd/selftest: Make dirty_ops static The sparse tool complains as follows: drivers/iommu/iommufd/selftest.c:277:30: warning: symbol 'dirty_ops' was not declared. Should it be static? This symbol is not used outside of selftest.c, so marks it static. Fixes: 266ce58989ba ("iommufd/selftest: Test IOMMU_HWPT_ALLOC_DIRTY_TRACKING") Link: https://patch.msgid.link/r/20240819120007.3884868-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e29133..222cfc11ebfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; From 492be2a070f023c66aaef6ebd664567fda28c2a6 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:43 +0200 Subject: [PATCH 1232/1523] drm/xe/display: Match i915 driver suspend/resume sequences better Suspend fbdev sooner, and disable user access before suspending to prevent some races. I've noticed this when comparing xe suspend to i915's. Matches the following commits from i915: 24b412b1bfeb ("drm/i915: Disable intel HPD poll after DRM poll init/enable") 1ef28d86bea9 ("drm/i915: Suspend the framebuffer console earlier during system suspend") bd738d859e71 ("drm/i915: Prevent modesets during driver init/shutdown") Thanks to Imre for pointing me to those commits. Driver shutdown is currently missing, but I have some idea how to implement it next. Signed-off-by: Maarten Lankhorst Cc: Imre Deak Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-2-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, --- drivers/gpu/drm/xe/display/xe_display.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 982b9c5b440f..8b4fd90a9c35 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -299,8 +299,11 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); + intel_display_driver_disable_user_access(xe); + } if (!runtime) intel_display_driver_suspend(xe); @@ -309,12 +312,13 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); + if (has_display(xe)) + intel_display_driver_suspend_access(xe); + intel_encoder_suspend_all(&xe->display); intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - intel_dmc_suspend(xe); } @@ -354,14 +358,19 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); if (!runtime) intel_display_driver_resume(xe); - intel_hpd_poll_disable(xe); - if (has_display(xe)) + if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); + intel_display_driver_enable_user_access(xe); + } + intel_hpd_poll_disable(xe); intel_opregion_resume(display); From cb8f81c1753187995b7a43e79c12959f14eb32d3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:44 +0200 Subject: [PATCH 1233/1523] drm/xe/display: Make display suspend/resume work on discrete We should unpin before evicting all memory, and repin after GT resume. This way, we preserve the contents of the framebuffers, and won't hang on resume due to migration engine not being restored yet. Signed-off-by: Maarten Lankhorst Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-3-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.c | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b4fd90a9c35..30dfdac9f6fa 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -287,6 +287,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -308,6 +329,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9f3c14fd9f33..fcfb49af8c89 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -93,13 +93,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -154,11 +154,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -367,10 +367,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { From cd8e468efb4fb2742e06328a75b282c35c1abf8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:57 +0200 Subject: [PATCH 1234/1523] ACPI: video: Add Dell UART backlight controller detection Dell All In One (AIO) models released after 2017 use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Commit 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") has added support for this, but I neglected to tie this into acpi_video_get_backlight_type(). Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Add support to acpi_video_get_backlight_type() for a new dell_uart backlight type. So that the existing infra to override the backlight control method on the commandline or with DMI quirks can be used. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-2-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 7 +++++++ include/acpi/video.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index c11cbe5b6eaa..e509dcbf3090 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -54,6 +54,8 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; if (!strcmp("apple_gmux", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_apple_gmux; + if (!strcmp("dell_uart", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_dell_uart; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } @@ -918,6 +920,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto static DEFINE_MUTEX(init_mutex); static bool nvidia_wmi_ec_present; static bool apple_gmux_present; + static bool dell_uart_present; static bool native_available; static bool init_done; static long video_caps; @@ -932,6 +935,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto &video_caps, NULL); nvidia_wmi_ec_present = nvidia_wmi_ec_supported(); apple_gmux_present = apple_gmux_detect(NULL, NULL); + dell_uart_present = acpi_dev_present("DELL0501", NULL, -1); init_done = true; } if (native) @@ -962,6 +966,9 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto if (apple_gmux_present) return acpi_backlight_apple_gmux; + if (dell_uart_present) + return acpi_backlight_dell_uart; + /* Use ACPI video if available, except when native should be preferred. */ if ((video_caps & ACPI_VIDEO_BACKLIGHT) && !(native_available && prefer_native_over_acpi_video())) diff --git a/include/acpi/video.h b/include/acpi/video.h index 3d538d4178ab..044c463138df 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -50,6 +50,7 @@ enum acpi_backlight_type { acpi_backlight_native, acpi_backlight_nvidia_wmi_ec, acpi_backlight_apple_gmux, + acpi_backlight_dell_uart, }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) From b5f0943001339c4d324a1af10470ce0bdd79f966 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:58 +0200 Subject: [PATCH 1235/1523] platform/x86: dell-uart-backlight: Use acpi_video_get_backlight_type() The dell-uart-backlight driver supports backlight control on Dell All In One (AIO) models using a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Use the acpi_video_get_backlight_type() function from the ACPI video-detect code to check if the dell-uart-backlight driver should actually be used. This allows reusing the existing ACPI video-detect infra to override the backlight control method on the commandline or with DMI quirks. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-3-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/dell/Kconfig | 1 + drivers/platform/x86/dell/dell-uart-backlight.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 85a78ef91182..309236cecd5a 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -161,6 +161,7 @@ config DELL_SMO8800 config DELL_UART_BACKLIGHT tristate "Dell AIO UART Backlight driver" depends on ACPI + depends on ACPI_VIDEO depends on BACKLIGHT_CLASS_DEVICE depends on SERIAL_DEV_BUS help diff --git a/drivers/platform/x86/dell/dell-uart-backlight.c b/drivers/platform/x86/dell/dell-uart-backlight.c index 87d2a20b4cb3..3995f90add45 100644 --- a/drivers/platform/x86/dell/dell-uart-backlight.c +++ b/drivers/platform/x86/dell/dell-uart-backlight.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "../serdev_helpers.h" /* The backlight controller must respond within 1 second */ @@ -332,10 +333,17 @@ struct serdev_device_driver dell_uart_bl_serdev_driver = { static int dell_uart_bl_pdev_probe(struct platform_device *pdev) { + enum acpi_backlight_type bl_type; struct serdev_device *serdev; struct device *ctrl_dev; int ret; + bl_type = acpi_video_get_backlight_type(); + if (bl_type != acpi_backlight_dell_uart) { + dev_dbg(&pdev->dev, "Not loading (ACPI backlight type = %d)\n", bl_type); + return -ENODEV; + } + ctrl_dev = get_serdev_controller("DELL0501", NULL, 0, "serial0"); if (IS_ERR(ctrl_dev)) return PTR_ERR(ctrl_dev); From 5c7bb62cb8f53de71d8ab3d619be22740da0b837 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:59 +0200 Subject: [PATCH 1236/1523] ACPI: video: Add backlight=native quirk for Dell OptiPlex 7760 AIO Dell All In One (AIO) models released after 2017 may use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") The Dell OptiPlex 7760 AIO has an ACPI device for one if its UARTs with the above _HID + _CID. Loading the dell-uart-backlight driver shows that there actually is a backlight controller board attached to the UART, which reports a firmware version of "G&MX01-V15". But the backlight controller board does not actually control the backlight brightness and the GPU's native backlight control method does work. Add a quirk to use the GPU's native backlight control method on this model. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2303936 Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-4-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index e509dcbf3090..674b9db7a1ef 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -823,6 +823,21 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Dell AIO (All in Ones) which advertise an UART attached backlight + * controller board in their ACPI tables (and may even have one), but + * which need native backlight control nevertheless. + */ + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=2303936 */ + .callback = video_detect_force_native, + /* Dell OptiPlex 7760 AIO */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7760 AIO"), + }, + }, + /* * Models which have nvidia-ec-wmi support, but should not use it. * Note this indicates a likely firmware bug on these models and should From f4b2a0ae1a31fd3d1b5ca18ee08319b479cf9b5f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 24 Jul 2024 14:53:09 -0700 Subject: [PATCH 1237/1523] drm/xe: Fix opregion leak Being part o the display, ideally the setup and cleanup would be done by display itself. However this is a bigger refactor that needs to be done on both i915 and xe. For now, just fix the leak: unreferenced object 0xffff8881a0300008 (size 192): comm "modprobe", pid 4354, jiffies 4295647021 hex dump (first 32 bytes): 00 00 87 27 81 88 ff ff 18 80 9b 00 00 c9 ff ff ...'............ 18 81 9b 00 00 c9 ff ff 00 00 00 00 00 00 00 00 ................ backtrace (crc 99260e31): [] kmemleak_alloc+0x4b/0x80 [] kmalloc_trace_noprof+0x312/0x3d0 [] intel_opregion_setup+0x89/0x700 [xe] [] xe_display_init_noirq+0x2f/0x90 [xe] [] xe_device_probe+0x7a3/0xbf0 [xe] [] xe_pci_probe+0x333/0x5b0 [xe] [] local_pci_probe+0x48/0xb0 [] pci_device_probe+0xc8/0x280 [] really_probe+0xf8/0x390 [] __driver_probe_device+0x8a/0x170 [] driver_probe_device+0x23/0xb0 [] __driver_attach+0xc7/0x190 [] bus_for_each_dev+0x7d/0xd0 [] driver_attach+0x1e/0x30 [] bus_add_driver+0x117/0x250 Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240724215309.644423-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6f4e43a2f771b737d991142ec4f6d4b7ff31fbb4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..ca4468c82078 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(xe); } int xe_display_init_noirq(struct xe_device *xe) @@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(xe); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } From c621f70539cae731d9749c1900cd00bb70ea5c72 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 4 Aug 2024 23:20:57 -0700 Subject: [PATCH 1238/1523] drm/xe/observation: Drop empty sysctl table entry An empty sysctl table entry was inadvertently left behind for observation sysctl. The breaks on 6.11 with the following errors: [ 219.654850] sysctl table check failed: dev/xe/(null) procname is null [ 219.654862] sysctl table check failed: dev/xe/(null) No proc_handler Drop the empty entry. Fixes: 63347fe031e3 ("drm/xe/uapi: Rename xe perf layer as xe observation layer") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2419 Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240805062057.3547560-1-ashutosh.dixit@intel.com (cherry picked from commit be1dec570b6f5a29ce9c99334c52bea94c28914b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_observation.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** From 7090d7fc969fcc9985d7e538cfcd8a69a5f9c616 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 08:28:31 -0700 Subject: [PATCH 1239/1523] drm/xe: Move VM dma-resv lock from xe_exec_queue_create to __xe_exec_queue_init The critical section which requires the VM dma-resv is the call xe_lrc_create in __xe_exec_queue_init. Move this lock to __xe_exec_queue_init holding it just around xe_lrc_create. Not only is good practice, this also fixes a locking double of the VM dma-resv in the error paths of __xe_exec_queue_init as xe_lrc_put tries to acquire this too resulting in a deadlock. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240724152831.1848325-1-matthew.brost@intel.com (cherry picked from commit 549dd786b61cd3db903f5d94d07fc5a89ccdbeb9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a39384bb9553..16f24f4a7062 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -105,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; From 15939ca77d4424f736e1e4953b4da2351cc9689d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 16:28:30 -0700 Subject: [PATCH 1240/1523] drm/xe: Fix tile fini sequence Only set tile->mmio.regs to NULL if not the root tile in tile_fini. The root tile mmio regs is setup ealier in MMIO init thus it should be set to NULL in mmio_fini. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240809232830.3302251-1-matthew.brost@intel.com (cherry picked from commit 3396900aa273903639a1792afa4d23dc09bec291) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f92faad4b96d..83122c77edd9 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } int xe_mmio_probe_tiles(struct xe_device *xe) @@ -91,9 +92,11 @@ add_mmio_ext: static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) From 730b72480e29f63fd644f5fa57c9d46109428953 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 Aug 2024 12:01:30 +0100 Subject: [PATCH 1241/1523] drm/xe: prevent UAF around preempt fence The fence lock is part of the queue, therefore in the current design anything locking the fence should then also hold a ref to the queue to prevent the queue from being freed. However, currently it looks like we signal the fence and then drop the queue ref, but if something is waiting on the fence, the waiter is kicked to wake up at some later point, where upon waking up it first grabs the lock before checking the fence state. But if we have already dropped the queue ref, then the lock might already be freed as part of the queue, leading to uaf. To prevent this, move the fence lock into the fence itself so we don't run into lifetime issues. Alternative might be to have device level lock, or only release the queue in the fence release callback, however that might require pushing to another worker to avoid locking issues. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2454 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2342 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2020 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814110129.825847-2-matthew.auld@intel.com (cherry picked from commit 7116c35aacedc38be6d15bd21b2fc936eed0008b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 - drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 ++- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 16f24f4a7062..9731dcd0b1bd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -643,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index a35ce24c9798..f6ee0ae80fd6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index e8b8ae5c6485..c453f45328b1 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; From ddf6492e0e508b7c2b42c8d5a4ac82bd38ef0dd5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:44 +0200 Subject: [PATCH 1242/1523] drm/xe/display: Make display suspend/resume work on discrete We should unpin before evicting all memory, and repin after GT resume. This way, we preserve the contents of the framebuffers, and won't hang on resume due to migration engine not being restored yet. Signed-off-by: Maarten Lankhorst Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-3-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, (cherry picked from commit cb8f81c1753187995b7a43e79c12959f14eb32d3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.c | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ca4468c82078..49de4e4f8a75 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -283,6 +283,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); @@ -300,6 +321,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index de3b5df65e48..9a3f618d22dc 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { From 6841a26e2c6715dc52a1cdd888e958dd2a92a5bc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 19 Aug 2024 11:57:52 +0200 Subject: [PATCH 1243/1523] drm/xe/oa: Use vma_pages() helper function in xe_oa_mmap() Use the vma_pages() helper function and remove the following Coccinelle/coccicheck warning reported by vma_pages.cocci: WARNING: Consider using vma_pages helper on vma Reviewed-by: Ashutosh Dixit Signed-off-by: Thorsten Blum Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240819095751.539645-2-thorsten.blum@toblux.com --- drivers/gpu/drm/xe/xe_oa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 3ef92eb8fbb1..4d4541e0b24c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1244,8 +1244,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) { ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), PAGE_SIZE, vma->vm_page_prot); From ad614a706b1ac83b95b333f44b8f5e70bcb37dc5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: [PATCH 1244/1523] drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi (cherry picked from commit f2881dfdaaa9ec873dbd383ef5512fc31e576cbb) Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..db232a25189e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit From 565d121b69980637f040eb4d84289869cdaabedf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Aug 2024 00:28:25 +0200 Subject: [PATCH 1245/1523] tcp: prevent concurrent execution of tcp_sk_exit_batch Its possible that two threads call tcp_sk_exit_batch() concurrently, once from the cleanup_net workqueue, once from a task that failed to clone a new netns. In the latter case, error unwinding calls the exit handlers in reverse order for the 'failed' netns. tcp_sk_exit_batch() calls tcp_twsk_purge(). Problem is that since commit b099ce2602d8 ("net: Batch inet_twsk_purge"), this function picks up twsk in any dying netns, not just the one passed in via exit_batch list. This means that the error unwind of setup_net() can "steal" and destroy timewait sockets belonging to the exiting netns. This allows the netns exit worker to proceed to call WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); without the expected 1 -> 0 transition, which then splats. At same time, error unwind path that is also running inet_twsk_purge() will splat as well: WARNING: .. at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 ... refcount_dec include/linux/refcount.h:351 [inline] inet_twsk_kill+0x758/0x9c0 net/ipv4/inet_timewait_sock.c:70 inet_twsk_deschedule_put net/ipv4/inet_timewait_sock.c:221 inet_twsk_purge+0x725/0x890 net/ipv4/inet_timewait_sock.c:304 tcp_sk_exit_batch+0x1c/0x170 net/ipv4/tcp_ipv4.c:3522 ops_exit_list+0x128/0x180 net/core/net_namespace.c:178 setup_net+0x714/0xb40 net/core/net_namespace.c:375 copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 ... because refcount_dec() of tw_refcount unexpectedly dropped to 0. This doesn't seem like an actual bug (no tw sockets got lost and I don't see a use-after-free) but as erroneous trigger of debug check. Add a mutex to force strict ordering: the task that calls tcp_twsk_purge() blocks other task from doing final _dec_and_test before mutex-owner has removed all tw sockets of dying netns. Fixes: e9bd0cca09d1 ("tcp: Don't allocate tcp_death_row outside of struct netns_ipv4.") Reported-by: syzbot+8ea26396ff85d23a8929@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000003a5292061f5e4e19@google.com/ Link: https://lore.kernel.org/netdev/20240812140104.GA21559@breakpoint.cc/ Signed-off-by: Florian Westphal Reviewed-by: Kuniyuki Iwashima Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240812222857.29837-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fd17f25ff288..a4e510846905 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,6 +97,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; +static DEFINE_MUTEX(tcp_exit_batch_mutex); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -3514,6 +3516,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work + * and failed setup_net error unwinding path are serialized. + * + * tcp_twsk_purge() handles twsk in any dead netns, not just those in + * net_exit_list, the thread that dismantles a particular twsk must + * do so without other thread progressing to refcount_dec_and_test() of + * tcp_death_row.tw_refcount. + */ + mutex_lock(&tcp_exit_batch_mutex); + tcp_twsk_purge(net_exit_list); list_for_each_entry(net, net_exit_list, exit_list) { @@ -3521,6 +3533,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); } + + mutex_unlock(&tcp_exit_batch_mutex); } static struct pernet_operations __net_initdata tcp_sk_ops = { From 64b582ca88ca11400467b282d5fa3b870ded1c11 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:27 +0000 Subject: [PATCH 1246/1523] block: Read max write zeroes once for __blkdev_issue_write_zeroes() As reported in [0], we may get a hang when formatting a XFS FS on a RAID0 drive. Commit 73a768d5f955 ("block: factor out a blk_write_zeroes_limit helper") changed __blkdev_issue_write_zeroes() to read the max write zeroes value in the loop. This is not safe as max write zeroes may change in value. Specifically for the case of [0], the value goes to 0, and we get an infinite loop. Lift the limit reading out of the loop. [0] https://lore.kernel.org/linux-xfs/4d31268f-310b-4220-88a2-e191c3932a82@oracle.com/T/#t Fixes: 73a768d5f955 ("block: factor out a blk_write_zeroes_limit helper") Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240815163228.216051-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-lib.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 9f735efa6c94..83eb7761c2bf 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -111,13 +111,20 @@ static sector_t bio_write_zeroes_limit(struct block_device *bdev) (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask); } +/* + * There is no reliable way for the SCSI subsystem to determine whether a + * device supports a WRITE SAME operation without actually performing a write + * to media. As a result, write_zeroes is enabled by default and will be + * disabled if a zeroing operation subsequently fails. This means that this + * queue limit is likely to change at runtime. + */ static void __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, - struct bio **biop, unsigned flags) + struct bio **biop, unsigned flags, sector_t limit) { + while (nr_sects) { - unsigned int len = min_t(sector_t, nr_sects, - bio_write_zeroes_limit(bdev)); + unsigned int len = min(nr_sects, limit); struct bio *bio; if ((flags & BLKDEV_ZERO_KILLABLE) && @@ -141,12 +148,14 @@ static void __blkdev_issue_write_zeroes(struct block_device *bdev, static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; blk_start_plug(&plug); - __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags); + __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, + flags, limit); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { @@ -165,7 +174,7 @@ static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, * on an I/O error, in which case we'll turn any error into * "not supported" here. */ - if (ret && !bdev_write_zeroes_sectors(bdev)) + if (ret && !limit) return -EOPNOTSUPP; return ret; } @@ -265,12 +274,14 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); + if (bdev_read_only(bdev)) return -EPERM; - if (bdev_write_zeroes_sectors(bdev)) { + if (limit) { __blkdev_issue_write_zeroes(bdev, sector, nr_sects, - gfp_mask, biop, flags); + gfp_mask, biop, flags, limit); } else { if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; From 81475beb1b5996505a39cd1d9316ce1e668932a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:28 +0000 Subject: [PATCH 1247/1523] block: Drop NULL check in bdev_write_zeroes_sectors() Function bdev_get_queue() must not return NULL, so drop the check in bdev_write_zeroes_sectors(). Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Nitesh Shetty Link: https://lore.kernel.org/r/20240815163228.216051-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..b7664d593486 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1296,12 +1296,7 @@ bdev_max_secure_erase_sectors(struct block_device *bdev) static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) From decbfaf06db05fa1f9b33149ebb3c145b44e878f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:53 +0200 Subject: [PATCH 1248/1523] drm/ttm: Add a flag to allow drivers to skip clear-on-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before releasing backing stores if they want to skip clear-on-free. Cc: Matthew Auld Cc: Thomas Hellström Suggested-by: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++------- include/drm/ttm/ttm_tt.h | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 8504dbe19c1a..935ab3cfd046 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, + bool cleared) { unsigned int i, num_pages = 1 << pt->order; - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); + if (!cleared) { + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); + } } spin_lock(&pt->lock); @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; + bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages); + ttm_pool_type_give(pt, *pages, cleared); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..cfaf49de2419 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,6 +85,9 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * + * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles + * clearing backing store + * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -94,8 +97,9 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) +#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; From 23683061805be368c8d1c7e7ff52abc470cac275 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:54 +0200 Subject: [PATCH 1249/1523] drm/xe/lnl: Offload system clear page activity to GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On LNL because of flat CCS, driver creates migrates job to clear CCS meta data. Extend that to also clear system pages using GPU. Inform TTM to allocate pages without __GFP_ZERO to avoid double page clearing by clearing out TTM_TT_FLAG_ZERO_ALLOC flag and set TTM_TT_FLAG_CLEARED_ON_FREE while freeing to skip ttm pool's clear on free as XE now takes care of clearing pages. If a bo is in system placement such as BO created with DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING and there is a cpu map then for such BO gpu clear will be avoided as there is no dma mapping for such BO at that moment to create migration jobs. Tested this patch api_overhead_benchmark_l0 from https://github.com/intel/compute-benchmarks Without the patch: api_overhead_benchmark_l0 --testFilter=UsmMemoryAllocation: UsmMemoryAllocation(api=l0 type=Host size=4KB) 84.206 us UsmMemoryAllocation(api=l0 type=Host size=1GB) 105775.56 us erf tool top 5 entries: 71.44% api_overhead_be [kernel.kallsyms] [k] clear_page_erms 6.34% api_overhead_be [kernel.kallsyms] [k] __pageblock_pfn_to_page 2.24% api_overhead_be [kernel.kallsyms] [k] cpa_flush 2.15% api_overhead_be [kernel.kallsyms] [k] pages_are_mergeable 1.94% api_overhead_be [kernel.kallsyms] [k] find_next_iomem_res With the patch: api_overhead_benchmark_l0 --testFilter=UsmMemoryAllocation: UsmMemoryAllocation(api=l0 type=Host size=4KB) 79.439 us UsmMemoryAllocation(api=l0 type=Host size=1GB) 98677.75 us Perf tool top 5 entries: 11.16% api_overhead_be [kernel.kallsyms] [k] __pageblock_pfn_to_page 7.85% api_overhead_be [kernel.kallsyms] [k] cpa_flush 7.59% api_overhead_be [kernel.kallsyms] [k] find_next_iomem_res 7.24% api_overhead_be [kernel.kallsyms] [k] pages_are_mergeable 5.53% api_overhead_be [kernel.kallsyms] [k] lookup_address_in_pgd_attr Without this patch clear_page_erms() dominates execution time which is also not pipelined with migration jobs. With this patch page clearing will get pipelined with migration job and will free CPU for more work. v2: Handle regression on dgfx(Himal) Update commit message as no ttm API changes needed. v3: Fix Kunit test. v4: handle data leak on cpu mmap(Thomas) v5: s/gpu_page_clear/gpu_page_clear_sys and move setting it to xe_ttm_sys_mgr_init() and other nits (Matt Auld) v6: Disable it when init_on_alloc and/or init_on_free is active(Matt) Use compute-benchmarks as reporter used it to report this allocation latency issue also a proper test application than mime. In v5, the test showed significant reduction in alloc latency but that is not the case any more, I think this was mostly because previous test was done on IFWI which had low mem BW from CPU. Cc: Himal Prasad Ghimiray Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_bo.c | 26 ++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 12 ++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ce8282e67e84..6ed0e1955215 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -396,6 +396,14 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, caching = ttm_uncached; } + /* + * If the device can support gpu clear system pages then set proper ttm + * flag. Zeroed pages are only required for ttm_bo_type_device so + * unwanted data is not leaked to userspace. + */ + if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) + page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE; + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); @@ -417,6 +425,10 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; + /* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */ + if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE) + tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC; + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); if (err) return err; @@ -659,8 +671,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool needs_clear; bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; + bool clear_system_pages; int ret = 0; + /* + * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when + * moving to system as the bo doesn't have dma_mapping. + */ + if (!old_mem && ttm && !ttm_tt_is_populated(ttm)) + ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE; + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) @@ -683,8 +703,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data); + clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || - (!ttm && ttm_bo->type == ttm_bo_type_device); + (!ttm && ttm_bo->type == ttm_bo_type_device) || + clear_system_pages; if (new_mem->mem_type == XE_PL_TT) { ret = xe_tt_map_sg(ttm); @@ -796,7 +818,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) { u32 flags = 0; - if (mem_type_is_vram(new_mem->mem_type)) + if (mem_type_is_vram(new_mem->mem_type) || clear_system_pages) flags |= XE_MIGRATE_CLEAR_FLAG_FULL; else if (handle_system_ccs) flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index fc89420d0ba6..cb60bc5ec21b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -339,6 +339,8 @@ struct xe_device { struct xe_mem_region vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; + /** @mem.gpu_page_clear_sys: clear system pages offloaded to GPU */ + bool gpu_page_clear_sys; } mem; /** @sriov: device level virtualization data */ diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 9844a8edbfe1..e0ac20f20758 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -117,5 +117,17 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); + + /* + * On iGFX device with flat CCS, we clear CCS metadata, let's extend that + * and use GPU to clear pages as well. + * + * Disable this when init_on_free and/or init_on_alloc is on to avoid double + * zeroing pages with CPU and GPU. + */ + if (xe_device_has_flat_ccs(xe) && !IS_DGFX(xe) && + !want_init_on_alloc(GFP_USER) && !want_init_on_free()) + xe->mem.gpu_page_clear_sys = true; + return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); } From de48aad2a8e80ba026ca91c383f590f0bf97b3c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:57 -0400 Subject: [PATCH 1250/1523] rpcrdma: Device kref is over-incremented on error from xa_alloc If the device's reference count is too high, the device completion callback never fires. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/ib_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index a938c19c3490..4d1e9fa89573 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -62,9 +62,9 @@ int rpcrdma_rn_register(struct ib_device *device, if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags)) return -ENETUNREACH; - kref_get(&rd->rd_kref); if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0) return -ENOMEM; + kref_get(&rd->rd_kref); rn->rn_done = done; return 0; } From 6b3b023e2d0c130235c0e494f77df2a9a64ab6a2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:58 -0400 Subject: [PATCH 1251/1523] rpcrdma: Use XA_FLAGS_ALLOC instead of XA_FLAGS_ALLOC1 Nit: The built-in xa_limit_32b range starts at 0, but XA_FLAGS_ALLOC1 configures the xarray's allocator to start at 1. Adopt the more conventional XA_FLAGS_ALLOC because there's no mechanical reason to skip 0. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/ib_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index 4d1e9fa89573..7913d7bad23d 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -111,7 +111,7 @@ static int rpcrdma_add_one(struct ib_device *device) return -ENOMEM; kref_init(&rd->rd_kref); - xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC); rd->rd_device = device; init_completion(&rd->rd_done); ib_set_client_data(device, &rpcrdma_ib_client, rd); From dc0112e6d8b42b39f9d283bab489a757e9d284f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:59 -0400 Subject: [PATCH 1252/1523] rpcrdma: Trace connection registration and unregistration These new trace points record xarray indices and the time of endpoint registration and unregistration, to co-ordinate with device removal events. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 +++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/ib_client.c | 2 ++ 2 files changed, 38 insertions(+) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba2d6a0e41cc..a96a985c49b3 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2277,6 +2277,42 @@ DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); +DECLARE_EVENT_CLASS(rpcrdma_client_register_class, + TP_PROTO( + const struct ib_device *device, + const struct rpcrdma_notification *rn + ), + + TP_ARGS(device, rn), + + TP_STRUCT__entry( + __string(name, device->name) + __field(void *, callback) + __field(u32, index) + ), + + TP_fast_assign( + __assign_str(name); + __entry->callback = rn->rn_done; + __entry->index = rn->rn_index; + ), + + TP_printk("device=%s index=%u done callback=%pS\n", + __get_str(name), __entry->index, __entry->callback + ) +); + +#define DEFINE_CLIENT_REGISTER_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_register_class, name, \ + TP_PROTO( \ + const struct ib_device *device, \ + const struct rpcrdma_notification *rn \ + ), \ + TP_ARGS(device, rn)) + +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register); +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index 7913d7bad23d..8507cd4d8921 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -66,6 +66,7 @@ int rpcrdma_rn_register(struct ib_device *device, return -ENOMEM; kref_get(&rd->rd_kref); rn->rn_done = done; + trace_rpcrdma_client_register(device, rn); return 0; } @@ -91,6 +92,7 @@ void rpcrdma_rn_unregister(struct ib_device *device, if (!rd) return; + trace_rpcrdma_client_unregister(device, rn); xa_erase(&rd->rd_xa, rn->rn_index); kref_put(&rd->rd_kref, rpcrdma_rn_release); } From 2240a50e6294214de791729e9dcba6880fa7e44e Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 17 Aug 2024 18:15:41 +0800 Subject: [PATCH 1253/1523] KVM: arm64: vgic-debug: Don't put unmarked LPIs If there were LPIs being mapped behind our back (i.e., between .start() and .stop()), we would put them at iter_unmark_lpis() without checking if they were actually *marked*, which is obviously not good. Switch to use the xa_for_each_marked() iterator to fix it. Cc: stable@vger.kernel.org Fixes: 85d3ccc8b75b ("KVM: arm64: vgic-debug: Use an xarray mark for debug iterator") Signed-off-by: Zenghui Yu Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240817101541.1664-1-yuzenghui@huawei.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bc74d06398ef..e1397ab2072a 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void iter_unmark_lpis(struct kvm *kvm) struct vgic_irq *irq; unsigned long intid; - xa_for_each(&dist->lpi_xa, intid, irq) { + xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); vgic_put_irq(kvm, irq); } From f616506754d34bcfdbfbc7508b562e5c98461e9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Aug 2024 13:50:45 +0100 Subject: [PATCH 1254/1523] KVM: arm64: vgic: Don't hold config_lock while unregistering redistributors We recently moved the teardown of the vgic part of a vcpu inside a critical section guarded by the config_lock. This teardown phase involves calling into kvm_io_bus_unregister_dev(), which takes the kvm->srcu lock. However, this violates the established order where kvm->srcu is taken on a memory fault (such as an MMIO access), possibly followed by taking the config_lock if the GIC emulation requires mutual exclusion from the other vcpus. It therefore results in a bad lockdep splat, as reported by Zenghui. Fix this by moving the call to kvm_io_bus_unregister_dev() outside of the config_lock critical section. At this stage, there shouln't be any need to hold the config_lock. As an additional bonus, document the ordering between kvm->slots_lock, kvm->srcu and kvm->arch.config_lock so that I cannot pretend I didn't know about those anymore. Fixes: 9eb18136af9f ("KVM: arm64: vgic: Hold config_lock while tearing down a CPU interface") Reported-by: Zenghui Yu Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Tested-by: Zenghui Yu Link: https://lore.kernel.org/r/20240819125045.3474845-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-init.c | 9 ++++++--- arch/arm64/kvm/vgic/vgic.c | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 41feb858ff9a..e7c53e8af3d1 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -417,10 +417,8 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->private_irqs); vgic_cpu->private_irqs = NULL; - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_unregister_redist_iodev(vcpu); + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; - } } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -448,6 +446,11 @@ void kvm_vgic_destroy(struct kvm *kvm) kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + kvm_for_each_vcpu(i, vcpu, kvm) + vgic_unregister_redist_iodev(vcpu); + mutex_unlock(&kvm->slots_lock); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 974849ea7101..abe29c7d85d0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -36,6 +36,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * we have to disable IRQs before taking this lock and everything lower * than it. * + * The config_lock has additional ordering requirements: + * kvm->slots_lock + * kvm->srcu + * kvm->arch.config_lock + * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. * If you are already holding a lock and need to take a higher one, you From 27cb2b7fec2abf310e4128137979124ead920ccb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 3 Jul 2024 13:43:38 +0100 Subject: [PATCH 1255/1523] drm/xe/bmg: implement Wa_16023588340 This involves enabling l2 caching of host side memory access to VRAM through the CPU BAR. The main fallout here is with display since VRAM writes from CPU can now be cached in GPU l2, and display is never coherent with caches, so needs various manual flushing. In the case of fbc we disable it due to complications in getting this to work correctly (in a later patch). Signed-off-by: Matthew Auld Cc: Jonathan Cavitt Cc: Matt Roper Cc: Lucas De Marchi Cc: Vinod Govindapillai Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240703124338.208220-3-matthew.auld@intel.com (cherry picked from commit 01570b446939c3538b1aa3d059837f49fa14a3ae) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 + drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 ++++ drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++ drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++ drivers/gpu/drm/xe/xe_device.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 54 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pat.c | 11 ++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 9 files changed, 117 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 628c245c4822..e97c9da451b3 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ uses_generated_oob := \ $(obj)/xe_ggtt.o \ + $(obj)/xe_device.o \ $(obj)/xe_gsc.o \ $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ + $(obj)/xe_pat.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ $(obj)/xe_wa.o \ diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9e860c61f4b3..ccd0d87d438a 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -7,6 +7,8 @@ #include "intel_display_types.h" #include "intel_dsb_buffer.h" #include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" #include "xe_gt.h" u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); + xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); + xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 423f367c7065..d7db44e79eaf 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_pm.h" @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d44564bad009..fd9d94174efb 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define CG_DIS_CNTLBUS REG_BIT(6) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -372,6 +375,11 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2_GLOBAL_INVAL XE_REG(0xb404) + +#define SCRATCH1LPFC XE_REG(0xb474) +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f2f1d8ddb221..6ce44ca2524d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -54,6 +54,9 @@ #include "xe_vm.h" #include "xe_vram.h" #include "xe_wait_user_fence.h" +#include "xe_wa.h" + +#include static int xe_file_open(struct drm_device *dev, struct drm_file *file) { @@ -820,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + xe_device_l2_flush(xe); + return; + } + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -843,6 +851,28 @@ void xe_device_td_flush(struct xe_device *xe) } } +void xe_device_l2_flush(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + + gt = xe_root_mmio_gt(xe); + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) + xe_gt_err_once(gt, "Global invalidation timeout\n"); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index b3952718b3c1..533ccfb2567a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +void xe_device_l2_flush(struct xe_device *xe); static inline bool xe_device_wedged(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 31b2e64c70c6..816ecc9e294c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -11,6 +11,8 @@ #include #include +#include + #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + if (!xe_gt_is_media_type(gt)) { + xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + if (xe_gt_is_media_type(gt)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + /** * xe_gt_remove() - Clean up the GT structures before driver removal * @gt: the GT object @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt) for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); } static void gt_reset_worker(struct work_struct *w); @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_idle_disable_pg(gt); + xe_gt_disable_host_l2_vram(gt); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_gt_dbg(gt, "suspended\n"); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ee32ee1cc88..722278cc23fc 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -7,6 +7,8 @@ #include +#include + #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -15,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_wa.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + + /* Wa_16023588340. XXX: Should use XE_WA */ + if (GRAPHICS_VERx100(xe) == 2001) + xe->pat.n_entries = 28; /* Disable CLOS3 */ + else + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 26066beb4f6f..08f7336881e3 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -29,3 +29,4 @@ 13011645652 GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001) From 03a2dc84f5c4ef31ac0112b29d51ff103f7c8dd4 Mon Sep 17 00:00:00 2001 From: Ngai-Mint Kwan Date: Mon, 1 Jul 2024 11:46:37 -0700 Subject: [PATCH 1256/1523] drm/xe/xe2lpm: Extend Wa_16021639441 Wa_16021639441 applies to Xe2_LPM. Signed-off-by: Ngai-Mint Kwan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240701184637.531794-1-ngai-mint.kwan@linux.intel.com (cherry picked from commit 74e3076800067c6dc0dcff5b75344cec064c20eb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7bf0862b231..6c52d9d02b5f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -539,6 +539,16 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + /* Xe2_LPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* Xe2_HPM */ { XE_RTP_NAME("16021639441"), From b196e6fcc71186134b4cfe756067d87ae41b1ed9 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Wed, 3 Jul 2024 14:37:54 +0530 Subject: [PATCH 1257/1523] drm/xe/xe2lpg: Extend workaround 14021402888 workaround 14021402888 also applies to Xe2_LPG. Replicate the existing entry to one specific for Xe2_LPG. Signed-off-by: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Cc: Himal Prasad Ghimiray Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240703090754.1323647-1-krishnaiah.bommu@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 56ab6986992ba143aee0bda33e15a764343e271d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 6c52d9d02b5f..fd009b2c68fa 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPG */ From 7e81285380743aa5759bb29a388f056c3d326a2c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 10 Jul 2024 10:57:50 +0530 Subject: [PATCH 1258/1523] drm/xe/xe2: Make subsequent L2 flush sequential Issuing the flush on top of an ongoing flush is not desirable. Lets use lock to make it sequential. Reviewed-by: Nirmoy Das Signed-off-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240710052750.3031586-1-tejas.upadhyay@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit 71733b8d7f50b61403f940c6c9745fb3a9b98dcb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6ce44ca2524d..c89deffffb6d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -865,10 +865,12 @@ void xe_device_l2_flush(struct xe_device *xe) if (err) return; + spin_lock(>->global_invl_lock); xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 816ecc9e294c..b9bcbbe27705 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -388,6 +388,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); xe_pcode_init(gt); + spin_lock_init(>->global_invl_lock); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6b5e0b45efb0..38a0d0e178c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -362,6 +362,12 @@ struct xe_gt { */ spinlock_t mcr_lock; + /** + * @global_invl_lock: protects the register for the duration + * of a global invalidation of l2 cache + */ + spinlock_t global_invl_lock; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.gt: bitmap with active GT workarounds */ From cbc6e98ab11bea52789d2835e45e8816c39407e1 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 9 Jul 2024 21:26:06 +0530 Subject: [PATCH 1259/1523] drm/xe/xe2: Add Wa_15015404425 Wa_15015404425 asks us to perform four "dummy" writes to a non-existent register offset before every real register read. Although the specific offset of the writes doesn't directly matter, the workaround suggests offset 0x130030 as a good target so that these writes will be easy to recognize and filter out in debugging traces. V5(MattR): - Avoid negating an equality comparison V4(MattR): - Use writel and remove xe_reg usage V3(MattR): - Define dummy reg local to function - Avoid tracing dummy writes - Update commit message V2: - Add WA to 8/16/32bit reads also - MattR - Corrected dummy reg address - MattR - Use for loop to avoid mental pause - JaniN Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240709155606.2998941-1-tejas.upadhyay@intel.com (cherry picked from commit 86c5b70a9c0c3f05f7002ef8b789460c96b54e27) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 83122c77edd9..aa68cac9fdf8 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -124,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +static void mmio_flush_pending_writes(struct xe_gt *gt) +{ +#define DUMMY_REG_OFFSET 0x130030 + struct xe_tile *tile = gt_to_tile(gt); + int i; + + if (tile->xe->info.platform != XE_LUNARLAKE) + return; + + /* 4 dummy writes */ + for (i = 0; i < 4; i++) + writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); +} + u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u8 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -142,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u16 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -163,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u32 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) val = xe_gt_sriov_vf_read32(gt, reg); else From f5cb1275c8ce56c7583cb323cfa08a820a7ef6b4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 7 Aug 2024 16:53:32 -0700 Subject: [PATCH 1260/1523] drm/xe: fix WA 14018094691 This WA is applied while initializing the media GT, but it a primary GT WA (because it modifies a register on the primary GT), so the XE_WA macro is returning false even when the WA should be applied. Fix this by using the primary GT in the macro. Note that this WA only applies to PXP and we don't yet support that in Xe, so there are no negative effects to this bug, which is why we didn't see any errors in testing. v2: use the primary GT in the macro instead of marking the WA as platform-wide (Lucas, Matt). Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240807235333.1370915-1-daniele.ceraolospurio@intel.com (cherry picked from commit e422c0bfd9e47e399e86bcc483f49d8b54064fc2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..77ce44e845c5 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) From 8776b0234e1d008d8f19b26f6c3af1cfa6187070 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 12 Aug 2024 19:11:17 +0530 Subject: [PATCH 1261/1523] drm/xe/xe2hpg: Add Wa_14021821874 Wa_14021821874 applies to xe2_hpg V2(Himal): - Use space after define Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240812134117.813670-1-tejas.upadhyay@intel.com (cherry picked from commit 21ff3a16e92e2fa4f906a61d148aca1423c58298) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index fd9d94174efb..3c2865040058 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -437,6 +437,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index fd009b2c68fa..e648265d081b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -542,6 +542,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, /* Xe2_LPM */ From 8636a5c29be1f05b5162a5c82c874338b6717759 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 9 Aug 2024 16:12:35 -0700 Subject: [PATCH 1262/1523] drm/xe: use devm instead of drmm for managed bo The BO cleanup touches the GGTT and therefore requires the HW to be available, so we need to use devm instead of drmm. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1160 Signed-off-by: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Matthew Auld Reviewed-by: Matthew Auld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240809231237.1503796-2-daniele.ceraolospurio@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8d3a2d3d766a823c7510cdc17e6ff7c042c63b61) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 31192d983d9e..261d3d6c8a93 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; From a06a7b3429e2548a28bb661f17347b8ffe4a8a15 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:40 -0700 Subject: [PATCH 1263/1523] drm/xe/uc: Use devm to register cleanup that includes exec_queues Exec_queue cleanup requires HW access, so we need to use devm instead of drmm for it. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 5a891a0e69f134f53cc91b409f38e5ea1cafaf0a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 77ce44e845c5..2a612652bb13 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6398629e6b4e..77b0f0d8f729 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); From 4e870e6bbec5c41c0d8b253282dca9465fbf5044 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Aug 2024 17:04:42 -0700 Subject: [PATCH 1264/1523] Input: himax_hx83112b - fix incorrect size when reading product ID We need to read a u32 value (4 bytes), not size of a pointer to that value. Also, himax_read_mcu() wrapper is an overkill, remove it and use himax_bus_read() directly. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408200301.Ujpj7Vov-lkp@intel.com/ Fixes: 0944829d491e ("Input: himax_hx83112b - implement MCU register reading") Tested-by: Felix Kaechele Link: https://lore.kernel.org/r/ZsPdmtfC54R7JVxR@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/himax_hx83112b.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/himax_hx83112b.c b/drivers/input/touchscreen/himax_hx83112b.c index 9ed3bccde4ac..896a145ddb2b 100644 --- a/drivers/input/touchscreen/himax_hx83112b.c +++ b/drivers/input/touchscreen/himax_hx83112b.c @@ -130,17 +130,6 @@ static int himax_bus_read(struct himax_ts_data *ts, u32 address, void *dst, return 0; } -static int himax_read_mcu(struct himax_ts_data *ts, u32 address, u32 *dst) -{ - int error; - - error = himax_bus_read(ts, address, dst, sizeof(dst)); - if (error) - return error; - - return 0; -} - static void himax_reset(struct himax_ts_data *ts) { gpiod_set_value_cansleep(ts->gpiod_rst, 1); @@ -160,7 +149,8 @@ static int himax_read_product_id(struct himax_ts_data *ts, u32 *product_id) { int error; - error = himax_read_mcu(ts, HIMAX_REG_ADDR_ICID, product_id); + error = himax_bus_read(ts, HIMAX_REG_ADDR_ICID, product_id, + sizeof(*product_id)); if (error) return error; From ce335db0621648472f9bb4b7191eb2e13a5793cf Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 16 Aug 2024 18:29:17 +0800 Subject: [PATCH 1265/1523] net: mctp: test: Use correct skb for route input check In the MCTP route input test, we're routing one skb, then (when delivery is expected) checking the resulting routed skb. However, we're currently checking the original skb length, rather than the routed skb. Check the routed skb instead; the original will have been freed at this point. Fixes: 8892c0490779 ("mctp: Add route input to socket tests") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kernel-janitors/4ad204f0-94cf-46c5-bdab-49592addf315@kili.mountain/ Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240816-mctp-kunit-skb-fix-v1-1-3c367ac89c27@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 77e5dd422258..8551dab1d1e6 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test) skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); - KUNIT_EXPECT_EQ(test, skb->len, 1); + KUNIT_EXPECT_EQ(test, skb2->len, 1); skb_free_datagram(sock->sk, skb2); From 807067bf014d4a3ae2cc55bd3de16f22a01eb580 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Aug 2024 15:04:37 -0700 Subject: [PATCH 1266/1523] kcm: Serialise kcm_sendmsg() for the same socket. syzkaller reported UAF in kcm_release(). [0] The scenario is 1. Thread A builds a skb with MSG_MORE and sets kcm->seq_skb. 2. Thread A resumes building skb from kcm->seq_skb but is blocked by sk_stream_wait_memory() 3. Thread B calls sendmsg() concurrently, finishes building kcm->seq_skb and puts the skb to the write queue 4. Thread A faces an error and finally frees skb that is already in the write queue 5. kcm_release() does double-free the skb in the write queue When a thread is building a MSG_MORE skb, another thread must not touch it. Let's add a per-sk mutex and serialise kcm_sendmsg(). [0]: BUG: KASAN: slab-use-after-free in __skb_unlink include/linux/skbuff.h:2366 [inline] BUG: KASAN: slab-use-after-free in __skb_dequeue include/linux/skbuff.h:2385 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge include/linux/skbuff.h:3181 [inline] BUG: KASAN: slab-use-after-free in kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 Read of size 8 at addr ffff0000ced0fc80 by task syz-executor329/6167 CPU: 1 PID: 6167 Comm: syz-executor329 Tainted: G B 6.8.0-rc5-syzkaller-g9abbc24128bc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:291 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:298 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x178/0x518 mm/kasan/report.c:488 kasan_report+0xd8/0x138 mm/kasan/report.c:601 __asan_report_load8_noabort+0x20/0x2c mm/kasan/report_generic.c:381 __skb_unlink include/linux/skbuff.h:2366 [inline] __skb_dequeue include/linux/skbuff.h:2385 [inline] __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] __skb_queue_purge include/linux/skbuff.h:3181 [inline] kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Allocated by task 6166: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x70/0x84 mm/kasan/generic.c:626 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x74/0x8c mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x204/0x4c0 mm/slub.c:3903 __alloc_skb+0x19c/0x3d8 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1296 [inline] kcm_sendmsg+0x1d3c/0x2124 net/kcm/kcmsock.c:783 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x220/0x2c0 net/socket.c:768 splice_to_socket+0x7cc/0xd58 fs/splice.c:889 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0xec/0x1d8 fs/splice.c:1164 splice_direct_to_actor+0x438/0xa0c fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x1e4/0x304 fs/splice.c:1233 do_sendfile+0x460/0xb3c fs/read_write.c:1295 __do_sys_sendfile64 fs/read_write.c:1362 [inline] __se_sys_sendfile64 fs/read_write.c:1348 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1348 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Freed by task 6167: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x5c/0x74 mm/kasan/generic.c:640 poison_slab_object+0x124/0x18c mm/kasan/common.c:241 __kasan_slab_free+0x3c/0x78 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x15c/0x3d4 mm/slub.c:4363 kfree_skbmem+0x10c/0x19c __kfree_skb net/core/skbuff.c:1109 [inline] kfree_skb_reason+0x240/0x6f4 net/core/skbuff.c:1144 kfree_skb include/linux/skbuff.h:1244 [inline] kcm_release+0x104/0x4c8 net/kcm/kcmsock.c:1685 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 The buggy address belongs to the object at ffff0000ced0fc80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 0 bytes inside of freed 240-byte region [ffff0000ced0fc80, ffff0000ced0fd70) The buggy address belongs to the physical page: page:00000000d35f4ae4 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10ed0f flags: 0x5ffc00000000800(slab|node=0|zone=2|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 05ffc00000000800 ffff0000c1cbf640 fffffdffc3423100 dead000000000004 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff0000ced0fb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff0000ced0fc00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff0000ced0fc80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff0000ced0fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff0000ced0fd80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b72d86aa5df17ce74c60 Tested-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240815220437.69511-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 + net/kcm/kcmsock.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5..441e993be634 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 2f191e50d4fc..d4118c796290 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); int err = -EPIPE; + mutex_lock(&kcm->tx_mutex); lock_sock(sk); /* Per tcp_sendmsg this should be in poll */ @@ -926,6 +927,7 @@ partial_message: KCM_STATS_ADD(kcm->stats.tx_bytes, copied); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return copied; out_error: @@ -951,6 +953,7 @@ out_error: sk->sk_write_space(sk); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return err; } @@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->lock); INIT_WORK(&kcm->tx_work, kcm_tx_work); + mutex_init(&kcm->tx_mutex); spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); From 2102bdac67b55bf2d1df4ff757bced74e94a5f74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 21:03:07 -0400 Subject: [PATCH 1267/1523] bcachefs: Extra debug for data move path We don't have sufficient information to debug: https://github.com/koverstreet/bcachefs/issues/726 - print out durability of extent ptrs, when non default - print the number of replicas we need in data_update_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 3 +++ fs/bcachefs/extents.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 6a854c918496..1ca628e93e87 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -475,6 +475,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, bch2_compression_opt_to_text(out, background_compression(*io_opts)); prt_newline(out); + prt_str(out, "opts.replicas:\t"); + prt_u64(out, io_opts->data_replicas); + prt_str(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 4419ad3e454e..9406f82fc255 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1017,6 +1017,8 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_printf(out, "ptr: %u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); + if (ca->mi.durability != 1) + prt_printf(out, " d=%u", ca->mi.durability); if (ptr->cached) prt_str(out, " cached"); if (ptr->unwritten) From 50f2b98dc83de7809a5c5bf0ccf9af2e75c37c13 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 13 Aug 2024 10:59:08 +0100 Subject: [PATCH 1268/1523] MIPS: cevt-r4k: Don't call get_c0_compare_int if timer irq is installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids warning: [ 0.118053] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 Caused by get_c0_compare_int on secondary CPU. We also skipped saving IRQ number to struct clock_event_device *cd as it's never used by clockevent core, as per comments it's only meant for "non CPU local devices". Reported-by: Serge Semin Closes: https://lore.kernel.org/linux-mips/6szkkqxpsw26zajwysdrwplpjvhl5abpnmxgu2xuj3dkzjnvsf@4daqrz4mf44k/ Signed-off-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Serge Semin Tested-by: Serge Semin Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/cevt-r4k.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 368e8475870f..5f6e9e2ebbdb 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -303,13 +303,6 @@ int r4k_clockevent_init(void) if (!c0_compare_int_usable()) return -ENXIO; - /* - * With vectored interrupts things are getting platform specific. - * get_c0_compare_int is a hook to allow a platform to return the - * interrupt number of its liking. - */ - irq = get_c0_compare_int(); - cd = &per_cpu(mips_clockevent_device, cpu); cd->name = "MIPS"; @@ -320,7 +313,6 @@ int r4k_clockevent_init(void) min_delta = calculate_min_delta(); cd->rating = 300; - cd->irq = irq; cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->event_handler = mips_event_handler; @@ -332,6 +324,13 @@ int r4k_clockevent_init(void) cp0_timer_irq_installed = 1; + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of its liking. + */ + irq = get_c0_compare_int(); + if (request_irq(irq, c0_compare_interrupt, flags, "timer", c0_compare_interrupt)) pr_err("Failed to request irq %d (timer)\n", irq); From 1eacdd71b3436b54d5fc8218c4bb0187d92a6892 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 09:54:30 +0200 Subject: [PATCH 1269/1523] netfilter: nft_counter: Disable BH in nft_counter_offload_stats(). The sequence counter nft_counter_seq is a per-CPU counter. There is no lock associated with it. nft_counter_do_eval() is using the same counter and disables BH which suggest that it can be invoked from a softirq. This in turn means that nft_counter_offload_stats(), which disables only preemption, can be interrupted by nft_counter_do_eval() leading to two writer for one seqcount_t. This can lead to loosing stats or reading statistics while they are updated. Disable BH during stats update in nft_counter_offload_stats() to ensure one writer at a time. Fixes: b72920f6e4a9d ("netfilter: nftables: counter hardware offload support") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 291ed2026367..16f40b503d37 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -265,7 +265,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, struct nft_counter *this_cpu; seqcount_t *myseq; - preempt_disable(); + local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); myseq = this_cpu_ptr(&nft_counter_seq); @@ -273,7 +273,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, this_cpu->packets += stats->pkts; this_cpu->bytes += stats->bytes; write_seqcount_end(myseq); - preempt_enable(); + local_bh_enable(); } void nft_counter_init_seqcount(void) From a0b39e2dc7017ac667b70bdeee5293e410fab2fb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 09:54:31 +0200 Subject: [PATCH 1270/1523] netfilter: nft_counter: Synchronize nft_counter_reset() against reader. nft_counter_reset() resets the counter by subtracting the previously retrieved value from the counter. This is a write operation on the counter and as such it requires to be performed with a write sequence of nft_counter_seq to serialize against its possible reader. Update the packets/ bytes within write-sequence of nft_counter_seq. Fixes: d84701ecbcd6a ("netfilter: nft_counter: rework atomic dump and reset") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_counter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 16f40b503d37..eab0dc66bee6 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -107,11 +107,16 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; + seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); this_cpu->packets -= total->packets; this_cpu->bytes -= total->bytes; + write_seqcount_end(myseq); local_bh_enable(); } From 46ee21e9f59205e54943dfe51b2dc8a9352ca37d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 16 Aug 2024 09:36:26 -0700 Subject: [PATCH 1271/1523] platform/x86: ISST: Fix return value on last invalid resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When only the last resource is invalid, tpmi_sst_dev_add() is returing error even if there are other valid resources before. This function should return error when there are no valid resources. Here tpmi_sst_dev_add() is returning "ret" variable. But this "ret" variable contains the failure status of last call to sst_main(), which failed for the invalid resource. But there may be other valid resources before the last entry. To address this, do not update "ret" variable for sst_main() return status. If there are no valid resources, it is already checked for by !inst below the loop and -ENODEV is returned. Fixes: 9d1d36268f3d ("platform/x86: ISST: Support partitioned systems") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org # 6.10+ Link: https://lore.kernel.org/r/20240816163626.415762-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6e..404582307109 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. From 4b3e33fcc38f7750604b065c55a43e94c5bc3145 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 15 Aug 2024 17:14:16 +0200 Subject: [PATCH 1272/1523] ip6_tunnel: Fix broken GRO GRO code checks for matching layer 2 headers to see, if packet belongs to the same flow and because ip6 tunnel set dev->hard_header_len this check fails in cases, where it shouldn't. To fix this don't set hard_header_len, but use needed_headroom like ipv4/ip_tunnel.c does. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Thomas Bogendoerfer Link: https://patch.msgid.link/20240815151419.109864-1-tbogendoerfer@suse.de Signed-off-by: Paolo Abeni --- net/ipv6/ip6_tunnel.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9dee0c127955..87dfb565a9f8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1507,7 +1507,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) tdev = __dev_get_by_index(t->net, p->link); if (tdev) { - dev->hard_header_len = tdev->hard_header_len + t_hlen; + dev->needed_headroom = tdev->hard_header_len + + tdev->needed_headroom + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); mtu = mtu - t_hlen; @@ -1731,7 +1732,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); + int t_hlen; + t_hlen = tnl->hlen + sizeof(struct ipv6hdr); if (tnl->parms.proto == IPPROTO_IPV6) { if (new_mtu < IPV6_MIN_MTU) return -EINVAL; @@ -1740,10 +1743,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { - if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } else { - if (new_mtu > IP_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); @@ -1887,12 +1890,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t_hlen = t->hlen + sizeof(struct ipv6hdr); dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); From 6275c7bc8dd07644ea8142a1773d826800f0f3f7 Mon Sep 17 00:00:00 2001 From: Ben Whitten Date: Sun, 11 Aug 2024 22:22:11 +0100 Subject: [PATCH 1273/1523] mmc: dw_mmc: allow biu and ciu clocks to defer Fix a race condition if the clock provider comes up after mmc is probed, this causes mmc to fail without retrying. When given the DEFER error from the clk source, pass it on up the chain. Fixes: f90a0612f0e1 ("mmc: dw_mmc: lookup for optional biu and ciu clocks") Signed-off-by: Ben Whitten Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240811212212.123255-1-ben.whitten@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 2333ef4893ee..e9f6e4e62290 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -3299,6 +3299,10 @@ int dw_mci_probe(struct dw_mci *host) host->biu_clk = devm_clk_get(host->dev, "biu"); if (IS_ERR(host->biu_clk)) { dev_dbg(host->dev, "biu clock not available\n"); + ret = PTR_ERR(host->biu_clk); + if (ret == -EPROBE_DEFER) + return ret; + } else { ret = clk_prepare_enable(host->biu_clk); if (ret) { @@ -3310,6 +3314,10 @@ int dw_mci_probe(struct dw_mci *host) host->ciu_clk = devm_clk_get(host->dev, "ciu"); if (IS_ERR(host->ciu_clk)) { dev_dbg(host->dev, "ciu clock not available\n"); + ret = PTR_ERR(host->ciu_clk); + if (ret == -EPROBE_DEFER) + goto err_clk_biu; + host->bus_hz = host->pdata->bus_hz; } else { ret = clk_prepare_enable(host->ciu_clk); From a1e627af32ed60713941cbfc8075d44cad07f6dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2024 11:44:08 +0300 Subject: [PATCH 1274/1523] mmc: mmc_test: Fix NULL dereference on allocation failure If the "test->highmem = alloc_pages()" allocation fails then calling __free_pages(test->highmem) will result in a NULL dereference. Also change the error code to -ENOMEM instead of returning success. Fixes: 2661081f5ab9 ("mmc_test: highmem tests") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/8c90be28-67b4-4b0d-a105-034dc72a0b31@stanley.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_test.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8f7f587a0025..b7f627a9fdea 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3125,13 +3125,13 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL); #ifdef CONFIG_HIGHMEM test->highmem = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, BUFFER_ORDER); + if (!test->highmem) { + count = -ENOMEM; + goto free_test_buffer; + } #endif -#ifdef CONFIG_HIGHMEM - if (test->buffer && test->highmem) { -#else if (test->buffer) { -#endif mutex_lock(&mmc_test_lock); mmc_test_run(test, testcase); mutex_unlock(&mmc_test_lock); @@ -3139,6 +3139,7 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, #ifdef CONFIG_HIGHMEM __free_pages(test->highmem, BUFFER_ORDER); +free_test_buffer: #endif kfree(test->buffer); kfree(test); From 783bf5d09f86b9736605f3e01a3472e55ef98ff8 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Tue, 20 Aug 2024 15:06:58 +0800 Subject: [PATCH 1275/1523] spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register Referring to the errata ERR051608 of I.MX93, LPSPI TCR[PRESCALE] can only be configured to be 0 or 1, other values are not valid and will cause LPSPI to not work. Add the prescale limitation for LPSPI in I.MX93. Other platforms are not affected. Signed-off-by: Carlos Song Link: https://patch.msgid.link/20240820070658.672127-1-carlos.song@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index be261ac09df8..350c5d91d869 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -82,6 +82,10 @@ #define TCR_RXMSK BIT(19) #define TCR_TXMSK BIT(18) +struct fsl_lpspi_devtype_data { + u8 prescale_max; +}; + struct lpspi_config { u8 bpw; u8 chip_select; @@ -119,10 +123,25 @@ struct fsl_lpspi_data { bool usedma; struct completion dma_rx_completion; struct completion dma_tx_completion; + + const struct fsl_lpspi_devtype_data *devtype_data; +}; + +/* + * ERR051608 fixed or not: + * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf + */ +static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { + .prescale_max = 1, +}; + +static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { + .prescale_max = 8, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { - { .compatible = "fsl,imx7ulp-spi", }, + { .compatible = "fsl,imx7ulp-spi", .data = &imx7ulp_lpspi_devtype_data,}, + { .compatible = "fsl,imx93-spi", .data = &imx93_lpspi_devtype_data,}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids); @@ -297,9 +316,11 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; unsigned int perclk_rate, scldiv, div; + u8 prescale_max; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); + prescale_max = fsl_lpspi->devtype_data->prescale_max; if (!config.speed_hz) { dev_err(fsl_lpspi->dev, @@ -315,7 +336,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale < 8; prescale++) { + for (prescale = 0; prescale < prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; @@ -822,6 +843,7 @@ static int fsl_lpspi_init_rpm(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_probe(struct platform_device *pdev) { + const struct fsl_lpspi_devtype_data *devtype_data; struct fsl_lpspi_data *fsl_lpspi; struct spi_controller *controller; struct resource *res; @@ -830,6 +852,10 @@ static int fsl_lpspi_probe(struct platform_device *pdev) u32 temp; bool is_target; + devtype_data = of_device_get_match_data(&pdev->dev); + if (!devtype_data) + return -ENODEV; + is_target = of_property_read_bool((&pdev->dev)->of_node, "spi-slave"); if (is_target) controller = devm_spi_alloc_target(&pdev->dev, @@ -848,6 +874,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->is_target = is_target; fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node, "fsl,spi-only-use-cs1-sel"); + fsl_lpspi->devtype_data = devtype_data; init_completion(&fsl_lpspi->xfer_done); From fc59b9a5f7201b9f7272944596113a82cc7773d5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:10 +0300 Subject: [PATCH 1276/1523] bonding: fix bond_ipsec_offload_ok return type Fix the return type which should be bool. Fixes: 955b785ec6b3 ("bonding: fix suspicious RCU usage in bond_ipsec_offload_ok()") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1cd92c12e782..85b5868deeea 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -599,34 +599,28 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) struct net_device *real_dev; struct slave *curr_active; struct bonding *bond; - int err; + bool ok = false; bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); real_dev = curr_active->dev; - if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - err = false; + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) goto out; - } - if (!xs->xso.real_dev) { - err = false; + if (!xs->xso.real_dev) goto out; - } if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(real_dev)) { - err = false; + netif_is_bond_master(real_dev)) goto out; - } - err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); out: rcu_read_unlock(); - return err; + return ok; } static const struct xfrmdev_ops bond_xfrmdev_ops = { From 95c90e4ad89d493a7a14fa200082e466e2548f9d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:11 +0300 Subject: [PATCH 1277/1523] bonding: fix null pointer deref in bond_ipsec_offload_ok We must check if there is an active slave before dereferencing the pointer. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 85b5868deeea..65ddb71eebcd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -604,6 +604,8 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); + if (!curr_active) + goto out; real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) From f8cde9805981c50d0c029063dc7d82821806fc44 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:12 +0300 Subject: [PATCH 1278/1523] bonding: fix xfrm real_dev null pointer dereference We shouldn't set real_dev to NULL because packets can be in transit and xfrm might call xdo_dev_offload_ok() in parallel. All callbacks assume real_dev is set. Example trace: kernel: BUG: unable to handle page fault for address: 0000000000001030 kernel: bond0: (slave eni0np1): making interface the new active one kernel: #PF: supervisor write access in kernel mode kernel: #PF: error_code(0x0002) - not-present page kernel: PGD 0 P4D 0 kernel: Oops: 0002 [#1] PREEMPT SMP kernel: CPU: 4 PID: 2237 Comm: ping Not tainted 6.7.7+ #12 kernel: Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 kernel: RIP: 0010:nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: Code: e0 0f 0b 48 83 7f 38 00 74 de 0f 0b 48 8b 47 08 48 8b 37 48 8b 78 40 e9 b2 e5 9a d7 66 90 0f 1f 44 00 00 48 8b 86 80 02 00 00 <83> 80 30 10 00 00 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f kernel: bond0: (slave eni0np1): making interface the new active one kernel: RSP: 0018:ffffabde81553b98 EFLAGS: 00010246 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: kernel: RAX: 0000000000000000 RBX: ffff9eb404e74900 RCX: ffff9eb403d97c60 kernel: RDX: ffffffffc090de10 RSI: ffff9eb404e74900 RDI: ffff9eb3c5de9e00 kernel: RBP: ffff9eb3c0a42000 R08: 0000000000000010 R09: 0000000000000014 kernel: R10: 7974203030303030 R11: 3030303030303030 R12: 0000000000000000 kernel: R13: ffff9eb3c5de9e00 R14: ffffabde81553cc8 R15: ffff9eb404c53000 kernel: FS: 00007f2a77a3ad00(0000) GS:ffff9eb43bd00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000001030 CR3: 00000001122ab000 CR4: 0000000000350ef0 kernel: bond0: (slave eni0np1): making interface the new active one kernel: Call Trace: kernel: kernel: ? __die+0x1f/0x60 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ? page_fault_oops+0x142/0x4c0 kernel: ? do_user_addr_fault+0x65/0x670 kernel: ? kvm_read_and_reset_apf_flags+0x3b/0x50 kernel: bond0: (slave eni0np1): making interface the new active one kernel: ? exc_page_fault+0x7b/0x180 kernel: ? asm_exc_page_fault+0x22/0x30 kernel: ? nsim_bpf_uninit+0x50/0x50 [netdevsim] kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ? nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] kernel: bond0: (slave eni0np1): making interface the new active one kernel: bond_ipsec_offload_ok+0x7b/0x90 [bonding] kernel: xfrm_output+0x61/0x3b0 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ip_push_pending_frames+0x56/0x80 Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 65ddb71eebcd..f74bacf071fc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -582,7 +582,6 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) } else { slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); } - ipsec->xs->xso.real_dev = NULL; } spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); From c4c5c5d2ef40a9f67a9241dc5422eac9ffe19547 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:13 +0300 Subject: [PATCH 1279/1523] bonding: fix xfrm state handling when clearing active slave If the active slave is cleared manually the xfrm state is not flushed. This leads to xfrm add/del imbalance and adding the same state multiple times. For example when the device cannot handle anymore states we get: [ 1169.884811] bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA because it's filled with the same state after multiple active slave clearings. This change also has a few nice side effects: user-space gets a notification for the change, the old device gets its mac address and promisc/mcast adjusted properly. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index bc80fb6397dc..95d59a18c022 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -936,7 +936,7 @@ static int bond_option_active_slave_set(struct bonding *bond, /* check to see if we are clearing active */ if (!slave_dev) { netdev_dbg(bond->dev, "Clearing current active slave\n"); - RCU_INIT_POINTER(bond->curr_active_slave, NULL); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); } else { struct slave *old_active = rtnl_dereference(bond->curr_active_slave); From 7586fc52b14e0b8edd0d1f8a434e0de2078b7b2b Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:30 +0000 Subject: [PATCH 1280/1523] drm/xe: Fix missing workqueue destroy in xe_gt_pagefault On driver reload we never free up the memory for the pagefault and access counter workqueues. Add those destroy calls here. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Stuart Summers Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/c9a951505271dc3a7aee76de7656679f69c11518.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..b2a7fa55bd18 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) From 3338e4f90c143cf32f77d64f464cb7f2c2d24700 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:31 +0000 Subject: [PATCH 1281/1523] drm/xe: Use topology to determine page fault queue size Currently the page fault queue size is hard coded. However the hardware supports faulting for each EU and each CS. For some applications running on hardware with a large number of EUs and CSs, this can result in an overflow of the page fault queue. Add a small calculation to determine the page fault queue size based on the number of EUs and CSs in the platform as detmined by fuses. Signed-off-by: Stuart Summers Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/24d582a3b48c97793b8b6a402f34b4b469471636.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 54 +++++++++++++++++++++------- drivers/gpu/drm/xe/xe_gt_types.h | 9 +++-- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index b2a7fa55bd18..401c0527d914 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) PFD_VIRTUAL_ADDR_LO_SHIFT; pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - PF_QUEUE_NUM_DW; + pf_queue->num_dw; ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, + pf_queue->num_dw) <= PF_MSG_LEN_DW; } @@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); - if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); + spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; + pf_queue->head = (pf_queue->head + len) % + pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -386,26 +388,54 @@ static void pagefault_fini(void *arg) { struct xe_gt *gt = arg; struct xe_device *xe = gt_to_xe(gt); + int i; if (!xe->info.has_usm) return; destroy_workqueue(gt->usm.acc_wq); destroy_workqueue(gt->usm.pf_wq); + + for (i = 0; i < NUM_PF_QUEUE; ++i) + kfree(gt->usm.pf_queue[i].data); +} + +static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) +{ + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + /* user can issue separate page faults per EU and per CS */ + pf_queue->num_dw = + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + + pf_queue->gt = gt; + pf_queue->data = kzalloc(pf_queue->num_dw, GFP_KERNEL); + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, pf_queue_work_func); + + return 0; } int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int i; + int i, ret = 0; if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { - gt->usm.pf_queue[i].gt = gt; - spin_lock_init(>->usm.pf_queue[i].lock); - INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); + if (ret) + return ret; } for (i = 0; i < NUM_ACC_QUEUE; ++i) { gt->usm.acc_queue[i].gt = gt; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 82cf9f631736..31946d7fe701 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -247,9 +247,14 @@ struct xe_gt { struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; -#define PF_QUEUE_NUM_DW 128 /** @usm.pf_queue.data: data in the page fault queue */ - u32 data[PF_QUEUE_NUM_DW]; + u32 *data; + /** + * @usm.pf_queue.num_dw: number of DWORDS in the page + * fault queue. Dynamically calculated based on the number + * of compute resources available. + */ + u32 num_dw; /** * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * moved by worker which processes faults (consumer). From df2dbc925fad3274b952b865f85d26d1e056c1cc Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:32 +0000 Subject: [PATCH 1282/1523] drm/xe/guc: Bump the G2H queue size to account for page faults With the increase in the size of the recoverable page fault queue, we want to ensure the initial messages from GuC in the G2H buffer have space while we transfer those out to the actual pf_queue. Bump the G2H queue size to account for this increase in the pf_queue size. Reviewed-by: Matthew Brost Signed-off-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/4c2b6974801bcffd8a010d838c8733fa4092573d.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index beeeb120d1fc..f24dd5223926 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct) * enough space to avoid backpressure on the driver. We increase the size * of the receive buffer (relative to the send) to ensure a G2H response * CTB has a landing spot. + * + * In addition to submissions, the G2H buffer needs to be able to hold + * enough space for recoverable page fault notifications. The number of + * page faults is interrupt driven and can be as much as the number of + * compute resources available. However, most of the actual work for these + * is in a separate page fault worker thread. Therefore we only need to + * make sure the queue has enough space to handle all of the submissions + * and responses and an extra buffer for incoming page faults. */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) -#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +#define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full From 60db6f540af9f93144d5039140aa2ed17171d168 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 12:35:22 -0700 Subject: [PATCH 1283/1523] drm/xe: Drop HW fence pointer to HW fence ctx The HW fence ctx objects are not ref counted rather tied to the life of an LRC object. HW fences reference the HW fence ctx, HW fences can outlive LRCs thus resulting in UAF. Drop the HW fence pointer to HW fence ctx rather just store what is needed directly in HW fence. v2: - Fix typo in commit (Ashutosh) - Use snprintf (Ashutosh) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240815193522.16008-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_fence.c | 9 +++++---- drivers/gpu/drm/xe/xe_hw_fence_types.h | 7 +++++-- drivers/gpu/drm/xe/xe_trace.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include #include +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 1abdb30cb7ad..8573d7a87d84 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) From 50b2143356e888777fc5bca023c39f34f404613a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:24 +0200 Subject: [PATCH 1284/1523] ice: fix page reuse when PAGE_SIZE is over 8k Architectures that have PAGE_SIZE >= 8192 such as arm64 should act the same as x86 currently, meaning reuse of a page should only take place when no one else is busy with it. Do two things independently of underlying PAGE_SIZE: - store the page count under ice_rx_buf::pgcnt - then act upon its value vs ice_rx_buf::pagecnt_bias when making the decision regarding page reuse Fixes: 2b245cb29421 ("ice: Implement transmit and NAPI support") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8d25b6981269..50211188c1a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -837,16 +837,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +948,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) From b966ad832942b5a11e002f9b5ef102b08425b84a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:25 +0200 Subject: [PATCH 1285/1523] ice: fix ICE_LAST_OFFSET formula For bigger PAGE_SIZE archs, ice driver works on 3k Rx buffers. Therefore, ICE_LAST_OFFSET should take into account ICE_RXBUF_3072, not ICE_RXBUF_2048. Fixes: 7237f5b0dba4 ("ice: introduce legacy Rx flag") Suggested-by: Luiz Capitulino Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 50211188c1a7..4b690952bb40 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -842,7 +842,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) return false; #if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; #endif /* PAGE_SIZE >= 8192) */ From d53d4dcce69be5773e2d0878c9899ebfbf58c393 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:26 +0200 Subject: [PATCH 1286/1523] ice: fix truesize operations for PAGE_SIZE >= 8192 When working on multi-buffer packet on arch that has PAGE_SIZE >= 8192, truesize is calculated and stored in xdp_buff::frame_sz per each processed Rx buffer. This means that frame_sz will contain the truesize based on last received buffer, but commit 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") assumed this value will be constant for each buffer, which breaks the page recycling scheme and mess up the way we update the page::page_offset. To fix this, let us work on constant truesize when PAGE_SIZE >= 8192 instead of basing this on size of a packet read from Rx descriptor. This way we can simplify the code and avoid calculating truesize per each received frame and on top of that when using xdp_update_skb_shared_info(), current formula for truesize update will be valid. This means ice_rx_frame_truesize() can be removed altogether. Furthermore, first call to it within ice_clean_rx_irq() for 4k PAGE_SIZE was redundant as xdp_buff::frame_sz is initialized via xdp_init_buff() in ice_vsi_cfg_rxq(). This should have been removed at the point where xdp_buff struct started to be a member of ice_rx_ring and it was no longer a stack based variable. There are two fixes tags as my understanding is that the first one exposed us to broken truesize and page_offset handling and then second introduced broken skb_shared_info update in ice_{construct,build}_skb(). Reported-and-tested-by: Luiz Capitulino Closes: https://lore.kernel.org/netdev/8f9e2a5c-fd30-4206-9311-946a06d031bb@redhat.com/ Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 21 ++++++++++++++- drivers/net/ethernet/intel/ice/ice_txrx.c | 33 ----------------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1facf179a96f..f448d3a84564 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -512,6 +512,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) xsk_pool_fill_cb(ring->xsk_pool, &desc); } +/** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 4b690952bb40..c9bc3f1add5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -521,30 +521,6 @@ err: return -ENOMEM; } -/** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - /** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring @@ -1154,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1217,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; From 503ab6ee40fc103ea55cc9e50bb879e571d65aac Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 19 Aug 2024 09:17:42 +0200 Subject: [PATCH 1287/1523] ice: use internal pf id instead of function number Use always the same pf id in devlink port number. When doing pass-through the PF to VM bus info func number can be any value. Fixes: 2ae0aa4758b0 ("ice: Move devlink port to PF/VF struct") Reviewed-by: Wojciech Drewek Suggested-by: Jiri Pirko Signed-off-by: Michal Swiatkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index 00fed5a61d62..62ef8e2fb5f1 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -337,7 +337,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) return -EIO; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; + attrs.phys.port_number = pf->hw.pf_id; /* As FW supports only port split options for whole device, * set port split options only for first PF. @@ -455,7 +455,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) return -EINVAL; attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.pf = pf->hw.pf_id; attrs.pci_vf.vf = vf->vf_id; ice_devlink_set_switch_id(pf, &attrs.switch_id); From c50e7475961c36ec4d21d60af055b32f9436b431 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 17 Aug 2024 09:52:46 +0300 Subject: [PATCH 1288/1523] dpaa2-switch: Fix error checking in dpaa2_switch_seed_bp() The dpaa2_switch_add_bufs() function returns the number of bufs that it was able to add. It returns BUFS_PER_CMD (7) for complete success or a smaller number if there are not enough pages available. However, the error checking is looking at the total number of bufs instead of the number which were added on this iteration. Thus the error checking only works correctly for the first iteration through the loop and subsequent iterations are always counted as a success. Fix this by checking only the bufs added in the current iteration. Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Ioana Ciornei Tested-by: Ioana Ciornei Link: https://patch.msgid.link/eec27f30-b43f-42b6-b8ee-04a6f83423b6@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a71f848adc05..a293b08f36d4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) { - int *count, i; + int *count, ret, i; for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { + ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); count = ðsw->buf_count; - *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + *count += ret; - if (unlikely(*count < BUFS_PER_CMD)) + if (unlikely(ret < BUFS_PER_CMD)) return -ENOMEM; } From 80a1e7b83bb1834b5568a3872e64c05795d88f31 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Mon, 19 Aug 2024 10:54:08 +0300 Subject: [PATCH 1289/1523] cxgb4: add forgotten u64 ivlan cast before shift It is done everywhere in cxgb4 code, e.g. in is_filter_exact_match() There is no reason it should not be done here Found by Linux Verification Center (linuxtesting.org) with SVACE Signed-off-by: Nikolay Kuratov Cc: stable@vger.kernel.org Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240819075408.92378-1-kniv@yandex-team.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 786ceae34488..dd9e68465e69 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1244,7 +1244,8 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs, * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && fs->mask.ivlan) - ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + ntuple |= (u64)(FT_VLAN_VLD_F | + fs->val.ivlan) << tp->vlan_shift; if (tp->port_shift >= 0 && fs->mask.iport) ntuple |= (u64)fs->val.iport << tp->port_shift; From 8aba27c4a5020abdf60149239198297f88338a8d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 16 Aug 2024 17:20:34 +0200 Subject: [PATCH 1290/1523] igb: cope with large MAX_SKB_FRAGS Sabrina reports that the igb driver does not cope well with large MAX_SKB_FRAG values: setting MAX_SKB_FRAG to 45 causes payload corruption on TX. An easy reproducer is to run ssh to connect to the machine. With MAX_SKB_FRAGS=17 it works, with MAX_SKB_FRAGS=45 it fails. This has been reported originally in https://bugzilla.redhat.com/show_bug.cgi?id=2265320 The root cause of the issue is that the driver does not take into account properly the (possibly large) shared info size when selecting the ring layout, and will try to fit two packets inside the same 4K page even when the 1st fraglist will trump over the 2nd head. Address the issue by checking if 2K buffers are insufficient. Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") Reported-by: Jan Tluka Reported-by: Jirka Hladky Reported-by: Sabrina Dubroca Tested-by: Sabrina Dubroca Tested-by: Corinna Vinschen Signed-off-by: Paolo Abeni Signed-off-by: Corinna Vinschen Link: https://patch.msgid.link/20240816152034.1453285-1-vinschen@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 11be39f435f3..33a42b4c21e0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4808,6 +4808,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, #if (PAGE_SIZE < 8192) if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || rd32(E1000_RCTL) & E1000_RCTL_SBP) set_ring_uses_large_buffer(rx_ring); #endif From 6efea5135417ae8194485d1d05ea79a21cf1a11c Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Sat, 17 Aug 2024 10:41:41 +0100 Subject: [PATCH 1291/1523] net: dsa: microchip: fix PTP config failure when using multiple ports When performing the port_hwtstamp_set operation, ptp_schedule_worker() will be called if hardware timestamoing is enabled on any of the ports. When using multiple ports for PTP, port_hwtstamp_set is executed for each port. When called for the first time ptp_schedule_worker() returns 0. On subsequent calls it returns 1, indicating the worker is already scheduled. Currently the ksz driver treats 1 as an error and fails to complete the port_hwtstamp_set operation, thus leaving the timestamping configuration for those ports unchanged. This patch fixes this by ignoring the ptp_schedule_worker() return value. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/7aae307a-35ca-4209-a850-7b2749d40f90@martin-whitaker.me.uk Fixes: bb01ad30570b0 ("net: dsa: microchip: ptp: manipulating absolute time using ptp hw clock") Signed-off-by: Martin Whitaker Reviewed-by: Andrew Lunn Acked-by: Arun Ramadoss Link: https://patch.msgid.link/20240817094141.3332-1-foss@martin-whitaker.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_ptp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index f0bd46e5d4ec..050f17c43ef6 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -266,7 +266,6 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) struct ksz_port *prt; struct dsa_port *dp; bool tag_en = false; - int ret; dsa_switch_for_each_user_port(dp, dev->ds) { prt = &dev->ports[dp->index]; @@ -277,9 +276,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) } if (tag_en) { - ret = ptp_schedule_worker(ptp_data->clock, 0); - if (ret) - return ret; + ptp_schedule_worker(ptp_data->clock, 0); } else { ptp_cancel_worker_sync(ptp_data->clock); } From 528876d867a23b5198022baf2e388052ca67c952 Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Mon, 19 Aug 2024 19:52:50 -0400 Subject: [PATCH 1292/1523] net: dsa: mv88e6xxx: Fix out-of-bound access If an ATU violation was caused by a CPU Load operation, the SPID could be larger than DSA_MAX_PORTS (the size of mv88e6xxx_chip.ports[] array). Fixes: 75c05a74e745 ("net: dsa: mv88e6xxx: Fix counting of ATU violations") Signed-off-by: Joseph Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240819235251.1331763-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global1_atu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index ce3b3690c3c0..c47f068f56b3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -457,7 +457,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) trace_mv88e6xxx_atu_full_violation(chip->dev, spid, entry.portvec, entry.mac, fid); - chip->ports[spid].atu_full_violation++; + if (spid < ARRAY_SIZE(chip->ports)) + chip->ports[spid].atu_full_violation++; } return IRQ_HANDLED; From c07ff8592d57ed258afee5a5e04991a48dbaf382 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 19 Aug 2024 10:56:45 -0700 Subject: [PATCH 1293/1523] netem: fix return value if duplicate enqueue fails There is a bug in netem_enqueue() introduced by commit 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") that can lead to a use-after-free. This commit made netem_enqueue() always return NET_XMIT_SUCCESS when a packet is duplicated, which can cause the parent qdisc's q.qlen to be mistakenly incremented. When this happens qlen_notify() may be skipped on the parent during destruction, leaving a dangling pointer for some classful qdiscs like DRR. There are two ways for the bug happen: - If the duplicated packet is dropped by rootq->enqueue() and then the original packet is also dropped. - If rootq->enqueue() sends the duplicated packet to a different qdisc and the original packet is dropped. In both cases NET_XMIT_SUCCESS is returned even though no packets are enqueued at the netem qdisc. The fix is to defer the enqueue of the duplicate packet until after the original packet has been guaranteed to return NET_XMIT_SUCCESS. Fixes: 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") Reported-by: Budimir Markovic Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240819175753.5151-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 47 ++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edc72962ae63..0f8d581438c3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; - struct sk_buff *skb2; + struct sk_buff *skb2 = NULL; struct sk_buff *segs = NULL; unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; - int rc = NET_XMIT_SUCCESS; - int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_orphan_partial(skb); /* - * If we need to duplicate packet, then re-insert at top of the - * qdisc tree, since parent queuer expects that only one - * skb will be queued. + * If we need to duplicate packet, then clone it before + * original is modified. */ - if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - - q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; - rc_drop = NET_XMIT_SUCCESS; - } + if (count > 1) + skb2 = skb_clone(skb, GFP_ATOMIC); /* * Randomized packet corruption. @@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) - return rc_drop; + goto finish_segs; + segs = skb->next; skb_mark_not_on_list(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); - return rc_drop; + if (skb2) + __qdisc_drop(skb2, to_free); + return NET_XMIT_DROP; + } + + /* + * If doing duplication then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (skb2) { + struct Qdisc *rootq = qdisc_root_bh(sch); + u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ + + q->duplicate = 0; + rootq->enqueue(skb2, rootq, to_free); + q->duplicate = dupsave; + skb2 = NULL; } qdisc_qstats_backlog_inc(sch, skb); @@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } finish_segs: + if (skb2) + __qdisc_drop(skb2, to_free); + if (segs) { unsigned int len, last_len; - int nb; + int rc, nb; len = skb ? skb->len : 0; nb = skb ? 1 : 0; From 0005e01e1e875c5e27130c5e2ed0189749d1e08a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 20 Aug 2024 16:56:19 +0800 Subject: [PATCH 1294/1523] erofs: fix out-of-bound access when z_erofs_gbuf_growsize() partially fails If z_erofs_gbuf_growsize() partially fails on a global buffer due to memory allocation failure or fault injection (as reported by syzbot [1]), new pages need to be freed by comparing to the existing pages to avoid memory leaks. However, the old gbuf->pages[] array may not be large enough, which can lead to null-ptr-deref or out-of-bound access. Fix this by checking against gbuf->nrpages in advance. [1] https://lore.kernel.org/r/000000000000f7b96e062018c6e3@google.com Reported-by: syzbot+242ee56aaa9585553766@syzkaller.appspotmail.com Fixes: d6db47e571dc ("erofs: do not use pagepool in z_erofs_gbuf_growsize()") Cc: # 6.10+ Reviewed-by: Chunhai Guo Reviewed-by: Sandeep Dhavale Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240820085619.1375963-1-hsiangkao@linux.alibaba.com --- fs/erofs/zutil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 9b53883e5caf..37afe2024840 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -111,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } From e255683c06df572ead96db5efb5d21be30c0efaa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:19 +0200 Subject: [PATCH 1295/1523] mptcp: pm: re-using ID of unused removed ADD_ADDR If no subflow is attached to the 'signal' endpoint that is being removed, the addr ID will not be marked as available again. Mark the linked ID as available when removing the address entry from the list to cover this case. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-1-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4cae2aa7be5c..26f0329e16bb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1431,7 +1431,10 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_signaled -= ret; + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } From a13d5aad4dd9a309eecdc33cfd75045bd5f376a3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:20 +0200 Subject: [PATCH 1296/1523] selftests: mptcp: join: check re-using ID of unused ADD_ADDR This test extends "delete re-add signal" to validate the previous commit. An extra address is announced by the server, but this address cannot be used by the client. The result is that no subflow will be established to this address. Later, the server will delete this extra endpoint, and set a new one, with a valid address, but re-using the same ID. Before the previous commit, the server would not have been able to announce this new address. While at it, extra checks have been added to validate the expected numbers of MPJ, ADD_ADDR and RM_ADDR. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-2-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ea6d698e9d3..25077ccf31d2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3601,9 +3601,11 @@ endpoint_tests() # remove and re-add if reset "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3615,15 +3617,21 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 - chk_mptcp_info subflows 1 subflows 1 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_add_nr 4 4 + chk_rm_nr 2 1 invert fi } From edd8b5d868a4d459f3065493001e293901af758d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:21 +0200 Subject: [PATCH 1297/1523] mptcp: pm: re-using ID of unused removed subflows If no subflow is attached to the 'subflow' endpoint that is being removed, the addr ID will not be marked as available again. Mark the linked ID as available when removing the 'subflow' endpoint if no subflow is attached to it. While at it, the local_addr_used counter is decremented if the ID was marked as being used to reflect the reality, but also to allow adding new endpoints after that. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-3-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 26f0329e16bb..8b232a210a06 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1469,8 +1469,17 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - if (remove_subflow) + + if (remove_subflow) { mptcp_pm_remove_subflow(msk, &list); + } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + /* If the subflow has been used, but now closed */ + spin_lock_bh(&msk->pm.lock); + if (!__test_and_set_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + msk->pm.local_addr_used--; + spin_unlock_bh(&msk->pm.lock); + } + release_sock(sk); next: From 65fb58afa341ad68e71e5c4d816b407e6a683a66 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:22 +0200 Subject: [PATCH 1298/1523] selftests: mptcp: join: check re-using ID of closed subflow This test extends "delete and re-add" to validate the previous commit. A new 'subflow' endpoint is added, but the subflow request will be rejected. The result is that no subflow will be established from this address. Later, the endpoint is removed and re-added after having cleared the firewall rule. Before the previous commit, the client would not have been able to create this new subflow. While at it, extra checks have been added to validate the expected numbers of MPJ and RM_ADDR. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-4-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 25077ccf31d2..fbb0174145ad 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -436,9 +436,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -3571,10 +3572,10 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & @@ -3591,11 +3592,27 @@ endpoint_tests() chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_rm_nr 1 1 fi # remove and re-add From ef34a6ea0cab1800f4b3c9c3c2cefd5091e03379 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:23 +0200 Subject: [PATCH 1299/1523] mptcp: pm: re-using ID of unused flushed subflows If no subflows are attached to the 'subflow' endpoints that are being flushed, the corresponding addr IDs will not be marked as available again. Mark all ID as being available when flushing all the 'subflow' endpoints, and reset local_addr_used counter to cover these cases. Note that mptcp_pm_remove_addrs_and_subflows() helper is only called for flushing operations, not to remove a specific set of addresses and subflows. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-5-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8b232a210a06..2c26696b820e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1623,8 +1623,15 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } + if (slist.nr) mptcp_pm_remove_subflow(msk, &slist); + + /* Reset counters: maybe some subflows have been removed before */ + spin_lock_bh(&msk->pm.lock); + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); } static void mptcp_nl_remove_addrs_list(struct net *net, From e06959e9eebdfea4654390f53b65cff57691872e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:24 +0200 Subject: [PATCH 1300/1523] selftests: mptcp: join: test for flush/re-add endpoints After having flushed endpoints that didn't cause the creation of new subflows, it is important to check endpoints can be re-created, re-using previously used IDs. Before the previous commit, the client would not have been able to re-create the subflow that was previously rejected. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-6-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fbb0174145ad..f609c02c6123 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3651,6 +3651,36 @@ endpoint_tests() chk_rm_nr 2 1 invert fi + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert + fi } # [$1: error message] From f448451aa62d54be16acb0034223c17e0d12bc69 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:25 +0200 Subject: [PATCH 1301/1523] mptcp: pm: remove mptcp_pm_remove_subflow() This helper is confusing. It is in pm.c, but it is specific to the in-kernel PM and it cannot be used by the userspace one. Also, it simply calls one in-kernel specific function with the PM lock, while the similar mptcp_pm_remove_addr() helper requires the PM lock. What's left is the pr_debug(), which is not that useful, because a similar one is present in the only function called by this helper: mptcp_pm_nl_rm_subflow_received() After these modifications, this helper can be marked as 'static', and the lock can be taken only once in mptcp_pm_flush_addrs_and_subflows(). Note that it is not a bug fix, but it will help backporting the following commits. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-7-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 10 ---------- net/mptcp/pm_netlink.c | 16 +++++++--------- net/mptcp/protocol.h | 3 --- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 23bb89c94e90..925123e99889 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) -{ - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); - - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, rm_list); - spin_unlock_bh(&msk->pm.lock); - return 0; -} - /* path manager event handlers */ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 2c26696b820e..44fc1c5959ac 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -857,8 +857,8 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) +static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } @@ -1471,7 +1471,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); if (remove_subflow) { - mptcp_pm_remove_subflow(msk, &list); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { /* If the subflow has been used, but now closed */ spin_lock_bh(&msk->pm.lock); @@ -1617,18 +1619,14 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, alist.ids[alist.nr++] = entry->addr.id; } + spin_lock_bh(&msk->pm.lock); if (alist.nr) { - spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); } - if (slist.nr) - mptcp_pm_remove_subflow(msk, &slist); - + mptcp_pm_nl_rm_subflow_received(msk, &slist); /* Reset counters: maybe some subflows have been removed before */ - spin_lock_bh(&msk->pm.lock); bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); msk->pm.local_addr_used = 0; spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60c6b073d65f..a1c1b0ff1ce1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1026,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -1133,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); From 322ea3778965da72862cca2a0c50253aacf65fe6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:26 +0200 Subject: [PATCH 1302/1523] mptcp: pm: only mark 'subflow' endp as available Adding the following warning ... WARN_ON_ONCE(msk->pm.local_addr_used == 0) ... before decrementing the local_addr_used counter helped to find a bug when running the "remove single address" subtest from the mptcp_join.sh selftests. Removing a 'signal' endpoint will trigger the removal of all subflows linked to this endpoint via mptcp_pm_nl_rm_addr_or_subflow() with rm_type == MPTCP_MIB_RMSUBFLOW. This will decrement the local_addr_used counter, which is wrong in this case because this counter is linked to 'subflow' endpoints, and here it is a 'signal' endpoint that is being removed. Now, the counter is decremented, only if the ID is being used outside of mptcp_pm_nl_rm_addr_or_subflow(), only for 'subflow' endpoints, and if the ID is not 0 -- local_addr_used is not taking into account these ones. This marking of the ID as being available, and the decrement is done no matter if a subflow using this ID is currently available, because the subflow could have been closed before. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-8-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 44fc1c5959ac..4cf7cc851f80 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -833,10 +833,10 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); - else if (rm_type == MPTCP_MIB_RMADDR) + + if (rm_type == MPTCP_MIB_RMADDR) __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (!removed) continue; @@ -846,8 +846,6 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMADDR) { msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); - } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { - msk->pm.local_addr_used--; } } } @@ -1441,6 +1439,14 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, return ret; } +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { @@ -1474,11 +1480,11 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); spin_unlock_bh(&msk->pm.lock); - } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - /* If the subflow has been used, but now closed */ + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { spin_lock_bh(&msk->pm.lock); - if (!__test_and_set_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - msk->pm.local_addr_used--; + __mark_subflow_endp_available(msk, list.ids[0]); spin_unlock_bh(&msk->pm.lock); } @@ -1516,6 +1522,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1917,6 +1924,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); } From 1c1f721375989579e46741f59523e39ec9b2a9bd Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:27 +0200 Subject: [PATCH 1303/1523] mptcp: pm: only decrement add_addr_accepted for MPJ req Adding the following warning ... WARN_ON_ONCE(msk->pm.add_addr_accepted == 0) ... before decrementing the add_addr_accepted counter helped to find a bug when running the "remove single subflow" subtest from the mptcp_join.sh selftest. Removing a 'subflow' endpoint will first trigger a RM_ADDR, then the subflow closure. Before this patch, and upon the reception of the RM_ADDR, the other peer will then try to decrement this add_addr_accepted. That's not correct because the attached subflows have not been created upon the reception of an ADD_ADDR. A way to solve that is to decrement the counter only if the attached subflow was an MP_JOIN to a remote id that was not 0, and initiated by the host receiving the RM_ADDR. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-9-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4cf7cc851f80..882781571c7b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -829,7 +829,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - removed = true; + removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } @@ -843,7 +843,11 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (!mptcp_pm_is_kernel(msk)) continue; - if (rm_type == MPTCP_MIB_RMADDR) { + if (rm_type == MPTCP_MIB_RMADDR && rm_id && + !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. + */ msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); } From 0137a3c7c2ea3f9df8ebfc65d78b4ba712a187bb Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:28 +0200 Subject: [PATCH 1304/1523] mptcp: pm: check add_addr_accept_max before accepting new ADD_ADDR The limits might have changed in between, it is best to check them before accepting new ADD_ADDR. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-10-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 882781571c7b..28a9a3726146 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -848,8 +848,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, /* Note: if the subflow has been closed before, this * add_addr_accepted counter will not be decremented. */ - msk->pm.add_addr_accepted--; - WRITE_ONCE(msk->pm.accept_addr, true); + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); } } } From ca6e55a703ca2894611bb5c5bca8bfd2290fd91e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:29 +0200 Subject: [PATCH 1305/1523] mptcp: pm: only in-kernel cannot have entries with ID 0 The ID 0 is specific per MPTCP connections. The per netns entries cannot have this special ID 0 then. But that's different for the userspace PM where the entries are per connection, they can then use this special ID 0. Fixes: f40be0db0b76 ("mptcp: unify pm get_flags_and_ifindex_by_id") Cc: stable@vger.kernel.org Acked-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-11-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 --- net/mptcp/pm_netlink.c | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 925123e99889..3e6e0f5510bb 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -434,9 +434,6 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id *flags = 0; *ifindex = 0; - if (!id) - return 0; - if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 28a9a3726146..d0a80f537fc3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1395,6 +1395,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + /* No entries with ID 0 */ + if (id == 0) + return 0; + rcu_read_lock(); entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { From 09355f7abb9fbfc1a240be029837921ea417bf4f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:30 +0200 Subject: [PATCH 1306/1523] mptcp: pm: fullmesh: select the right ID later When reacting upon the reception of an ADD_ADDR, the in-kernel PM first looks for fullmesh endpoints. If there are some, it will pick them, using their entry ID. It should set the ID 0 when using the endpoint corresponding to the initial subflow, it is a special case imposed by the MPTCP specs. Note that msk->mpc_endpoint_id might not be set when receiving the first ADD_ADDR from the server. So better to compare the addresses. Fixes: 1a0d6136c5f0 ("mptcp: local addresses fullmesh") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-12-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d0a80f537fc3..a2e37ab1c40f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -636,6 +636,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; struct pm_nl_pernet *pernet; unsigned int subflows_max; int i = 0; @@ -643,6 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) @@ -653,7 +656,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, if (msk->pm.subflows < subflows_max) { msk->pm.subflows++; - addrs[i++] = entry->addr; + addrs[i] = entry->addr; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) + addrs[i].id = 0; + + i++; } } rcu_read_unlock(); From 4878f9f8421f4587bee7b232c1c8a9d3a7d4d782 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:31 +0200 Subject: [PATCH 1307/1523] selftests: mptcp: join: validate fullmesh endp on 1st sf This case was not covered, and the wrong ID was set before the previous commit. The rest is not modified, it is just that it will increase the code coverage. The right address ID can be verified by looking at the packet traces. We could automate that using Netfilter with some cBPF code for example, but that's always a bit cryptic. Packetdrill seems better fitted for that. Fixes: 4f49d63352da ("selftests: mptcp: add fullmesh testcases") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-13-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f609c02c6123..89e553e0e0c2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3059,6 +3059,7 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 From 48e50dcbcbaaf713d82bf2da5c16aeced94ad07d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:32 +0200 Subject: [PATCH 1308/1523] mptcp: pm: avoid possible UaF when selecting endp select_local_address() and select_signal_address() both select an endpoint entry from the list inside an RCU protected section, but return a reference to it, to be read later on. If the entry is dereferenced after the RCU unlock, reading info could cause a Use-after-Free. A simple solution is to copy the required info while inside the RCU protected section to avoid any risk of UaF later. The address ID might need to be modified later to handle the ID0 case later, so a copy seems OK to deal with. Reported-by: Paolo Abeni Closes: https://lore.kernel.org/45cd30d3-7710-491c-ae4d-a1368c00beb1@redhat.com Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-14-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 64 ++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a2e37ab1c40f..3e4ad801786f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, return false; } -static struct mptcp_pm_addr_entry * +static bool select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk) + const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; msk_owned_by_me(msk); @@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } -static struct mptcp_pm_addr_entry * -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; rcu_read_lock(); /* do not keep any additional per socket state, just signal @@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) @@ -512,9 +520,10 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; + bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; unsigned int subflows_max; @@ -565,23 +574,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - local = select_signal_address(pernet, msk); - if (!local) + if (!select_signal_address(pernet, msk, &local)) goto subflow; /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ - if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); - if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - signal_and_subflow = local; + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; } subflow: @@ -592,26 +600,22 @@ subflow: bool fullmesh; int i, nr; - if (signal_and_subflow) { - local = signal_and_subflow; - signal_and_subflow = NULL; - } else { - local = select_local_address(pernet, msk); - if (!local) - break; - } + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) + break; - fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); From dba1a6cfc311833e10df978f07147ea93b7045fa Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Mar 2024 06:42:38 +0530 Subject: [PATCH 1309/1523] drm/amdgpu: Enforce isolation as part of the job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new parameter 'enforce_isolation' to the amdgpu_job structure. This parameter is used to determine whether shader isolation should be enforced for a job. The enforce_isolation parameter is then stored in the amdgpu_job structure and used when flushing the VM. The enforce_isolation field of the amdgpu_job structure is set directly after the job is allocated This change allows more fine-grained control over shader isolation, making it possible to enforce isolation on a per-job basis rather than globally. This can be useful in scenarios where only certain jobs require isolation. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5d5ba1e3d90f..1e475eb01417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,6 +296,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; + p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; } p->gang_leader = p->jobs[p->gang_leader_idx]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a963a25ddd62..ce6b9ba967ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + /* enforce isolation */ + bool enforce_isolation; + uint32_t num_ibs; struct amdgpu_ib ibs[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 71ef3308be92..1468222ea0cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -682,7 +682,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, ring->funcs->emit_wreg; if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader) + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) ring->funcs->emit_cleaner_shader(ring); if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) From e189be9b2e3820c88164d95090f1fd6343cd77fc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 May 2024 07:30:47 +0530 Subject: [PATCH 1310/1523] drm/amdgpu: Add enforce_isolation sysfs attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a new sysfs attribute 'enforce_isolation' to control the 'enforce_isolation' setting per GPU. The attribute can be read and written, and accepts values 0 (disabled) and 1 (enabled). When 'enforce_isolation' is enabled, reserved VMIDs are allocated for each ring. When it's disabled, the reserved VMIDs are freed. The set function locks a mutex before changing the 'enforce_isolation' flag and the VMIDs, and unlocks it afterwards. This ensures that these operations are atomic and prevents race conditions and other concurrency issues. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 101 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 4 files changed, 107 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0dceeea235cf..aa97bbefe934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1164,6 +1164,8 @@ struct amdgpu_device { bool debug_enable_ras_aca; bool enforce_isolation[MAX_XCP]; + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + struct mutex enforce_isolation_mutex; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e623af740aa3..2f1bc02309fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4065,6 +4065,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 4ed69fcfe9c1..2e35fc2577f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1391,6 +1391,88 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int i; + ssize_t size = 0; + + if (adev->xcp_mgr) { + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); + if (i < (adev->xcp_mgr->num_xcps - 1)) + size += sysfs_emit_at(buf, size, " "); + } + buf[size++] = '\n'; + } else { + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); + } + + return size; +} + +static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + long partition_values[MAX_XCP] = {0}; + int ret, i, num_partitions; + const char *input_buf = buf; + + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + ret = sscanf(input_buf, "%ld", &partition_values[i]); + if (ret <= 0) + break; + + /* Move the pointer to the next value in the string */ + input_buf = strchr(input_buf, ' '); + if (input_buf) { + input_buf++; + } else { + i++; + break; + } + } + num_partitions = i; + + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) + return -EINVAL; + + if (!adev->xcp_mgr && num_partitions != 1) + return -EINVAL; + + for (i = 0; i < num_partitions; i++) { + if (partition_values[i] != 0 && partition_values[i] != 1) + return -EINVAL; + } + + mutex_lock(&adev->enforce_isolation_mutex); + + for (i = 0; i < num_partitions; i++) { + if (adev->enforce_isolation[i] && !partition_values[i]) { + /* Going from enabled to disabled */ + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + } else if (!adev->enforce_isolation[i] && partition_values[i]) { + /* Going from disabled to enabled */ + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } + adev->enforce_isolation[i] = partition_values[i]; + } + + mutex_unlock(&adev->enforce_isolation_mutex); + + return count; +} + +static DEVICE_ATTR(enforce_isolation, 0644, + amdgpu_gfx_get_enforce_isolation, + amdgpu_gfx_set_enforce_isolation); + static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); @@ -1417,6 +1499,25 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_available_compute_partition); } +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_sriov_vf(adev)) { + r = device_create_file(adev->dev, &dev_attr_enforce_isolation); + if (r) + return r; + } + + return 0; +} + +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + device_remove_file(adev->dev, &dev_attr_enforce_isolation); +} + int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 09379ef7388f..f7b37c340e36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -561,6 +561,8 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size, const void *cleaner_shader_ptr); +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From d361ad5d2fc0e4d59d5d538092c9b37889756642 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 May 2024 07:38:21 +0530 Subject: [PATCH 1311/1523] drm/amdgpu: Add sysfs interface for running cleaner shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new sysfs interface for running the cleaner shader on AMD GPUs. The cleaner shader is used to clear GPU memory before it's reused, which can help prevent data leakage between different processes. The new sysfs file is write-only and is named `run_cleaner_shader`. Write the number of the partition to this file to trigger the cleaner shader on that partition. There is only one partition on GPUs which do not support partitioning. Changes made in this patch: - Added `amdgpu_set_run_cleaner_shader` function to handle writes to the `run_cleaner_shader` sysfs file. - Added `run_cleaner_shader` to the list of device attributes in `amdgpu_device_attrs`. - Updated `default_attr_update` to handle `run_cleaner_shader`. - Added `AMDGPU_DEVICE_ATTR_WO` macro to create write-only device attributes. v2: fix error handling (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 134 ++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 2e35fc2577f9..76f77cf562af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -24,10 +24,13 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" @@ -1391,6 +1394,129 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + long timeout = msecs_to_jiffies(1000); + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); + if (r) + goto err; + + job->enforce_isolation = true; + + ib = &job->ibs[0]; + for (i = 0; i <= ring->funcs->align_mask; ++i) + ib->ptr[i] = ring->funcs->nop; + ib->length_dw = ring->funcs->align_mask + 1; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + amdgpu_ib_free(adev, ib, f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + amdgpu_ib_free(adev, ib, f); +err: + return r; +} + +static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) +{ + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + struct amdgpu_ring *ring; + int num_xcc_to_clear; + int i, r, xcc_id; + + if (adev->gfx.num_xcc_per_xcp) + num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; + else + num_xcc_to_clear = 1; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + if ((ring->xcp_id == xcp_id) && ring->sched.ready) { + r = amdgpu_gfx_run_cleaner_shader_job(ring); + if (r) + return r; + num_xcc_to_clear--; + break; + } + } + } + + if (num_xcc_to_clear) + return -ENOENT; + + return 0; +} + +static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + long value; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtol(buf, 0, &value); + + if (ret) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + if (adev->xcp_mgr) { + if (value >= adev->xcp_mgr->num_xcps) + return -EINVAL; + } else { + if (value > 1) + return -EINVAL; + } + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = amdgpu_gfx_run_cleaner_shader(adev, value); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, struct device_attribute *attr, char *buf) @@ -1469,6 +1595,9 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return count; } +static DEVICE_ATTR(run_cleaner_shader, 0200, + NULL, amdgpu_gfx_set_run_cleaner_shader); + static DEVICE_ATTR(enforce_isolation, 0644, amdgpu_gfx_get_enforce_isolation, amdgpu_gfx_set_enforce_isolation); @@ -1509,6 +1638,10 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) return r; } + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); + if (r) + return r; + return 0; } @@ -1516,6 +1649,7 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) { if (!amdgpu_sriov_vf(adev)) device_remove_file(adev->dev, &dev_attr_enforce_isolation); + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, From 22ff907d4f0457b2800f4c6d4f40d4d4d31f7de1 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 7 Jul 2024 08:54:04 +0530 Subject: [PATCH 1312/1523] drm/amdgpu: Add PACKET3_RUN_CLEANER_SHADER for cleaner shader execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the PACKET3_RUN_CLEANER_SHADER definition. This packet is a command packet used to instruct the GPU to execute the cleaner shader. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. The PACKET3_RUN_CLEANER_SHADER packet is used to trigger the execution of the cleaner shader on the GPU. The packet consists of a header followed by a RESERVED field, which is programmed to zero. When the GPU receives this packet, it fetches and executes the cleaner shader instructions from the location specified in the packet. The cleaner shader feature helps to enhances security and reliability by preventing data leaks between workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15d.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e74e1983da53..b9cbeb389edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -413,6 +413,10 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 From c2e70d307f4491ff970208a41cce84c95771f340 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 21:56:57 +0530 Subject: [PATCH 1313/1523] drm/amdgpu/gfx9: Implement cleaner shader support for GFX9 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch modifies the gfx_v9_0_kiq_set_resources function to write the cleaner shader's memory controller address to the ring buffer. It also adds a new function, gfx_v9_0_ring_emit_cleaner_shader, which emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer. This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the gfx_v9_0 module. This packet is used to emit the cleaner shader, which is used to clear GPU memory before it's reused, helping to prevent data leakage between different processes. Finally, the patch updates the ring function structures to include the new gfx_v9_0_ring_emit_cleaner_shader function. This allows the cleaner shader to be emitted as part of the ring's operations. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 45 ++++++++++++++++--- .../drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h | 26 +++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index db21fb951e0e..3045b8b0796d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -50,6 +50,7 @@ #include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" +#include "gfx_v9_0_cleaner_shader.h" #include "gfx_v9_4_2.h" #include "asic_reg/pwr/pwr_10_0_offset.h" @@ -899,6 +900,12 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -908,8 +915,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -2211,6 +2218,12 @@ static int gfx_v9_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -2373,6 +2386,10 @@ static int gfx_v9_0_sw_init(void *handle) gfx_v9_0_alloc_ip_dump(adev); + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + return 0; } @@ -2408,6 +2425,8 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -3952,6 +3971,9 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_0_init_golden_registers(adev); @@ -7368,6 +7390,13 @@ static void gfx_v9_ip_dump(void *handle) } +static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7417,7 +7446,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7439,6 +7469,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .reset = gfx_v9_0_reset_kgq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7471,7 +7502,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7495,6 +7527,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_cntl = gfx_v9_0_ring_patch_cntl, .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7515,7 +7548,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7534,6 +7568,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h new file mode 100644 index 000000000000..36c0292b5110 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* Define the cleaner shader gfx_9_0 */ +static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = { + /* Add the cleaner shader code here */ +}; From d4c38154951b2bff6bfa4d5eb56df0bd08703cf9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:12:02 +0530 Subject: [PATCH 1314/1523] drm/amdgpu/gfx9: Implement cleaner shader support for GFX9.4.3 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch modifies the gfx_v9_4_3_kiq_set_resources function to write the cleaner shader's memory controller address to the ring buffer. It also adds a new function, gfx_v9_4_3_ring_emit_cleaner_shader, which emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer. This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the gfx_v9_4_3 module. This packet is used to emit the cleaner shader, which is used to clear GPU memory before it's reused, helping to prevent data leakage between different processes. Finally, the patch updates the ring function structures to include the new gfx_v9_4_3_ring_emit_cleaner_shader function. This allows the cleaner shader to be emitted as part of the ring's operations. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 57 +++++++++++++++++-- .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.h | 26 +++++++++ 2 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 619ff3ec2c86..28f4212a8db2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -37,6 +37,7 @@ #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "gfx_v9_4_3_cleaner_shader.h" #include "amdgpu_xcp.h" #include "amdgpu_aca.h" @@ -169,6 +170,12 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_i static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -178,8 +185,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -1047,6 +1054,24 @@ static int gfx_v9_4_3_sw_init(void *handle) int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 153) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -1140,12 +1165,19 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + } gfx_v9_4_3_alloc_ip_dump(adev); - return r; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + + return 0; } static int gfx_v9_4_3_sw_fini(void *handle) @@ -1163,11 +1195,14 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev, i); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -2308,6 +2343,9 @@ static int gfx_v9_4_3_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_4_3_init_golden_registers(adev); @@ -4565,6 +4603,13 @@ static void gfx_v9_4_3_ip_dump(void *handle) amdgpu_gfx_off_ctrl(adev, true); } +static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4604,7 +4649,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_4_3_emit_mem_sync */ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, .emit_fence = gfx_v9_4_3_ring_emit_fence, @@ -4623,6 +4669,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, .reset = gfx_v9_4_3_reset_kcq, + .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h new file mode 100644 index 000000000000..042944ac75df --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_9_4_3 */ +static const u32 gfx_9_4_3_cleaner_shader_hex[] = { +}; From 335288315af18c0def7f47a37fe7eaa782c98f6d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:14:41 +0530 Subject: [PATCH 1315/1523] drm/amdgpu/gfx9: Add cleaner shader for GFX9.4.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the cleaner shader microcode for GFX9.4.3 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_9_4_3_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps improve GPU performance and resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. v2: fix copyright date (Alex) Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm | 153 ++++++++++++++++++ .../amd/amdgpu/gfx_v9_4_3_cleaner_shader.h | 38 +++++ 2 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm new file mode 100644 index 000000000000..d5325ef80ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// MI300 : Clear SGPRs, VGPRs and LDS +// Uses two kernels launched separately: +// 1. Clean VGPRs, LDS, and lower SGPRs +// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU +// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD +// Waves in the workgroup share the 64KB of LDS +// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383 +// Each wave clears 128 VGPRs, so all 512 in the SIMD +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. +// 2. Clean remaining SGPRs +// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU +// Waves are allocating 96 SGPRs +// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223. +// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799 +// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER +// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command +// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799 + +shader main + asic(MI300) + type(CS) + wave_size(64) +// Note: original source code from SQ team + +// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks) + + s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3 + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1) + S_BARRIER + + s_movk_i32 m0, 0x0000 + s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance) + // + // CLEAR VGPRs + // + s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing +label_0005: + v_mov_b32 v0, 0 + v_mov_b32 v1, 0 + v_mov_b32 v2, 0 + v_mov_b32 v3, 0 + v_mov_b32 v4, 0 + v_mov_b32 v5, 0 + v_mov_b32 v6, 0 + v_mov_b32 v7, 0 + s_sub_u32 s2, s2, 8 + s_set_gpr_idx_idx s2 + s_cbranch_scc0 label_0005 + s_set_gpr_idx_off + + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iteraions + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_clean_sgpr_1: + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 +s_endpgm + +label_0023: + + s_sethalt 1 + + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop1: + + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop1 + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0xee //clear vcc + +s_endpgm +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h index 042944ac75df..69aa567c6c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -23,4 +23,42 @@ /* Define the cleaner shader gfx_9_4_3 */ static const u32 gfx_9_4_3_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, }; From b1f49ff9cbe14264c7eb33462fb700c49c7d91a8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jul 2024 22:18:45 +0530 Subject: [PATCH 1316/1523] drm/amdgpu/gfx9: Add cleaner shader support for GFX9.4.4 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit extends the cleaner shader feature to support GFX9.4.4 hardware. The cleaner shader feature is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). This operation needs to be performed in isolation, while no other tasks should be running on the GPU at the same time. Previously, the cleaner shader feature was implemented for GFX9.4.3 hardware. This commit adds support for GFX9.4.4 hardware by allowing the cleaner shader to be used with this hardware version. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 28f4212a8db2..fa6752585a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1056,6 +1056,7 @@ static int gfx_v9_4_3_sw_init(void *handle) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); if (adev->gfx.mec_fw_version >= 153) { From 234eebe16138f94de3046f60c52763dc17fe5fed Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 29 Jul 2024 14:22:30 -0400 Subject: [PATCH 1317/1523] drm/amdkfd: APIs to stop/start KFD scheduling Provide amdgpu_amdkfd_stop_sched() for amdgpu to stop KFD scheduling compute work on HIQ. amdgpu_amdkfd_start_sched() resumes the scheduling. When amdgpu_amdkfd_stop_sched is called, KFD will unmap queues from runlist. If users send ioctls to KFD to create queues, they'll be added but those queues won't be mapped to runlist (so not scheduled) until amdgpu_amdkfd_start_sched is called. v2: fix build (Alex) Signed-off-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 18 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 39 +++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 58 ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 9 +++ 5 files changed, 137 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c272461d70a9..64a989cbc301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -887,3 +887,21 @@ free_ring_funcs: return r; } + +/* Stop scheduling on KFD */ +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched(adev->kfd.dev, node_id); +} + +/* Start scheduling on KFD */ +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_start_sched(adev->kfd.dev, node_id); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4ed49265c764..825c7ffe4bc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -264,6 +264,8 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, u32 inst); +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -426,6 +428,8 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -496,5 +500,15 @@ static inline int kgd2kfd_check_and_lock_kfd(void) static inline void kgd2kfd_unlock_kfd(void) { } + +static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c2d2598f776c..fad1c8f2bc83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1446,6 +1446,45 @@ void kgd2kfd_unlock_kfd(void) mutex_unlock(&kfd_processes_mutex); } +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + int ret; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + node = kfd->nodes[node_id]; + + ret = node->dqm->ops.unhalt(node->dqm); + if (ret) + dev_err(kfd_device, "Error in starting scheduler\n"); + + return ret; +} + +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + + node = kfd->nodes[node_id]; + return node->dqm->ops.halt(node->dqm); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f6e211070299..d23388ea8181 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1679,6 +1679,60 @@ static int initialize_cpsch(struct device_queue_manager *dqm) return 0; } +/* halt_cpsch: + * Unmap queues so the schedule doesn't continue remaining jobs in the queue. + * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch + * is called. + */ +static int halt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); + + if (!dqm->is_hws_hang) { + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + else + ret = remove_all_queues_mes(dqm); + } + dqm->sched_halt = true; + dqm_unlock(dqm); + + return ret; +} + +/* unhalt_cpsch + * Unset dqm->sched_halt and map queues back to runlist + */ +static int unhalt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running || !dqm->sched_halt) { + WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); + dqm_unlock(dqm); + return 0; + } + dqm->sched_halt = false; + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); + dqm_unlock(dqm); + + return ret; +} + static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; @@ -1984,7 +2038,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) struct device *dev = dqm->dev->adev->dev; int retval; - if (!dqm->sched_running) + if (!dqm->sched_running || dqm->sched_halt) return 0; if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) return 0; @@ -2727,6 +2781,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.halt = halt_cpsch; + dqm->ops.unhalt = unhalt_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index dfb36a246637..08b40826ad1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -106,6 +106,12 @@ union GRBM_GFX_INDEX_BITS { * @uninitialize: Destroys all the device queue manager resources allocated in * initialize routine. * + * @halt: This routine unmaps queues from runlist and set halt status to true + * so no more queues will be mapped to runlist until unhalt. + * + * @unhalt: This routine unset halt status to flase and maps queues back to + * runlist. + * * @create_kernel_queue: Creates kernel queue. Used for debug queue. * * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue. @@ -153,6 +159,8 @@ struct device_queue_manager_ops { int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); + int (*halt)(struct device_queue_manager *dqm); + int (*unhalt)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); @@ -264,6 +272,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + bool sched_halt; /* used for GFX 9.4.3 only */ uint32_t current_logical_xcc_start; From afefd6f245024684fff75100052065d6a9e8f75f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 6 Jun 2024 13:28:02 +0530 Subject: [PATCH 1318/1523] drm/amdgpu: Implement Enforce Isolation Handler for KGD/KFD serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the Enforce Isolation Handler designed to enforce shader isolation on AMD GPUs, which helps to prevent data leakage between different processes. The handler counts the number of emitted fences for each GFX and compute ring. If there are any fences, it schedules the `enforce_isolation_work` to be run after a delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion Driver (KFD) to resume the runqueue. The function is synchronized using the `enforce_isolation_mutex`. This commit also introduces a reference count mechanism (kfd_sch_req_count) to keep track of the number of requests to enable the KFD scheduler. When a request to enable the KFD scheduler is made, the reference count is decremented. When the reference count reaches zero, a delayed work is scheduled to enforce isolation after a delay of GFX_SLICE_PERIOD. When a request to disable the KFD scheduler is made, the function first checks if the reference count is zero. If it is, it cancels the delayed work for enforcing isolation and checks if the KFD scheduler is active. If the KFD scheduler is active, it sends a request to stop the KFD scheduler and sets the KFD scheduler state to inactive. Then, it increments the reference count. The function is synchronized using the kfd_sch_mutex to ensure that the KFD scheduler state and reference count are updated atomically. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König Suggested-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 167 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 15 ++ 4 files changed, 200 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index aa97bbefe934..e8c284aea1f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,6 +118,8 @@ #define MAX_GPU_INSTANCE 64 +#define GFX_SLICE_PERIOD msecs_to_jiffies(250) + struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f1bc02309fe..ad97f03f1358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4067,6 +4067,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.reset_sem_mutex); /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ mutex_init(&adev->enforce_isolation_mutex); + mutex_init(&adev->gfx.kfd_sch_mutex); amdgpu_device_init_apu_flags(adev); @@ -4098,6 +4099,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 76f77cf562af..b4efeef848de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1686,3 +1686,170 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, cleaner_shader_size); } + +/** + * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) + * @adev: amdgpu_device pointer + * @idx: Index of the scheduler to control + * @enable: Whether to enable or disable the KFD scheduler + * + * This function is used to control the KFD (Kernel Fusion Driver) scheduler + * from the KGD. It is part of the cleaner shader feature. This function plays + * a key role in enforcing process isolation on the GPU. + * + * The function uses a reference count mechanism (kfd_sch_req_count) to keep + * track of the number of requests to enable the KFD scheduler. When a request + * to enable the KFD scheduler is made, the reference count is decremented. + * When the reference count reaches zero, a delayed work is scheduled to + * enforce isolation after a delay of GFX_SLICE_PERIOD. + * + * When a request to disable the KFD scheduler is made, the function first + * checks if the reference count is zero. If it is, it cancels the delayed work + * for enforcing isolation and checks if the KFD scheduler is active. If the + * KFD scheduler is active, it sends a request to stop the KFD scheduler and + * sets the KFD scheduler state to inactive. Then, it increments the reference + * count. + * + * The function is synchronized using the kfd_sch_mutex to ensure that the KFD + * scheduler state and reference count are updated atomically. + * + * Note: If the reference count is already zero when a request to enable the + * KFD scheduler is made, it means there's an imbalance bug somewhere. The + * function triggers a warning in this case. + */ +static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, + bool enable) +{ + mutex_lock(&adev->gfx.kfd_sch_mutex); + + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); + goto unlock; + } + + adev->gfx.kfd_sch_req_count[idx]--; + + if (adev->gfx.kfd_sch_req_count[idx] == 0 && + adev->gfx.kfd_sch_inactive[idx]) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } + } else { + if (adev->gfx.kfd_sch_req_count[idx] == 0) { + cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); + if (!adev->gfx.kfd_sch_inactive[idx]) { + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = true; + } + } + + adev->gfx.kfd_sch_req_count[idx]++; + } + +unlock: + mutex_unlock(&adev->gfx.kfd_sch_mutex); +} + +/** + * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation + * + * @work: work_struct. + * + * This function is the work handler for enforcing shader isolation on AMD GPUs. + * It counts the number of emitted fences for each GFX and compute ring. If there + * are any fences, it schedules the `enforce_isolation_work` to be run after a + * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion + * Driver (KFD) to resume the runqueue. The function is synchronized using the + * `enforce_isolation_mutex`. + */ +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) +{ + struct amdgpu_isolation_work *isolation_work = + container_of(work, struct amdgpu_isolation_work, work.work); + struct amdgpu_device *adev = isolation_work->adev; + u32 i, idx, fences = 0; + + if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = isolation_work->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { + if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + } + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { + if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + } + if (fences) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } else { + /* Tell KFD to resume the runqueue */ + if (adev->kfd.init_complete) { + WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f7b37c340e36..e28c1ebfa98f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -34,6 +34,7 @@ #include "soc15.h" #include "amdgpu_ras.h" #include "amdgpu_ring_mux.h" +#include "amdgpu_xcp.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -343,6 +344,12 @@ struct amdgpu_me { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); }; +struct amdgpu_isolation_work { + struct amdgpu_device *adev; + u32 xcp_id; + struct delayed_work work; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -454,6 +461,11 @@ struct amdgpu_gfx { void *cleaner_shader_cpu_ptr; const void *cleaner_shader_ptr; bool enable_cleaner_shader; + struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; + /* Mutex for synchronizing KFD scheduler operations */ + struct mutex kfd_sch_mutex; + u64 kfd_sch_req_count[MAX_XCP]; + bool kfd_sch_inactive[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -563,6 +575,9 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, const void *cleaner_shader_ptr); int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From b710dbe55dee946d82bc4815c40373cf8a391581 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 18 Jul 2024 18:22:35 +0530 Subject: [PATCH 1319/1523] drm/amdgpu/gfx9: Apply Isolation Enforcement to GFX & Compute rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit applies isolation enforcement to the GFX and Compute rings in the gfx_v9_0 module. The commit sets `amdgpu_gfx_enforce_isolation_ring_begin_use` and `amdgpu_gfx_enforce_isolation_ring_end_use` as the functions to be called when a ring begins and ends its use, respectively. `amdgpu_gfx_enforce_isolation_ring_begin_use` is called when a ring begins its use. This function cancels any scheduled `enforce_isolation_work` and, if necessary, signals the Kernel Fusion Driver (KFD) to stop the runqueue. `amdgpu_gfx_enforce_isolation_ring_end_use` is called when a ring ends its use. This function schedules `enforce_isolation_work` to be run after a delay. These functions are part of the Enforce Isolation Handler, which enforces shader isolation on AMD GPUs to prevent data leakage between different processes. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Suggested-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3045b8b0796d..21089aadbb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7470,6 +7470,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v9_0_emit_mem_sync, .reset = gfx_v9_0_reset_kgq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7528,6 +7530,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7569,6 +7573,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { From f846250b8a20e6c1225c64ce87a90d4f29cbf351 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 14 May 2024 23:55:20 +0530 Subject: [PATCH 1320/1523] drm/amdgpu/gfx_v9_4_3: Apply Isolation Enforcement to GFX & Compute rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit applies isolation enforcement to the GFX and Compute rings in the gfx_v9_4_3 module. The commit sets `amdgpu_gfx_enforce_isolation_ring_begin_use` and `amdgpu_gfx_enforce_isolation_ring_end_use` as the functions to be called when a ring begins and ends its use, respectively. `amdgpu_gfx_enforce_isolation_ring_begin_use` is called when a ring begins its use. This function cancels any scheduled `enforce_isolation_work` and, if necessary, signals the Kernel Fusion Driver (KFD) to stop the runqueue. `amdgpu_gfx_enforce_isolation_ring_end_use` is called when a ring ends its use. This function schedules `enforce_isolation_work` to be run after a delay. These functions are part of the Enforce Isolation Handler, which enforces shader isolation on AMD GPUs to prevent data leakage between different processes. The commit also includes a check for the type of the ring. If the type of the ring is `AMDGPU_RING_TYPE_COMPUTE`, the `xcp_id` of the `enforce_isolation` structure in the `gfx` structure of the `amdgpu_device` is set to the `xcp_id` of the ring. This ensures that the correct `xcp_id` is used when enforcing isolation on compute rings. The `xcp_id` is an identifier for an XCP partition, and different rings can be associated with different XCP partitions. Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 228fd4dd32f1..26e2188101e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -75,6 +75,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_mask; ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return; @@ -103,6 +105,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index fa6752585a72..2067f26d3a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4671,6 +4671,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, .reset = gfx_v9_4_3_reset_kcq, .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { From 87758a0ef12cfebb9fab8ef1d0e234dd7b3f4579 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 29 Apr 2024 16:40:44 -0400 Subject: [PATCH 1321/1523] drm/amdkfd: Enable processes isolation on gfx9 When amdgpu enable enforce_isolation, KFD enables single-process mode in HWS and sets exec_cleaner_shader bit in MAP_PROCESS. Signed-off-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 14 +++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 5 +++-- .../gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 00776f08351c..1f9f5bfeaf86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -37,11 +37,14 @@ static int pm_map_process_v9(struct packet_manager *pm, struct kfd_node *kfd = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); + if (adev->enforce_isolation[kfd->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -89,14 +92,18 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_node *knode = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); int i; + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); + if (adev->enforce_isolation[knode->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -144,17 +151,22 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, int concurrent_proc_cnt = 0; struct kfd_node *kfd = pm->dqm->dev; + struct amdgpu_device *adev = kfd->adev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number * of processes in the runlist and kfd module parameter * hws_max_conc_proc. + * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs + * cleaner between process switch), enable single-process mode + * in HWS. * Note: the arbitration between the number of VMIDs and * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = min(pm->dqm->processes_count, + concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 8b6b2bd5c148..cd8611401a66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -145,8 +145,9 @@ struct pm4_mes_map_process { union { struct { - uint32_t pasid:16; - uint32_t reserved1:2; + uint32_t pasid:16; /* 0 - 15 */ + uint32_t reserved1:1; /* 16 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; uint32_t new_debug:1; uint32_t reserved2:1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h index 38f5cb6a222a..e0ed62c4ade0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran { struct { uint32_t pasid:16; /* 0 - 15 */ uint32_t single_memops:1; /* 16 */ - uint32_t reserved1:1; /* 17 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; /* 18 - 21 */ uint32_t new_debug:1; /* 22 */ uint32_t tmz:1; /* 23 */ From ccf8ef6b7506cc43e7fd504a85465c1c0786a107 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 3 Jun 2024 11:48:23 -0400 Subject: [PATCH 1322/1523] drm/amdgpu: Implement MES Suspend and Resume APIs for GFX11 Add implementation for MES Suspend and Resume APIs to unmap/map all queues for GFX11. Support for GFX12 will be added when the corresponding firmware support is in place. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 71 +++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 32 ++++++++++- 3 files changed, 69 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 04a4f0dfec15..44c74a08987d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -501,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) int amdgpu_mes_suspend(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_suspend_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); + input.suspend_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to suspend pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to suspend all gangs"); + + return r; } int amdgpu_mes_resume(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_resume_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); + input.resume_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->resume_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to resume pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to resume all gangs"); + + return r; } static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -1651,6 +1641,19 @@ out: return r; } +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) +{ + uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + bool is_supported = false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) + is_supported = true; + + return is_supported; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 5c8867d2380a..a5b1ea60cac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -503,4 +503,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) memalloc_noreclaim_restore(mes->saved_flags); mutex_unlock(&mes->mutex_hidden); } + +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 6f5a80519af9..8edcd85a1261 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -441,13 +441,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) From 9a16042f02cd08bbd0a5a2d8e9c95347717165a3 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 3 Jun 2024 11:57:50 -0400 Subject: [PATCH 1323/1523] drm/amdkfd: Update queue unmap after VM fault with MES MEC FW expects MES to unmap all queues when a VM fault is observed on a queue and then resumed once the affected process is terminated. Use the MES Suspend and Resume APIs to achieve this. Signed-off-by: Mukul Joshi Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 87 ++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d23388ea8181..5825e805da50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -319,6 +319,46 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) return retval; } +static int suspend_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_suspend(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to suspend gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int resume_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_resume(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to resume gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -2891,6 +2931,44 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device *dev = dqm->dev->adev->dev; + int ret = 0; + + /* Check if process is already evicted */ + dqm_lock(dqm); + if (qpd->evicted) { + /* Increment the evicted count to make sure the + * process stays evicted before its terminated. + */ + qpd->evicted++; + dqm_unlock(dqm); + goto out; + } + dqm_unlock(dqm); + + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + ret = dqm->ops.evict_process_queues(dqm, qpd); + if (ret) { + dev_err(dev, "Evicting process queues failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + +out: + return ret; +} + int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; @@ -2901,8 +2979,13 @@ int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) return -EINVAL; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); - if (pdd) - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + if (pdd) { + if (dqm->dev->kfd->shared_resources.enable_mes) + ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); + else + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + } + kfd_unref_process(p); return ret; From eb067d65c33eecd4b81771384183ad42eec259bf Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 12 Aug 2024 11:11:28 -0400 Subject: [PATCH 1324/1523] drm/amdkfd: Update BadOpcode Interrupt handling with MES Based on the recommendation of MEC FW, update BadOpcode interrupt handling by unmapping all queues, removing the queue that got the interrupt from scheduling and remapping rest of the queues back when using MES scheduler. This is done to prevent the case where unmapping of the bad queue can fail thereby causing a GPU reset. Signed-off-by: Mukul Joshi Acked-by: Harish Kasiviswanathan Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 51 +++++++++++++++++++ .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 9 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5825e805da50..577d121cc6d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2931,6 +2931,57 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct device_queue_manager *dqm = knode->dqm; + struct device *dev = dqm->dev->adev->dev; + struct qcm_process_device *qpd; + struct queue *q = NULL; + int ret = 0; + + if (!p) + return -EINVAL; + + dqm_lock(dqm); + + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) { + qpd = &pdd->qpd; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->doorbell_id == doorbell_id && q->properties.is_active) { + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + q->properties.is_evicted = true; + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + ret = remove_queue_mes(dqm, q, qpd); + if (ret) { + dev_err(dev, "Removing bad queue failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + + break; + } + } + } + +out: + dqm_unlock(dqm); + return ret; +} + static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f524a55eee11..b3f988b275a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -330,11 +330,14 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && - KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) - kfd_set_dbg_ev_from_interrupt(dev, pasid, - KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); + + kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id, KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f7c12d4f0abb..7bba6bed2f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1324,6 +1324,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id); /* Process Queue Manager */ struct process_queue_node { From c0a04e3570d72aaf090962156ad085e37c62e442 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 15 Aug 2024 11:37:28 +0800 Subject: [PATCH 1325/1523] drm/amdgpu: Validate TA binary size Add TA binary size validation to avoid OOB write. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); From 18ac82c26da45d033df7eb993139da83dd53ee68 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Thu, 15 Aug 2024 18:45:13 -0400 Subject: [PATCH 1326/1523] Revert "drm/amd/display: Update to using new dccg callbacks" [Why] Revert updated DCCG wrappers due to regression [How] This reverts commit 680458d41aa46a009909482f58358205b5c4b438. Reviewed-by: Chris Park Signed-off-by: Hansen Dsouza Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 004c4fe3ddfc..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -2396,11 +2396,11 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk_be_new; (void)&dccg35_set_symclk32_le_root_clock_gating; (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_funcs; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; - base->funcs = &dccg35_funcs_new; + base->funcs = &dccg35_funcs; dccg_dcn->regs = regs; dccg_dcn->dccg_shift = dccg_shift; From 9de60462cdba60f575f97ca2655533b35273c715 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 15 Aug 2024 18:45:14 -0400 Subject: [PATCH 1327/1523] drm/amd/display: Update HPO I/O When Handling Link Retrain Automation Request [WHY] Previous multi-display HPO fix moved where HPO I/O enable/disable is performed. The codepath now taken to enable/disable HPO I/O is not used for compliance test automation, meaning that if a compliance box being driven at a DP1 rate requests retrain at UHBR, HPO I/O will remain off if it was previously off. [HOW] Explicitly update HPO I/O after allocating encoders for test request. Reviewed-by: Charlene Liu Reviewed-by: Wenjing Liu Signed-off-by: Michael Strauss Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 13 ++++++++++++ .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 21 ++++--------------- .../amd/display/dc/hwss/dcn31/dcn31_init.c | 1 + .../amd/display/dc/hwss/dcn314/dcn314_init.c | 1 + .../amd/display/dc/hwss/dcn351/dcn351_init.c | 1 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 1 + drivers/gpu/drm/amd/display/dc/inc/resource.h | 2 ++ .../display/dc/link/accessories/link_dp_cts.c | 8 +++++++ 8 files changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b38340c690c6..b6377efc6253 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5303,3 +5303,16 @@ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *o } return det_segments; } + +bool resource_is_hpo_acquired(struct dc_state *context) +{ + int i; + + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 217344ccf644..246fa300ee95 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2350,19 +2350,6 @@ static void dce110_setup_audio_dto( } } -static bool dce110_is_hpo_enabled(struct dc_state *context) -{ - int i; - - for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { - if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { - return true; - } - } - - return false; -} - enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context) @@ -2371,8 +2358,8 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc_bios *dcb = dc->ctx->dc_bios; enum dc_status status; int i; - bool was_hpo_enabled = dce110_is_hpo_enabled(dc->current_state); - bool is_hpo_enabled = dce110_is_hpo_enabled(context); + bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state); + bool is_hpo_acquired = resource_is_hpo_acquired(context); /* reset syncd pipes from disabled pipes */ if (dc->config.use_pipe_ctx_sync_logic) @@ -2415,8 +2402,8 @@ enum dc_status dce110_apply_ctx_to_hw( dce110_setup_audio_dto(dc, context); - if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_enabled != is_hpo_enabled) { - dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_enabled); + if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) { + dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired); } for (i = 0; i < dc->res_pool->pipe_count; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index b57dd45611f2..56f3c70d4b55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -111,6 +111,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn31_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index fe5495a8e7a2..68e6de6b5758 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -114,6 +114,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn314_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 5da3069fc1ab..d00822e8daa5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -123,6 +123,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .program_outstanding_updates = dcn32_program_outstanding_updates, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn351_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 326854489802..ac9205625623 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -461,6 +461,7 @@ struct hw_sequencer_funcs { void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); void (*program_outstanding_updates)(struct dc *dc, struct dc_state *context); + void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 9cd80d3864c7..cd1157d225ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -644,4 +644,6 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe */ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); + +bool resource_is_hpo_acquired(struct dc_state *context); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index df3781081da7..ff8fe1a94965 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -67,6 +67,8 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); + bool is_hpo_acquired; uint8_t count; int i; @@ -83,6 +85,12 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]); } + if (link->dc->hwss.setup_hpo_hw_control) { + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + } + for (i = count-1; i >= 0; i--) link_set_dpms_on(state, pipes[i]); } From 8783a18409b48455b3a63f0cd930c7c88beee93d Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 15 Aug 2024 18:45:15 -0400 Subject: [PATCH 1328/1523] drm/amd/display: remove an extraneous call for checking dchub clock when removing the amdgpu module and reinserting it, a call trace is triggered: [ 334.230602] RIP: 0010:hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.230807] Code: 25 28 00 00 00 75 3c 48 8d 65 f0 5b 41 5c 5d 31 c0 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 45 31 d2 45 31 db e9 55 a1 ca de <0f> 0b eb c6 0f 0b eb c2 d1 eb 8d 83 c0 63 ff ff 3d 20 4e 00 00 76 [ 334.230809] RSP: 0018:ffffbc8b823fb540 EFLAGS: 00010246 [ 334.230811] RAX: 0000000000001000 RBX: 00000000000186a0 RCX: 0000000000000000 [ 334.230812] RDX: ffffbc8b823fb544 RSI: 0000000000000000 RDI: 0000000000000000 [ 334.230813] RBP: ffffbc8b823fb560 R08: 0000000000000000 R09: 0000000000000000 [ 334.230814] R10: 0000000000000000 R11: 000000000000000f R12: ffff9e644f1f2bb0 [ 334.230815] R13: ffff9e6451361300 R14: 0000000000000000 R15: ffff9e6452c00000 [ 334.230816] FS: 00007af7c8519000(0000) GS:ffff9e737dd00000(0000) knlGS:0000000000000000 [ 334.230817] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 334.230818] CR2: 0000703576b9cbd0 CR3: 00000001095a2000 CR4: 0000000000750ee0 [ 334.230819] PKRU: 55555554 [ 334.230820] Call Trace: [ 334.230822] [ 334.230824] ? show_regs+0x6d/0x80 [ 334.230828] ? __warn+0x89/0x160 [ 334.230832] ? hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.231024] ? report_bug+0x17e/0x1b0 [ 334.231028] ? handle_bug+0x46/0x90 [ 334.231030] ? exc_invalid_op+0x18/0x80 [ 334.231032] ? asm_exc_invalid_op+0x1b/0x20 [ 334.231036] ? hubbub2_get_dchub_ref_freq+0xbb/0xe0 [amdgpu] [ 334.231217] dc_create_resource_pool+0xfd/0x320 [amdgpu] [ 334.231408] dc_create+0x256/0x700 [amdgpu] [ 334.231588] ? srso_alias_return_thunk+0x5/0x7f [ 334.231590] ? dmi_matches+0xa0/0x230 [ 334.231594] amdgpu_dm_init+0x28c/0x25f0 [amdgpu] [ 334.231791] ? prb_read_valid+0x1c/0x30 [ 334.231795] ? __irq_work_queue_local+0x43/0xf0 [ 334.231798] ? srso_alias_return_thunk+0x5/0x7f [ 334.231800] ? irq_work_queue+0x2f/0x70 [ 334.231802] ? srso_alias_return_thunk+0x5/0x7f [ 334.231803] ? __wake_up_klogd.part.0+0x40/0x70 [ 334.231805] ? srso_alias_return_thunk+0x5/0x7f [ 334.231807] ? vprintk_emit+0xd9/0x210 [ 334.231809] ? set_dev_info+0x130/0x1c0 [ 334.231812] ? srso_alias_return_thunk+0x5/0x7f [ 334.231813] ? dev_printk_emit+0xa1/0xe0 [ 334.231819] dm_hw_init+0x14/0x30 [amdgpu] [ 334.231993] amdgpu_device_init+0x23c7/0x2fc0 [amdgpu] [ 334.232134] ? pci_read_config_word+0x25/0x50 [ 334.232139] amdgpu_driver_load_kms+0x1a/0xd0 [amdgpu] [ 334.232284] amdgpu_pci_probe+0x1f9/0x620 [amdgpu] On DCN401, get_dchub_ref_freq() hook is called before init_hw() hook. Hence, it is expected to trigger an assert. Remove the extraneous call to get_dchub_ref_freq() to suppress the call trace Reviewed-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b6377efc6253..ef585a89847b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -342,11 +342,6 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, res_pool->ref_clocks.xtalin_clock_inKhz; res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; - if (dc->debug.using_dml2) - if (res_pool->hubbub && res_pool->hubbub->funcs->get_dchub_ref_freq) - res_pool->hubbub->funcs->get_dchub_ref_freq(res_pool->hubbub, - res_pool->ref_clocks.dccg_ref_clock_inKhz, - &res_pool->ref_clocks.dchub_ref_clock_inKhz); } else ASSERT_CRITICAL(false); } From 7c9cb6d1bf122fdac6a7d51f7dd8cc2d8c94b452 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 15 Aug 2024 18:45:16 -0400 Subject: [PATCH 1329/1523] drm/amd/display: Remove redundant check in DCN35 hwseq Removing redundant condition. Reviewed-by: Hansen Dsouza Signed-off-by: Nicholas Susanto Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 899e239352aa..fbbb20b9dbee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1024,9 +1024,6 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired) update_state->pg_res_update[PG_HPO] = true; - if (hpo_frl_stream_enc_acquired) - update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; - update_state->pg_res_update[PG_DWB] = true; for (i = 0; i < dc->res_pool->pipe_count; i++) { From 4e9e50b6aeda3e3ce727453c5455cf08c68dac8b Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 15 Aug 2024 18:45:17 -0400 Subject: [PATCH 1330/1523] drm/amd/display: Allow UHBR Interop With eDP Supported Link Rates Table [WHY] eDP 2.0 is introducing support for UHBR link rates, however current eDP ILR link optimization does not account for UHBR capabilities. Either UHBR capabilities will be provided via the same 128b/132b rate DPCD caps that are currently used on DP2.1, or Table 4-13 may be updated to include UHBR rates. [HOW] Add extra Supported Link Rates table translations for UHBR10/13.5/20. Update eDP link setting optimization search to be aware of 128b/132b DPCD rate caps in order to unblock UHBR on panels with Supported Link Rates table. Reviewed-by: Wenjing Liu Signed-off-by: Michael Strauss Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/link_detection.c | 3 +- .../dc/link/protocols/link_dp_capability.c | 59 ++++++++++--------- .../link/protocols/link_edp_panel_control.c | 11 ++-- .../link/protocols/link_edp_panel_control.h | 2 +- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 391dbe81534d..d21ee9d12d26 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1189,8 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, //sink only can use supported link rate table, we are foreced to enable it if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) link->panel_config.ilr.optimize_edp_link_rate = true; - if (edp_is_ilr_optimization_enabled(link)) - link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); + link->reported_link_cap.link_rate = get_max_edp_link_rate(link); } } else { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 59c9dde10885..34a618a7278b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -212,6 +212,13 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in case 10000000: link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane break; + case 13500000: + link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane + break; + case 20000000: + link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane + break; + default: link_rate = LINK_RATE_UNKNOWN; break; @@ -541,6 +548,23 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link, } } +static void increase_edp_link_rate(struct dc_link *link, + struct dc_link_settings *current_link_setting) +{ + if (current_link_setting->use_link_rate_set) { + if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting->link_rate_set++; + current_link_setting->link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set]; + } else { + current_link_setting->use_link_rate_set = false; + current_link_setting->link_rate = LINK_RATE_UHBR10; + } + } else { + current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate); + } +} + static bool decide_fallback_link_setting_max_bw_policy( struct dc_link *link, const struct dc_link_settings *max, @@ -759,14 +783,7 @@ bool edp_decide_link_settings(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else - break; + increase_edp_link_rate(link, ¤t_link_setting); } } return false; @@ -818,9 +835,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, if (policy) { /* minimize lane */ if (current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { @@ -839,9 +854,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); current_link_setting.lane_count = initial_link_setting.lane_count; } @@ -874,18 +887,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, } if (policy) { /* minimize lane */ - if (current_link_setting.link_rate_set < - link->dpcd_caps.edp_supported_link_rates_count - && current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + if (current_link_setting.link_rate < max_link_rate) { + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { current_link_setting.lane_count = increase_lane_count( current_link_setting.lane_count); current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set; current_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; } else @@ -899,13 +909,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else + increase_edp_link_rate(link, ¤t_link_setting); + if (current_link_setting.link_rate == LINK_RATE_UNKNOWN) break; } } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf820d2b4dc4..070b6c8c1aef 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -305,16 +305,17 @@ bool edp_is_ilr_optimization_enabled(struct dc_link *link) return true; } -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link) { - enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN; + enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate; for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { - if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) - link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i]) + max_ilr_rate = link->dpcd_caps.edp_supported_link_rates[i]; } - return link_rate; + return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate); } bool edp_is_ilr_optimization_required(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 8df8ac5bde5b..30dc8c24c008 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,7 +69,7 @@ bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); bool edp_is_ilr_optimization_enabled(struct dc_link *link); -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); From 272e6aab14bbf98d7a06b2b1cd6308a02d4a10a1 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Thu, 15 Aug 2024 18:45:18 -0400 Subject: [PATCH 1331/1523] drm/amd/display: Hardware cursor changes color when switched to software cursor [Why & How] DCN4 Cursor has separate degamma block and should always do Cursor degamma for Cursor color modes. Reviewed-by: Chris Park Signed-off-by: Nevenko Stupar Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 92b34fe47f74..3b6ca7974e18 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,11 +120,10 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; + // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { - cur_rom_en = 1; - } + cur_rom_en = 1; } REG_UPDATE_3(CURSOR0_CONTROL, From f327189389785b26e49904a7d3ba0c96506a4586 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 15 Aug 2024 18:45:19 -0400 Subject: [PATCH 1332/1523] drm/amd/display: Support UHBR10 link rate on eDP [why] Supporting UHBR10 link rate on eDP leverages the existing DP2.0 code but need to add some small adjustments in code. [how] Acknowledge the given DPCD caps for UHBR10 link rate support and allow DP2.0 programming sequence and link training for eDP. Reviewed-by: Wenjing Liu Signed-off-by: Sung Joon Kim Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 23 ++++++++++--------- .../gpu/drm/amd/display/dc/link/link_dpms.c | 4 ++-- .../link/protocols/link_edp_panel_control.c | 3 +++ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6b036417a73a..3de311533571 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1760,6 +1760,7 @@ struct dc_link { bool dongle_mode_timing_override; bool blank_stream_on_ocs_change; bool read_dpcd204h_on_irq_hpd; + bool disable_assr_for_uhbr; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 246fa300ee95..d52ce58c6a98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1232,20 +1232,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) { - /* - * Sometimes, DP receiver chip power-controlled externally by an - * Embedded Controller could be treated and used as eDP, - * if it drives mobile display. In this case, - * we shouldn't be doing power-sequencing, hence we can skip - * waiting for T9-ready. - */ - link->dc->link_srv->edp_receiver_ready_T9(link); - } } } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + !link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. + */ + link->dc->link_srv->edp_receiver_ready_T9(link); + } + } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index d6550b904b16..c4e03482ba9a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2358,7 +2358,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, false); @@ -2591,7 +2591,7 @@ void link_set_dpms_on( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 070b6c8c1aef..3aa05a2be6c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1168,6 +1168,9 @@ static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, link_enc_index = link->link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (link_res->hpo_dp_link_enc) { + if (link->wa_flags.disable_assr_for_uhbr) + return; + link_enc_index = link_res->hpo_dp_link_enc->inst; use_hpo_dp_link_enc = true; } From ec9e2e7acc6dabb8f00c2c60785931310caaa883 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Thu, 15 Aug 2024 18:45:20 -0400 Subject: [PATCH 1333/1523] drm/amd/display: Fix construct_phy with MXM connector [Why/How] The call to construct_phy will fail in cases where connector type is MXM, and the dc_link won't be properly created/initialized. Reviewed-by: Wenjing Liu Signed-off-by: Ilya Bakoulin Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 8246006857b3..85fd6e422238 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -524,6 +524,7 @@ static bool construct_phy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; From 2344413205521775d3b1d418e5659e3ae3bc263f Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 15 Aug 2024 18:45:21 -0400 Subject: [PATCH 1334/1523] drm/amd/display: DCN35 set min dispclk to 50Mhz [Why] Causes hard hangs when resuming after display off on extended/duplicate modes [How] Set the min dispclk to 50Mhz for DCN35 Reviewed-by: Nicholas Kazlauskas Signed-off-by: Nicholas Susanto Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 3 +++ drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index e2d906327e2e..0ce9b40dfc68 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -305,6 +305,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 5f3705f97bd7..46ad684fe192 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -786,6 +786,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .disable_timeout = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { From 20b5a8f9f4670a8503aa9fa95ca632e77c6bf55d Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 15 Aug 2024 18:45:22 -0400 Subject: [PATCH 1335/1523] drm/amd/display: fix double free issue during amdgpu module unload Flexible endpoints use DIGs from available inflexible endpoints, so only the encoders of inflexible links need to be freed. Otherwise, a double free issue may occur when unloading the amdgpu module. [ 279.190523] RIP: 0010:__slab_free+0x152/0x2f0 [ 279.190577] Call Trace: [ 279.190580] [ 279.190582] ? show_regs+0x69/0x80 [ 279.190590] ? die+0x3b/0x90 [ 279.190595] ? do_trap+0xc8/0xe0 [ 279.190601] ? do_error_trap+0x73/0xa0 [ 279.190605] ? __slab_free+0x152/0x2f0 [ 279.190609] ? exc_invalid_op+0x56/0x70 [ 279.190616] ? __slab_free+0x152/0x2f0 [ 279.190642] ? asm_exc_invalid_op+0x1f/0x30 [ 279.190648] ? dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191096] ? __slab_free+0x152/0x2f0 [ 279.191102] ? dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191469] kfree+0x260/0x2b0 [ 279.191474] dcn10_link_encoder_destroy+0x19/0x30 [amdgpu] [ 279.191821] link_destroy+0xd7/0x130 [amdgpu] [ 279.192248] dc_destruct+0x90/0x270 [amdgpu] [ 279.192666] dc_destroy+0x19/0x40 [amdgpu] [ 279.193020] amdgpu_dm_fini+0x16e/0x200 [amdgpu] [ 279.193432] dm_hw_fini+0x26/0x40 [amdgpu] [ 279.193795] amdgpu_device_fini_hw+0x24c/0x400 [amdgpu] [ 279.194108] amdgpu_driver_unload_kms+0x4f/0x70 [amdgpu] [ 279.194436] amdgpu_pci_remove+0x40/0x80 [amdgpu] [ 279.194632] pci_device_remove+0x3a/0xa0 [ 279.194638] device_remove+0x40/0x70 [ 279.194642] device_release_driver_internal+0x1ad/0x210 [ 279.194647] driver_detach+0x4e/0xa0 [ 279.194650] bus_remove_driver+0x6f/0xf0 [ 279.194653] driver_unregister+0x33/0x60 [ 279.194657] pci_unregister_driver+0x44/0x90 [ 279.194662] amdgpu_exit+0x19/0x1f0 [amdgpu] [ 279.194939] __do_sys_delete_module.isra.0+0x198/0x2f0 [ 279.194946] __x64_sys_delete_module+0x16/0x20 [ 279.194950] do_syscall_64+0x58/0x120 [ 279.194954] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 279.194980] Reviewed-by: Rodrigo Siqueira Signed-off-by: Tim Huang Reviewed-by: Roman Li Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 85fd6e422238..5e1b5ab9fbc6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -385,7 +385,7 @@ static void link_destruct(struct dc_link *link) if (link->panel_cntl) link->panel_cntl->funcs->destroy(&link->panel_cntl); - if (link->link_enc) { + if (link->link_enc && !link->is_dig_mapping_flexible) { /* Update link encoder resource tracking variables. These are used for * the dynamic assignment of link encoders to streams. Virtual links * are not assigned encoder resources on creation. From d07722e1fc749fbd78992650b6d00c9a2619be70 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 15 Aug 2024 18:45:23 -0400 Subject: [PATCH 1336/1523] drm/amd/display: DML2.1 Reintegration for Various Fixes [Why and How] DML2.1 reintegration for several fixes and updates to the DML code. Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 3 - .../dml21/inc/bounding_boxes/dcn4_soc_bb.h | 2 +- .../dml21/inc/dml_top_soc_parameter_types.h | 1 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 1 - .../src/dml2_core/dml2_core_dcn4_calcs.c | 482 +++++++------ .../dml21/src/dml2_core/dml2_core_factory.c | 2 +- .../dml21/src/dml2_core/dml2_core_shared.h | 37 - .../src/dml2_core/dml2_core_shared_types.h | 22 +- .../dml21/src/dml2_core/dml2_core_utils.c | 631 ++++++++++++++++++ .../dml21/src/dml2_core/dml2_core_utils.h | 39 ++ .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 54 +- .../dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 2 +- .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 20 +- .../dml21/src/dml2_pmo/dml2_pmo_factory.c | 2 +- 14 files changed, 1011 insertions(+), 287 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h create mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index cf979ab172bd..c4378e620cbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -79,7 +79,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) @@ -101,7 +100,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o : CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) @@ -122,7 +120,6 @@ DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_core/dml2_core_shared.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 898b1dd69edd..8ef7977841de 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -355,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, .scheduling_delay_us = 125, - .vertical_interrupt_ack_delay_us = 18, + .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 4a46b21c3e55..ebd8abe894a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -151,6 +151,7 @@ struct dml2_soc_bb { double phy_downspread_percent; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; bool do_urgent_latency_adjustment; unsigned int mem_word_bytes; unsigned int num_dcc_mcaches; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 9375c6ae1147..698307f3ca39 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -273,7 +273,6 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in programming->fams2_required = display_cfg->stage3.fams2_required; dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); - programming->fams2_global_config.features.bits.enable = display_cfg->stage3.fams2_required; } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c3c4d8d9525c..e2c45e498664 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8,32 +8,55 @@ #include "dml2_debug.h" #include "lib_float_math.h" #include "dml_top_types.h" -#include "dml2_core_shared.h" -//#define DML_TVM_UPDATE_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 -static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { dml2_printf("DML: ===================================== \n"); dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = %d\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) @@ -42,74 +65,87 @@ static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); - if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); - if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); - if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->ModeSupport == 0) dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); dml2_printf("DML: ===================================== \n"); } @@ -2849,16 +2885,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->hostvm_enable == true) { + if (p->display_cfg->gpuvm_enable == true) { p->vm_group_bytes[k] = 512; p->dpte_group_bytes[k] = 512; - } else if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 2048; - if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } } else { p->vm_group_bytes[k] = 0; p->dpte_group_bytes[k] = 0; @@ -4556,15 +4585,6 @@ static void calculate_tdlut_setting( return; } - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_per_group = 0; - return; - } - if (p->tdlut_mpc_width_flag) { tdlut_mpc_width = 33; tdlut_bytes_per_group_simple = 39*256; @@ -4624,7 +4644,7 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } @@ -4637,7 +4657,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); @@ -4703,11 +4723,12 @@ static void CalculateTarb( static double CalculateTWait( long reserved_vblank_time_ns, double UrgentLatency, - double Ttrip) + double Ttrip, + double g6_temp_read_blackout_us) { double TWait; double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); @@ -4855,13 +4876,23 @@ static double get_urgent_bandwidth_required( } if (!exclude_this_plane) { - surface_required_bw[k] = math_max4(NumberOfDPP[k] * prefetch_vmrow_bw[k], - l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, - l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre, - (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]); + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif } else { surface_required_bw[k] = 0.0; } @@ -4870,6 +4901,8 @@ static double get_urgent_bandwidth_required( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); @@ -4883,6 +4916,8 @@ static double get_urgent_bandwidth_required( dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); @@ -5037,7 +5072,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = 0.0; s->dep_bytes = 0.0; s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; @@ -5059,7 +5096,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); @@ -5092,21 +5128,15 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; s->trip_to_mem = p->Ttrip; -#ifdef DML_TVM_UPDATE_EN *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); if (dcc_mrq_enable) *p->Tvm_trips_flip = *p->Tvm_trips; else *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#else - *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#endif *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled == true) { *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; @@ -5114,15 +5144,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tdmdl_vm = 0; *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex } -#else - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex - } -#endif if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { @@ -5186,7 +5207,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif - s->NoTimeToPrefetch = false; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); @@ -5199,14 +5219,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; } else { -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#endif + s->Tvm_trips_rounded = s->LineTime / 4.0; *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; } @@ -5235,16 +5251,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tno_bw = 0; } -#ifdef DML_TVM_UPDATE_EN if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) *p->Tno_bw_flip = *p->Tno_bw; else *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip -#else - *p->Tno_bw_flip = 0; - if (p->display_cfg->gpuvm_enable == true) - *p->Tno_bw_flip = *p->Tno_bw; -#endif if (dml_is_420(p->myPipe->SourcePixelFormat)) { s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; @@ -5266,6 +5276,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); @@ -5289,11 +5303,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { -#ifdef DML_TVM_UPDATE_EN s->Tvm_oto = s->Tvm_trips_rounded; -#else - s->Tvm_oto = s->LineTime / 4.0; -#endif } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { @@ -5317,19 +5327,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); //Tpre_equ in line time -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; -#else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; -#endif s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); @@ -5367,6 +5374,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; +#ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); @@ -5387,18 +5395,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - - s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); - dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - - if (s->prefetch_sw_bytes < s->dep_bytes) { - s->prefetch_sw_bytes = 2 * s->dep_bytes; - dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); - } + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif *p->dst_y_per_vm_vblank = 0; *p->dst_y_per_row_vblank = 0; @@ -5411,7 +5413,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - if (s->dst_y_prefetch_equ > 1) { + bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; @@ -5428,28 +5432,35 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); - else + (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + } else s->prefetch_bw2 = 0; + dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + } + // prefetch_bw3: 2*R0 + SW if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / @@ -5459,8 +5470,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } @@ -5476,6 +5487,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); @@ -5496,9 +5508,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // here is to make sure equ bw wont be more agressive than the latency-based requirement. // check vm time >= vm_trips // check r0 time >= r0_trips + + double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + if (s->prefetch_bw1 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; + double row_transfer_time = total_row_bytes / s->prefetch_bw1; + dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } } @@ -5508,8 +5529,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time >= vm_trips // check r0 time < r0_trips if (s->prefetch_bw2 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; + double row_transfer_time = total_row_bytes / s->prefetch_bw2; + dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } } @@ -5518,8 +5542,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time < vm_trips // check r0 time >= r0_trips if (s->prefetch_bw3 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; + double row_transfer_time = total_row_bytes / s->prefetch_bw3; + dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } } @@ -5585,13 +5612,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - if (p->VStartup == p->MaxVStartup) { - *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } else { - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif @@ -5635,7 +5658,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5658,7 +5681,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5680,14 +5703,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); @@ -5698,7 +5720,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5730,7 +5754,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5746,7 +5770,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5763,11 +5787,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; } dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; } @@ -6174,7 +6203,7 @@ static void CalculateFlipSchedule( { struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ @@ -6250,7 +6279,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); @@ -6261,6 +6290,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6277,7 +6307,7 @@ static void CalculateFlipSchedule( *dst_y_per_vm_flip = 1; // not used *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = true; + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) @@ -6343,6 +6373,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); @@ -6373,6 +6404,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ @@ -6579,13 +6616,13 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; - if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); else p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); @@ -6915,6 +6952,21 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + static void calculate_pstate_keepout_dst_lines( const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_watermarks *watermarks, @@ -6997,7 +7049,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); @@ -7223,12 +7274,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #endif */ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); if (mode_lib->ms.BytePerPixelC[k] == 0.0) { mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); } @@ -7310,7 +7361,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ViewportExceedsSurface = false; if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); @@ -7319,11 +7371,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; } } } @@ -7599,7 +7651,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7684,8 +7736,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) mode_lib->ms.support.P2IWith420 = true; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) - mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) mode_lib->ms.support.DSC422NativeNotSupported = true; @@ -8483,7 +8533,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out { mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - calculate_hostvm_inefficiency_factor( &s->HostVMInefficiencyFactor, &s->HostVMInefficiencyFactorPrefetch, @@ -8568,7 +8617,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory); + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; @@ -8615,7 +8666,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -8697,8 +8747,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -8711,20 +8761,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.VRatioInPrefetchSupported = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { @@ -8961,6 +9006,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->mSOCParameters.USRRetrainingLatency = 0; s->mSOCParameters.SMNLatency = 0; s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -8980,7 +9028,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; @@ -9011,22 +9058,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } - + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; //Re-ordering Buffer Support Check - mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9055,15 +9095,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ { - // s->dram_clock_change_support = 1; - // s->f_clock_change_support = 1; - if (mode_lib->ms.support.ScaleRatioAndTapsSupport && mode_lib->ms.support.SourceFormatPixelAndScanSupport && mode_lib->ms.support.ViewportSizeSupport @@ -9074,9 +9111,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMultistreamSlots && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink && !mode_lib->ms.support.NotEnoughLanesForMSO - //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP && !mode_lib->ms.support.DSC422NativeNotSupported && mode_lib->ms.support.DSCSlicesODMModeSupported && !mode_lib->ms.support.NotEnoughDSCUnits @@ -9144,7 +9179,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.ModeSupport) - dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_print_mode_support_info(&mode_lib->ms.support, true); dml2_printf("DML::%s: --- DONE --- \n", __func__); #endif @@ -9163,6 +9198,10 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -10697,7 +10736,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory); + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10743,7 +10784,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -10829,9 +10869,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.dst_y_prefetch[k] < 2) s->DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; + dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); @@ -11165,6 +11209,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -11184,7 +11231,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; @@ -11515,9 +11561,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { + dml2_printf("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: ------------- START ----------\n", __func__); dml2_printf("DML::%s: result = %0d\n", __func__, result); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -12427,7 +12473,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_shared_div_rem(phantom_processing_delay_pix, + dml2_core_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); if (rem) @@ -12470,7 +12516,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 640087e862f8..28394de02885 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -10,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h deleted file mode 100644 index f3356b072b59..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_CORE_SHARED_H__ -#define __DML2_CORE_SHARED_H__ - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + dml2_printf("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..a5cc6a07167a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index f19f6ebaae13..8869ea089312 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -203,6 +203,26 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma return true; } +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) { bool result = false; @@ -555,31 +575,39 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // but still the required dispclk can be more than the maximum dispclk speed: dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); - // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); - } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + } + + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index dfd01440737d..3861bc6c9621 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -20,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 68b333b68933..30767f330fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -718,7 +718,7 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; struct dml2_optimization_stage4_state *state = - &in_out->base_display_config->stage4; + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -1444,7 +1444,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && - (pmo->options->disable_drr_clamped || + (pmo->options->disable_drr_clamped || (!stream_descriptor->timing.drr_config.enabled || (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || (pmo->options->disable_drr_clamped_when_var_active && @@ -1910,7 +1910,8 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met if (is_bit_set_in_bitfield(plane_mask, plane_index)) { plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; @@ -2196,15 +2197,15 @@ bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in unsigned int i; - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { success = false; break; } if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { success = false; break; } @@ -2223,8 +2224,11 @@ bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in if (!in_out->last_candidate_failed) { if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { - in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); } success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index 95f716e2641f..add51d41a515 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -26,7 +26,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); From e389eefe34cebc6219dbe76a923b342b2f31e3ba Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Mon, 12 Aug 2024 00:55:56 -0400 Subject: [PATCH 1337/1523] drm/amd/display: Promote DC to 3.2.297 - Various DML 2.1 fixes - Fix module unload - Fix construct_phy with MXM connector - Support UHBR10 link rate on eDP - Revert updated DCCG wrappers Reviewed-by: Roman Li Signed-off-by: Martin Leung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3de311533571..5bbc7d2daca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.296" +#define DC_VER "3.2.297" #define MAX_SURFACES 3 #define MAX_PLANES 6 From c69b07f7bbc905022491c45097923d3487479529 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Aug 2024 11:14:29 -0400 Subject: [PATCH 1338/1523] drm/amdgpu: fix eGPU hotplug regression The driver needs to wait for the on board firmware to finish its initialization before probing the card. Commit 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") switched from using msleep() to using usleep_range() which seems to have caused init failures on some navi1x boards. Switch back to msleep(). Fixes: 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3559 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3500 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..7b561e8e3caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } From bf2bc61638033d118c9ef4ab1204295ba6694401 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Mon, 19 Aug 2024 11:16:13 +0800 Subject: [PATCH 1339/1523] drm/amd/amdgpu: allow use kiq to do hdp flush under sriov when use cpu to do page table update under sriov runtime, since mmio access is blocked, kiq has to be used to flush hdp. change WREG32_NO_KIQ to WREG32 to allow kiq. Signed-off-by: Victor Zhao Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 077c6d920e27..e019249883fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index a9ea23fa0def..ed7facacf2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index ab06c2b4b20b..33736d361dd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 8d7d0813e331..1c99bb09e2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,7 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 186fb12e7a7b038c2710ceb2fb74068f1b5d55a4 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 7 Aug 2024 17:15:12 +0800 Subject: [PATCH 1340/1523] drm/amd/pm: ensure the fw_info is not null before using it This resolves the dereference null return value warning reported by Coverity. Signed-off-by: Tim Huang Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index ca1c7ae8d146..f06b29e33ba4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -1183,6 +1183,8 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); + PP_ASSERT_WITH_CODE(fw_info != NULL, + "Missing firmware info!", return -EINVAL); if ((fw_info->ucTableFormatRevision == 1) && (le16_to_cpu(fw_info->usStructureSize) >= sizeof(ATOM_FIRMWARE_INFO_V1_4))) From 88c511dea151b931ba4873119b1b3555aac0ce53 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 10:35:49 -0400 Subject: [PATCH 1341/1523] drm/amd/gfx11: move the gfx mutex into the caller Otherwise we can fail to drop the software mutex when we fail to take the hardware mutex. Fixes: 76acba7b7f12 ("drm/amdgpu/gfx11: add a mutex for the gfx semaphore") Reported-by: Dan Carpenter Reviewed-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5685aee479df..ee8604722467 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4747,8 +4747,6 @@ int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, { u32 i, tmp, val; - if (req) - mutex_lock(&adev->gfx.reset_sem_mutex); for (i = 0; i < adev->usec_timeout; i++) { /* Request with MeId=2, PipeId=0 */ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); @@ -4769,8 +4767,6 @@ int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, } udelay(1); } - if (!req) - mutex_unlock(&adev->gfx.reset_sem_mutex); if (i >= adev->usec_timeout) return -EINVAL; @@ -4818,8 +4814,10 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + mutex_lock(&adev->gfx.reset_sem_mutex); r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; } @@ -4834,6 +4832,7 @@ static int gfx_v11_0_soft_reset(void *handle) /* release the gfx mutex */ r = gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; From 0b43312902d165c4c8429cd49e8c91479f52b7c4 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 13 Aug 2024 13:51:48 +0800 Subject: [PATCH 1342/1523] drm/amdgpu: fixing rlc firmware loading failure issue Skip rlc firmware validation to ignore firmware header size mismatch issues. This restores the workaround added in commit 849e133c973c ("drm/amdgpu: Fix the null pointer when load rlc firmware") Fixes: 3af2c80ae2f5 ("drm/amdgpu: refine gfx10 firmware loading") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3551 Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 89ec85d16eb8110d88c273d1d34f1fe5a70ba8cc) --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..e444e621ddaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; From e3e4bf58bad1576ac732a1429f53e3d4bfb82b4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 10:28:24 -0400 Subject: [PATCH 1343/1523] drm/amdgpu/sdma5.2: limit wptr workaround to sdma 5.2.1 The workaround seems to cause stability issues on other SDMA 5.2.x IPs. Fixes: a03ebf116303 ("drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3556 Acked-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 2dc3851ef7d9c5439ea8e9623fc36878f3b40649) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index af1e90159ce3..2e72d445415f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,14 +176,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " From c99769bceab4ecb6a067b9af11f9db281eea3e2a Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 15 Aug 2024 11:37:28 +0800 Subject: [PATCH 1344/1523] drm/amdgpu: Validate TA binary size Add TA binary size validation to avoid OOB write. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit c0a04e3570d72aaf090962156ad085e37c62e442) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); From 9cead81eff635e3b3cbce51b40228f3bdc6f2b8c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Aug 2024 11:14:29 -0400 Subject: [PATCH 1345/1523] drm/amdgpu: fix eGPU hotplug regression The driver needs to wait for the on board firmware to finish its initialization before probing the card. Commit 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") switched from using msleep() to using usleep_range() which seems to have caused init failures on some navi1x boards. Switch back to msleep(). Fixes: 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3559 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3500 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: Ma Jun (cherry picked from commit c69b07f7bbc905022491c45097923d3487479529) Cc: stable@vger.kernel.org # 6.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..7b561e8e3caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } From 822c8020aebcf5804a143b891e34f29873fee5e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 13:03:58 +1000 Subject: [PATCH 1346/1523] ata: pata_macio: Fix DMA table overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kolbjørn and Jonáš reported that their 32-bit PowerMacs were crashing in pata-macio since commit 09fe2bfa6b83 ("ata: pata_macio: Fix max_segment_size with PAGE_SIZE == 64K"). For example: kernel BUG at drivers/ata/pata_macio.c:544! Oops: Exception in kernel mode, sig: 5 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 DEBUG_PAGEALLOC PowerMac ... NIP pata_macio_qc_prep+0xf4/0x190 LR pata_macio_qc_prep+0xfc/0x190 Call Trace: 0xc1421660 (unreliable) ata_qc_issue+0x14c/0x2d4 __ata_scsi_queuecmd+0x200/0x53c ata_scsi_queuecmd+0x50/0xe0 scsi_queue_rq+0x788/0xb1c __blk_mq_issue_directly+0x58/0xf4 blk_mq_plug_issue_direct+0x8c/0x1b4 blk_mq_flush_plug_list.part.0+0x584/0x5e0 __blk_flush_plug+0xf8/0x194 __submit_bio+0x1b8/0x2e0 submit_bio_noacct_nocheck+0x230/0x304 btrfs_work_helper+0x200/0x338 process_one_work+0x1a8/0x338 worker_thread+0x364/0x4c0 kthread+0x100/0x104 start_kernel_thread+0x10/0x14 That commit increased max_segment_size to 64KB, with the justification that the SCSI core was already using that size when PAGE_SIZE == 64KB, and that there was existing logic to split over-sized requests. However with a sufficiently large request, the splitting logic causes each sg to be split into two commands in the DMA table, leading to overflow of the DMA table, triggering the BUG_ON(). With default settings the bug doesn't trigger, because the request size is limited by max_sectors_kb == 1280, however max_sectors_kb can be increased, and apparently some distros do that by default using udev rules. Fix the bug for 4KB kernels by reverting to the old max_segment_size. For 64KB kernels the sg_tablesize needs to be halved, to allow for the possibility that each sg will be split into two. Fixes: 09fe2bfa6b83 ("ata: pata_macio: Fix max_segment_size with PAGE_SIZE == 64K") Cc: stable@vger.kernel.org # v6.10+ Reported-by: Kolbjørn Barmen Closes: https://lore.kernel.org/all/62d248bb-e97a-25d2-bcf2-9160c518cae5@kolla.no/ Reported-by: Jonáš Vidra Closes: https://lore.kernel.org/all/3b6441b8-06e6-45da-9e55-f92f2c86933e@ufal.mff.cuni.cz/ Tested-by: Kolbjørn Barmen Signed-off-by: Michael Ellerman Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 1b85e8bf4ef9..1cb8d24b088f 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -208,6 +208,19 @@ static const char* macio_ata_names[] = { /* Don't let a DMA segment go all the way to 64K */ #define MAX_DBDMA_SEG 0xff00 +#ifdef CONFIG_PAGE_SIZE_64KB +/* + * The SCSI core requires the segment size to cover at least a page, so + * for 64K page size kernels it must be at least 64K. However the + * hardware can't handle 64K, so pata_macio_qc_prep() will split large + * requests. To handle the split requests the tablesize must be halved. + */ +#define PATA_MACIO_MAX_SEGMENT_SIZE SZ_64K +#define PATA_MACIO_SG_TABLESIZE (MAX_DCMDS / 2) +#else +#define PATA_MACIO_MAX_SEGMENT_SIZE MAX_DBDMA_SEG +#define PATA_MACIO_SG_TABLESIZE MAX_DCMDS +#endif /* * Wait 1s for disk to answer on IDE bus after a hard reset @@ -912,16 +925,10 @@ static int pata_macio_do_resume(struct pata_macio_priv *priv) static const struct scsi_host_template pata_macio_sht = { __ATA_BASE_SHT(DRV_NAME), - .sg_tablesize = MAX_DCMDS, + .sg_tablesize = PATA_MACIO_SG_TABLESIZE, /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, - /* - * The SCSI core requires the segment size to cover at least a page, so - * for 64K page size kernels this must be at least 64K. However the - * hardware can't handle 64K, so pata_macio_qc_prep() will split large - * requests. - */ - .max_segment_size = SZ_64K, + .max_segment_size = PATA_MACIO_MAX_SEGMENT_SIZE, .device_configure = pata_macio_device_configure, .sdev_groups = ata_common_sdev_groups, .can_queue = ATA_DEF_QUEUE, From d4bc0a264fb482b019c84fbc7202dd3cab059087 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 13:04:07 +1000 Subject: [PATCH 1347/1523] ata: pata_macio: Use WARN instead of BUG The overflow/underflow conditions in pata_macio_qc_prep() should never happen. But if they do there's no need to kill the system entirely, a WARN and failing the IO request should be sufficient and might allow the system to keep running. Signed-off-by: Michael Ellerman Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 1cb8d24b088f..f2f36e55a1f4 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -554,7 +554,8 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) while (sg_len) { /* table overflow should never happen */ - BUG_ON (pi++ >= MAX_DCMDS); + if (WARN_ON_ONCE(pi >= MAX_DCMDS)) + return AC_ERR_SYSTEM; len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; table->command = cpu_to_le16(write ? OUTPUT_MORE: INPUT_MORE); @@ -566,11 +567,13 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) addr += len; sg_len -= len; ++table; + ++pi; } } /* Should never happen according to Tejun */ - BUG_ON(!pi); + if (WARN_ON_ONCE(!pi)) + return AC_ERR_SYSTEM; /* Convert the last command to an input/output */ table--; From a9556637a23311dea96f27fa3c3e5bfba0b38ae4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 12:07:01 +0300 Subject: [PATCH 1348/1523] drm/i915: move rawclk from runtime to display runtime info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's mostly about display, so move it under display. This should also fix rawclk freq initialization in the xe driver. v2: Change the init location Link: https://lore.kernel.org/r/20240819133138.147511-2-maarten.lankhorst@linux.intel.com Cc: Maarten Lankhorst Cc: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/39330d09c48509e013f01fd0247a9b7c291173e2.1724144570.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_backlight.c | 10 +++++----- drivers/gpu/drm/i915/display/intel_display_device.c | 5 +++++ drivers/gpu/drm/i915/display/intel_display_device.h | 2 ++ .../gpu/drm/i915/display/intel_display_power_well.c | 4 ++-- drivers/gpu/drm/i915/display/intel_dp_aux.c | 4 ++-- drivers/gpu/drm/i915/display/intel_pps.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 5 ----- drivers/gpu/drm/i915/intel_device_info.h | 2 -- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 18933b003cbe..9e05745d797d 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1011,7 +1011,7 @@ static u32 cnp_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(i915)->rawclk_freq), + return DIV_ROUND_CLOSEST(KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq), pwm_freq_hz); } @@ -1073,7 +1073,7 @@ static u32 pch_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(i915)->rawclk_freq), + return DIV_ROUND_CLOSEST(KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq), pwm_freq_hz * 128); } @@ -1091,7 +1091,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) int clock; if (IS_PINEVIEW(i915)) - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); else clock = KHz(i915->display.cdclk.hw.cdclk); @@ -1109,7 +1109,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) int clock; if (IS_G4X(i915)) - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); else clock = KHz(i915->display.cdclk.hw.cdclk); @@ -1133,7 +1133,7 @@ static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) clock = MHz(25); mul = 16; } else { - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); mul = 128; } diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index a31f89df2c0a..b28d55fa0c3a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1474,6 +1474,9 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 } } + display_runtime->rawclk_freq = intel_read_rawclk(i915); + drm_dbg_kms(&i915->drm, "rawclk rate: %d kHz\n", display_runtime->rawclk_freq); + return; display_fused_off: @@ -1516,6 +1519,8 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "has_hdcp: %s\n", str_yes_no(runtime->has_hdcp)); drm_printf(p, "has_dmc: %s\n", str_yes_no(runtime->has_dmc)); drm_printf(p, "has_dsc: %s\n", str_yes_no(runtime->has_dsc)); + + drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 13453ea4daea..ad60c676c84d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -204,6 +204,8 @@ struct intel_display_runtime_info { u16 step; } ip; + u32 rawclk_freq; + u8 pipe_mask; u8 cpu_transcoder_mask; u16 port_mask; diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 919f712fef13..adf5d1fbccb5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -1176,9 +1176,9 @@ static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); intel_de_write(dev_priv, CBR1_VLV, 0); - drm_WARN_ON(&dev_priv->drm, RUNTIME_INFO(dev_priv)->rawclk_freq == 0); + drm_WARN_ON(&dev_priv->drm, DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq == 0); intel_de_write(dev_priv, RAWCLK_FREQ_VLV, - DIV_ROUND_CLOSEST(RUNTIME_INFO(dev_priv)->rawclk_freq, + DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq, 1000)); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index b8a53bb174da..cbc817bb0cc3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -83,7 +83,7 @@ static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * The clock divider is based off the hrawclk, and would like to run at * 2MHz. So, take the hrawclk value and divide by 2000 and use that */ - return DIV_ROUND_CLOSEST(RUNTIME_INFO(i915)->rawclk_freq, 2000); + return DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq, 2000); } static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) @@ -103,7 +103,7 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) if (dig_port->aux_ch == AUX_CH_A) freq = i915->display.cdclk.hw.cdclk; else - freq = RUNTIME_INFO(i915)->rawclk_freq; + freq = DISPLAY_RUNTIME_INFO(i915)->rawclk_freq; return DIV_ROUND_CLOSEST(freq, 2000); } diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 0918eb218fc8..68141af4da54 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -1483,7 +1483,7 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 pp_on, pp_off, port_sel = 0; - int div = RUNTIME_INFO(dev_priv)->rawclk_freq / 1000; + int div = DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq / 1000; struct pps_registers regs; enum port port = dp_to_dig_port(intel_dp)->base.port; const struct edp_power_seq *seq = &intel_dp->pps.pps_delays; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d26de37719a7..91acbf99574c 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -124,7 +124,6 @@ void intel_device_info_print(const struct intel_device_info *info, #undef PRINT_FLAG drm_printf(p, "has_pooled_eu: %s\n", str_yes_no(runtime->has_pooled_eu)); - drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } #define ID(id) (id) @@ -370,10 +369,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) "Disabling ppGTT for VT-d support\n"); runtime->ppgtt_type = INTEL_PPGTT_NONE; } - - runtime->rawclk_freq = intel_read_rawclk(dev_priv); - drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq); - } /* diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d1a2abc7e513..fb8a08623eb0 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -204,8 +204,6 @@ struct intel_runtime_info { u16 device_id; - u32 rawclk_freq; - struct intel_step_info step; unsigned int page_sizes; /* page sizes supported by the HW */ From f15e5587448989a55cf8b4feaad0df72ca3aa6a0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 12:07:02 +0300 Subject: [PATCH 1349/1523] drm/xe/display: drop unused rawclk_freq and RUNTIME_INFO() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With rawclk_freq moved to display runtime info, xe has no users left for them. Reviewed-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/9f09274bddc14f555c0102f37af6df23b4433102.1724144570.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 - drivers/gpu/drm/xe/xe_device_types.h | 6 ------ 2 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 2feedddf1e40..182c38905626 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -116,7 +116,6 @@ struct i915_sched_attr { #define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) #define pdev_to_i915 pdev_to_xe_device -#define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) #define FORCEWAKE_ALL XE_FORCEWAKE_ALL diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744..1ba1a0101405 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -291,12 +291,6 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; - -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) - struct { - u32 rawclk_freq; - } i915_runtime; -#endif } info; /** @irq: device interrupt state */ From a8efd8ce280996fe29f2564f705e96e18da3fa62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 19 Aug 2024 12:25:49 +0300 Subject: [PATCH 1350/1523] drm/i915/psr: Prevent Panel Replay if CRC calculation is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly as for PSR2 CRC calculation seems to timeout when Panel Replay is enabled. Fix this by falling back to PSR if CRC calculation is enabled. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2266 Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240819092549.1298233-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 9cb1cdaaeefa..572dcdf77453 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1586,6 +1586,12 @@ _panel_replay_compute_config(struct intel_dp *intel_dp, if (!alpm_config_valid(intel_dp, crtc_state, true)) return false; + if (crtc_state->crc_enabled) { + drm_dbg_kms(&i915->drm, + "Panel Replay not enabled because it would inhibit pipe CRC calculation\n"); + return false; + } + return true; } From e0ee967630c8ee67bb47a5b38d235cd5a8789c48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 Aug 2024 18:31:58 -0600 Subject: [PATCH 1351/1523] io_uring/kbuf: sanitize peek buffer setup Harden the buffer peeking a bit, by adding a sanity check for it having a valid size. Outside of that, arg->max_len is a size_t, though it's only ever set to a 32-bit value (as it's governed by MAX_RW_COUNT). Bump our needed check to a size_t so we know it fits. Finally, cap the calculated needed iov value to the PEEK_MAX_IMPORT, which is the maximum number of segments that should be peeked. Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index c95dc1736dd9..1af2bd56af44 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -218,10 +218,13 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { - int needed; + u32 len = READ_ONCE(buf->len); + size_t needed; - needed = (arg->max_len + buf->len - 1) / buf->len; - needed = min(needed, PEEK_MAX_IMPORT); + if (unlikely(!len)) + return -ENOBUFS; + needed = (arg->max_len + len - 1) / len; + needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); if (nr_avail > needed) nr_avail = needed; } From 609d8b1c422cd8d1f93bf526fb236f2e07687e7c Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 7 Aug 2024 11:05:20 +0100 Subject: [PATCH 1352/1523] drm/i915/gem: Do not look for the exact address in node In preparation for the upcoming partial memory mapping feature, we want to make sure that when looking for a node we consider also the offset and not just the starting address of the virtual memory node. Signed-off-by: Andi Shyti Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240807100521.478266-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index e9b2424156f0..99fde0a05632 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -1079,9 +1079,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) rcu_read_lock(); drm_vma_offset_lock_lookup(dev->vma_offset_manager); - node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); + node = drm_vma_offset_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); if (node && drm_vma_node_is_allowed(node, priv)) { /* * Skip 0-refcnted objects as it is in the process of being From 255fc1703e42321b5afdedc8259ad03c7cc533ec Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 7 Aug 2024 11:05:21 +0100 Subject: [PATCH 1353/1523] drm/i915/gem: Calculate object page offset for partial memory mapping To enable partial memory mapping of GPU virtual memory, it's necessary to introduce an offset to the object's memory (obj->mm.pages) scatterlist. This adjustment compensates for instances when userspace mappings do not start from the beginning of the object. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240807100521.478266-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 4 +++- drivers/gpu/drm/i915/i915_mm.c | 12 +++++++++++- drivers/gpu/drm/i915/i915_mm.h | 3 ++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 99fde0a05632..21274aa9bddd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -252,6 +252,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) struct vm_area_struct *area = vmf->vma; struct i915_mmap_offset *mmo = area->vm_private_data; struct drm_i915_gem_object *obj = mmo->obj; + unsigned long obj_offset; resource_size_t iomap; int err; @@ -273,10 +274,11 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) iomap -= obj->mm.region->region.start; } + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); /* PTEs are revoked in obj->ops->put_pages() */ err = remap_io_sg(area, area->vm_start, area->vm_end - area->vm_start, - obj->mm.pages->sgl, iomap); + obj->mm.pages->sgl, obj_offset, iomap); if (area->vm_flags & VM_WRITE) { GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 7998bc74ab49..f5c97a620962 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma, * @addr: target user address to start at * @size: size of map area * @sgl: Start sg entry + * @offset: offset from the start of the page * @iobase: Use stored dma address offset by this address or pfn if -1 * * Note: this is only safe if the mm semaphore is held when called. */ int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, - struct scatterlist *sgl, resource_size_t iobase) + struct scatterlist *sgl, unsigned long offset, + resource_size_t iobase) { struct remap_pfn r = { .mm = vma->vm_mm, @@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma, /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); + while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) { + offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT; + r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase)); + if (!r.sgt.sgp) + return -EINVAL; + } + r.sgt.curr = offset << PAGE_SHIFT; + if (!use_dma(iobase)) flush_cache_range(vma, addr, size); diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h index 04c8974d822b..69f9351b1a1c 100644 --- a/drivers/gpu/drm/i915/i915_mm.h +++ b/drivers/gpu/drm/i915/i915_mm.h @@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma, int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, - struct scatterlist *sgl, resource_size_t iobase); + struct scatterlist *sgl, unsigned long offset, + resource_size_t iobase); #endif /* __I915_MM_H__ */ From a3ca27c405faad584af6e8e38cdafe5be73230a1 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 9 Aug 2024 08:47:15 +0200 Subject: [PATCH 1354/1523] s390/mm: Prevent lowcore vs identity mapping overlap The identity mapping position in virtual memory is randomized together with the kernel mapping. That position can never overlap with the lowcore even when the lowcore is relocated. Prevent overlapping with the lowcore to allow independent positioning of the identity mapping. With the current value of the alternative lowcore address of 0x70000 the overlap could happen in case the identity mapping is placed at zero. This is a prerequisite for uncoupling of randomization base of kernel image and identity mapping in virtual memory. Acked-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4ec99f73fa27..a3fea683b227 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void) } /* - * Reserve memory used for lowcore/command line/kernel image. + * Reserve memory used for lowcore. + */ +static void __init reserve_lowcore(void) +{ + void *lowcore_start = get_lowcore(); + void *lowcore_end = lowcore_start + sizeof(struct lowcore); + void *start, *end; + + if ((void *)__identity_base < lowcore_end) { + start = max(lowcore_start, (void *)__identity_base); + end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); + memblock_reserve(__pa(start), __pa(end)); + } +} + +/* + * Reserve memory used for absolute lowcore/command line/kernel image. */ static void __init reserve_kernel(void) { @@ -918,6 +934,7 @@ void __init setup_arch(char **cmdline_p) /* Do some memory reservations *before* memory is added to memblock */ reserve_pgtables(); + reserve_lowcore(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); From 32db401965f165f7c44447d0508097f070c8f576 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 9 Aug 2024 08:47:16 +0200 Subject: [PATCH 1355/1523] s390/mm: Pin identity mapping base to zero SIE instruction performs faster when the virtual address of SIE block matches the physical one. Pin the identity mapping base to zero for the benefit of SIE and other instructions that have similar performance impact. Still, randomize the base when DEBUG_VM kernel configuration option is enabled. Suggested-by: Vasily Gorbik Reviewed-by: Christian Borntraeger Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 13 +++++++++++++ arch/s390/boot/startup.c | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a822f952f64a..c60e699e99f5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -604,6 +604,19 @@ config RANDOMIZE_BASE as a security feature that deters exploit attempts relying on knowledge of the location of kernel internals. +config RANDOMIZE_IDENTITY_BASE + bool "Randomize the address of the identity mapping base" + depends on RANDOMIZE_BASE + default DEBUG_VM + help + The identity mapping base address is pinned to zero by default. + Allow randomization of that base to expose otherwise missed + notion of physical and virtual addresses of data structures. + That does not have any impact on the base address at which the + kernel image is loaded. + + If unsure, say N + config KERNEL_IMAGE_BASE hex "Kernel image base address" range 0x100000 0x1FFFFFE0000000 if !KASAN diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index ce232552bc1c..cff34744b5a9 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -341,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); + if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); return asce_limit; } From b4f5bd60d558f6ba451d7e76aa05782c07a182a3 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 6 Aug 2024 12:06:23 +0200 Subject: [PATCH 1356/1523] s390/ap: Refine AP bus bindings complete processing With the rework of the AP bus scan and the introduction of a bindings complete completion also the timing until the userspace finally receives a AP bus binding complete uevent had increased. Unfortunately this event triggers some important jobs for preparation of KVM guests, for example the modification of card/queue masks to reassign AP resources to the alternate AP queue device driver (vfio_ap) which is the precondition for building mediated devices which may be a precondition for starting KVM guests using AP resources. This small fix now triggers the check for binding complete each time an AP device driver has registered. With this patch the bindings complete may be posted up to 30s earlier as there is no need to wait for the next AP bus scan any more. Fixes: 778412ab915d ("s390/ap: rearm APQNs bindings complete completion") Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Cc: stable@vger.kernel.org Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0998b17ecb37..f9f682f19415 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -971,11 +971,16 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, char *name) { struct device_driver *drv = &ap_drv->driver; + int rc; drv->bus = &ap_bus_type; drv->owner = owner; drv->name = name; - return driver_register(drv); + rc = driver_register(drv); + + ap_check_bindings_complete(); + + return rc; } EXPORT_SYMBOL(ap_driver_register); From 32a42c93b74c8ca6d0915ea3eba21bceff53042f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 13:23:09 -0700 Subject: [PATCH 1357/1523] drm/xe: Free job before xe_exec_queue_put Free job depends on job->vm being valid, the last xe_exec_queue_put can destroy the VM. Prevent UAF by freeing job before xe_exec_queue_put. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820202309.1260755-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sched_job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..9628f9deb3c0 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } From a6f78359ac75f24cac3c1bdd753c49c1877bcd82 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:30 +0000 Subject: [PATCH 1358/1523] drm/xe: Fix missing workqueue destroy in xe_gt_pagefault On driver reload we never free up the memory for the pagefault and access counter workqueues. Add those destroy calls here. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Stuart Summers Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/c9a951505271dc3a7aee76de7656679f69c11518.1723862633.git.stuart.summers@intel.com (cherry picked from commit 7586fc52b14e0b8edd0d1f8a434e0de2078b7b2b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..b2a7fa55bd18 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) From dd3e840a33b57b92812fbec26273b3f0b4eb5ae3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 12:35:22 -0700 Subject: [PATCH 1359/1523] drm/xe: Drop HW fence pointer to HW fence ctx The HW fence ctx objects are not ref counted rather tied to the life of an LRC object. HW fences reference the HW fence ctx, HW fences can outlive LRCs thus resulting in UAF. Drop the HW fence pointer to HW fence ctx rather just store what is needed directly in HW fence. v2: - Fix typo in commit (Ashutosh) - Use snprintf (Ashutosh) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240815193522.16008-1-matthew.brost@intel.com (cherry picked from commit 60db6f540af9f93144d5039140aa2ed17171d168) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_fence.c | 9 +++++---- drivers/gpu/drm/xe/xe_hw_fence_types.h | 7 +++++-- drivers/gpu/drm/xe/xe_trace.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include #include +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index baba14fb1e32..01837f6f609f 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) From 9e7f30563677fbeff62d368d5d2a5ac7aaa9746a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 13:23:09 -0700 Subject: [PATCH 1360/1523] drm/xe: Free job before xe_exec_queue_put Free job depends on job->vm being valid, the last xe_exec_queue_put can destroy the VM. Prevent UAF by freeing job before xe_exec_queue_put. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820202309.1260755-1-matthew.brost@intel.com (cherry picked from commit 32a42c93b74c8ca6d0915ea3eba21bceff53042f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sched_job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..9628f9deb3c0 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } From 77cc3f6c58b1b28cee73904946c46a1415187d04 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 09:01:29 -0700 Subject: [PATCH 1361/1523] drm/xe: Invalidate media_gt TLBs Testing on LNL has shown media TLBs need to be invalidated via the GuC, update xe_vm_invalidate_vma appropriately. v2: Fix 2 tile case v3: Include missing local change Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240820160129.986889-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3cff3ae57643..bfa4880a1673 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3157,9 +3157,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; - u32 tile_needs_invalidate = 0; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; + u32 fence_id = 0; int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); @@ -3187,27 +3188,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(tile, vma)) { xe_device_wmb(xe); xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[id], true); + &fence[fence_id], + true); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[id], vma); + &fence[fence_id], vma); if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[id]); + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); goto wait; } + ++fence_id; - tile_needs_invalidate |= BIT(id); + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; } } wait: - for_each_tile(tile, xe, id) - if (tile_needs_invalidate & BIT(id)) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; From 57df60e1f981fa8c288a49012a4bbb02ae0ecdbc Mon Sep 17 00:00:00 2001 From: Yang Ruibin <11162571@vivo.com> Date: Wed, 21 Aug 2024 03:59:33 -0400 Subject: [PATCH 1362/1523] thermal/debugfs: Fix the NULL vs IS_ERR() confusion in debugfs_create_dir() The debugfs_create_dir() return value is never NULL, it is either a valid pointer or an error one. Use IS_ERR() to check it. Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes") Fixes: 755113d76786 ("thermal/debugfs: Add thermal cooling device debugfs information") Signed-off-by: Yang Ruibin <11162571@vivo.com> Link: https://patch.msgid.link/20240821075934.12145-1-11162571@vivo.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index 7dd67bf48571..939d3e5f1817 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -178,11 +178,11 @@ struct thermal_debugfs { void thermal_debug_init(void) { d_root = debugfs_create_dir("thermal", NULL); - if (!d_root) + if (IS_ERR(d_root)) return; d_cdev = debugfs_create_dir("cooling_devices", d_root); - if (!d_cdev) + if (IS_ERR(d_cdev)) return; d_tz = debugfs_create_dir("thermal_zones", d_root); @@ -202,7 +202,7 @@ static struct thermal_debugfs *thermal_debugfs_add_id(struct dentry *d, int id) snprintf(ids, IDSLENGTH, "%d", id); thermal_dbg->d_top = debugfs_create_dir(ids, d); - if (!thermal_dbg->d_top) { + if (IS_ERR(thermal_dbg->d_top)) { kfree(thermal_dbg); return NULL; } From 4cce34b3835b6f7dc52ee2da95c96b6364bb72e5 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:27 +0300 Subject: [PATCH 1363/1523] drm/i915/display: Don't enable decompression on Xe2 with Tile4 >From now on expect Tile4 not to be using compression Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-2-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba5a628b4757..a1ab64db0130 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1085,11 +1085,6 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, if (DISPLAY_VER(dev_priv) == 13) plane_ctl |= adlp_plane_ctl_arb_slots(plane_state); - if (GRAPHICS_VER(dev_priv) >= 20 && - fb->modifier == I915_FORMAT_MOD_4_TILED) { - plane_ctl |= PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; - } - return plane_ctl; } From 5151fa35ae5979821d091b80096b4c790b187bac Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:28 +0300 Subject: [PATCH 1364/1523] drm/fourcc: define Intel Xe2 related tile4 ccs modifiers Add Tile4 type ccs modifiers to indicate presence of compression on Xe2. Here is defined I915_FORMAT_MOD_4_TILED_LNL_CCS which is meant for integrated graphics with igpu related limitations Here is also defined I915_FORMAT_MOD_4_TILED_BMG_CCS which is meant for discrete graphics with dgpu related limitations Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Lucas De Marchi Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-3-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/drm_fourcc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2d84a8052b15..78abd819fd62 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -702,6 +702,31 @@ extern "C" { */ #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * From fca0abb23447c37a6bdbc76798c24623a9b69a11 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:29 +0300 Subject: [PATCH 1365/1523] drm/i915/display: allow creation of Xe2 ccs framebuffers Add I915_FORMAT_MOD_4_TILED_BMG_CCS and I915_FORMAT_MOD_4_TILED_LNL_CCS to possible created modifier for new framebuffer on Xe driver. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-4-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 2 ++ drivers/gpu/drm/i915/display/intel_fb.c | 18 ++++++++++++++++++ .../gpu/drm/i915/display/skl_universal_plane.c | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9f2a4a854548..1042f65967ba 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6260,6 +6260,8 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: case I915_FORMAT_MOD_4_TILED: + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: break; default: drm_dbg_kms(&i915->drm, diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index f23547a88b1f..d2716915d046 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -163,6 +163,14 @@ struct intel_modifier_desc { static const struct intel_modifier_desc intel_modifiers[] = { { + .modifier = I915_FORMAT_MOD_4_TILED_LNL_CCS, + .display_ver = { 20, -1 }, + .plane_caps = INTEL_PLANE_CAP_TILING_4, + }, { + .modifier = I915_FORMAT_MOD_4_TILED_BMG_CCS, + .display_ver = { 14, -1 }, + .plane_caps = INTEL_PLANE_CAP_TILING_4, + }, { .modifier = I915_FORMAT_MOD_4_TILED_MTL_MC_CCS, .display_ver = { 14, 14 }, .plane_caps = INTEL_PLANE_CAP_TILING_4 | INTEL_PLANE_CAP_CCS_MC, @@ -437,6 +445,14 @@ static bool plane_has_modifier(struct drm_i915_private *i915, HAS_FLAT_CCS(i915) != !md->ccs.packed_aux_planes) return false; + if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && + (GRAPHICS_VER(i915) < 20 || !IS_DGFX(i915))) + return false; + + if (md->modifier == I915_FORMAT_MOD_4_TILED_LNL_CCS && + (GRAPHICS_VER(i915) < 20 || IS_DGFX(i915))) + return false; + return true; } @@ -653,6 +669,8 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 128; else return 512; + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC: case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS: diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index a1ab64db0130..0e81d540ecc9 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -537,6 +537,8 @@ static u32 tgl_plane_min_alignment(struct intel_plane *plane, case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC: case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS: + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: /* * Align to at least 4x1 main surface * tiles (16K) to match 64B of AUX. @@ -948,6 +950,9 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier) return PLANE_CTL_TILED_4 | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_4_TILED_MTL_MC_CCS: return PLANE_CTL_TILED_4 | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE; + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: + return PLANE_CTL_TILED_4 | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: return PLANE_CTL_TILED_Y | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; From b128ed5ab27330deeeaf51ea8bb69f1442a96f7f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 19 Aug 2024 17:06:21 +0200 Subject: [PATCH 1366/1523] udp: fix receiving fraglist GSO packets When assembling fraglist GSO packets, udp4_gro_complete does not set skb->csum_start, which makes the extra validation in __udp_gso_segment fail. Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240819150621.59833-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b254a5dadfcf..d842303587af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -279,7 +279,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return ERR_PTR(-EINVAL); if (unlikely(skb_checksum_start(gso_skb) != - skb_transport_header(gso_skb))) + skb_transport_header(gso_skb) && + !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST))) return ERR_PTR(-EINVAL); /* We don't know if egress device can segment and checksum the packet From f8669d7b5f5d2d88959456ae9123d8bb6fdc1ebe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 20 Aug 2024 12:53:47 +0200 Subject: [PATCH 1367/1523] selftests: mlxsw: ethtool_lanes: Source ethtool lib from correct path Source the ethtool library from the correct path and avoid the following error: ./ethtool_lanes.sh: line 14: ./../../../net/forwarding/ethtool_lib.sh: No such file or directory Fixes: 40d269c000bd ("selftests: forwarding: Move several selftests") Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/2112faff02e536e1ac14beb4c2be09c9574b90ae.1724150067.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a1..fe905a7f34b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { From 007d4271a5f10638cba6f0b99698557ef30014b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:20:53 +0000 Subject: [PATCH 1368/1523] netpoll: do not export netpoll_poll_[disable|enable]() netpoll_poll_disable() and netpoll_poll_enable() are only used from core networking code, there is no need to export them. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240820162053.3870927-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 55bcacf67df3..d657b042d5a0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,7 +228,6 @@ void netpoll_poll_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { @@ -239,7 +238,6 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { From faa389b2fbaaec7fd27a390b4896139f9da662e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:57 +0000 Subject: [PATCH 1369/1523] ipv6: prevent UAF in ip6_send_skb() syzbot reported an UAF in ip6_send_skb() [1] After ip6_local_out() has returned, we no longer can safely dereference rt, unless we hold rcu_read_lock(). A similar issue has been fixed in commit a688caa34beb ("ipv6: take rcu lock in rawv6_send_hdrinc()") Another potential issue in ip6_finish_output2() is handled in a separate patch. [1] BUG: KASAN: slab-use-after-free in ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 Read of size 8 at addr ffff88806dde4858 by task syz.1.380/6530 CPU: 1 UID: 0 PID: 6530 Comm: syz.1.380 Not tainted 6.11.0-rc3-syzkaller-00306-gdf6cbc62cc9b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 sock_write_iter+0x2dd/0x400 net/socket.c:1160 do_iter_readv_writev+0x60a/0x890 vfs_writev+0x37c/0xbb0 fs/read_write.c:971 do_writev+0x1b1/0x350 fs/read_write.c:1018 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f936bf79e79 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f936cd7f038 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007f936c115f80 RCX: 00007f936bf79e79 RDX: 0000000000000001 RSI: 0000000020000040 RDI: 0000000000000004 RBP: 00007f936bfe7916 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f936c115f80 R15: 00007fff2860a7a8 Allocated by task 6530: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:312 [inline] __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:338 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3988 [inline] slab_alloc_node mm/slub.c:4037 [inline] kmem_cache_alloc_noprof+0x135/0x2a0 mm/slub.c:4044 dst_alloc+0x12b/0x190 net/core/dst.c:89 ip6_blackhole_route+0x59/0x340 net/ipv6/route.c:2670 make_blackhole net/xfrm/xfrm_policy.c:3120 [inline] xfrm_lookup_route+0xd1/0x1c0 net/xfrm/xfrm_policy.c:3313 ip6_dst_lookup_flow+0x13e/0x180 net/ipv6/ip6_output.c:1257 rawv6_sendmsg+0x1283/0x23c0 net/ipv6/raw.c:898 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2680 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 45: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 poison_slab_object+0xe0/0x150 mm/kasan/common.c:240 __kasan_slab_free+0x37/0x60 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2252 [inline] slab_free mm/slub.c:4473 [inline] kmem_cache_free+0x145/0x350 mm/slub.c:4548 dst_destroy+0x2ac/0x460 net/core/dst.c:124 rcu_do_batch kernel/rcu/tree.c:2569 [inline] rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2843 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xac/0xc0 mm/kasan/generic.c:541 __call_rcu_common kernel/rcu/tree.c:3106 [inline] call_rcu+0x167/0xa70 kernel/rcu/tree.c:3210 refdst_drop include/net/dst.h:263 [inline] skb_dst_drop include/net/dst.h:275 [inline] nf_ct_frag6_queue net/ipv6/netfilter/nf_conntrack_reasm.c:306 [inline] nf_ct_frag6_gather+0xb9a/0x2080 net/ipv6/netfilter/nf_conntrack_reasm.c:485 ipv6_defrag+0x2c8/0x3c0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:67 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip6_local_out+0x6fa/0x800 net/ipv6/output_core.c:143 ip6_local_out+0x26/0x70 net/ipv6/output_core.c:153 ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1959 rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 sock_write_iter+0x2dd/0x400 net/socket.c:1160 do_iter_readv_writev+0x60a/0x890 Fixes: 0625491493d9 ("ipv6: ip6_push_pending_frames() should increment IPSTATS_MIB_OUTDISCARDS") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab504d31f0cd..f7b53effc80f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1956,6 +1956,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; + rcu_read_lock(); err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) @@ -1965,6 +1966,7 @@ int ip6_send_skb(struct sk_buff *skb) IPSTATS_MIB_OUTDISCARDS); } + rcu_read_unlock(); return err; } From da273b377ae0d9bd255281ed3c2adb228321687b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:58 +0000 Subject: [PATCH 1370/1523] ipv6: fix possible UAF in ip6_finish_output2() If skb_expand_head() returns NULL, skb has been freed and associated dst/idev could also have been freed. We need to hold rcu_read_lock() to make sure the dst and associated idev are alive. Fixes: 5796015fa968 ("ipv6: allocate enough headroom in ip6_finish_output2()") Signed-off-by: Eric Dumazet Cc: Vasily Averin Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f7b53effc80f..1b9ebee7308f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOMEM; } + rcu_read_unlock(); } hdr = ipv6_hdr(skb); From 2d5ff7e339d04622d8282661df36151906d0e1c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:59 +0000 Subject: [PATCH 1371/1523] ipv6: prevent possible UAF in ip6_xmit() If skb_expand_head() returns NULL, skb has been freed and the associated dst/idev could also have been freed. We must use rcu_read_lock() to prevent a possible UAF. Fixes: 0c9f227bee11 ("ipv6: use skb_expand_head in ip6_xmit") Signed-off-by: Eric Dumazet Cc: Vasily Averin Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1b9ebee7308f..f26841f1490f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -287,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOBUFS; } + rcu_read_unlock(); } if (opt) { From 8baeef7616d5194045c5a6b97fd1246b87c55b13 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 20 Aug 2024 13:34:15 -0700 Subject: [PATCH 1372/1523] bnxt_en: Fix double DMA unmapping for XDP_REDIRECT Remove the dma_unmap_page_attrs() call in the driver's XDP_REDIRECT code path. This should have been removed when we let the page pool handle the DMA mapping. This bug causes the warning: WARNING: CPU: 7 PID: 59 at drivers/iommu/dma-iommu.c:1198 iommu_dma_unmap_page+0xd5/0x100 CPU: 7 PID: 59 Comm: ksoftirqd/7 Tainted: G W 6.8.0-1010-gcp #11-Ubuntu Hardware name: Dell Inc. PowerEdge R7525/0PYVT1, BIOS 2.15.2 04/02/2024 RIP: 0010:iommu_dma_unmap_page+0xd5/0x100 Code: 89 ee 48 89 df e8 cb f2 69 ff 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 31 f6 31 ff 45 31 c0 e9 ab 17 71 00 <0f> 0b 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 RSP: 0018:ffffab1fc0597a48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff99ff838280c8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffab1fc0597a78 R08: 0000000000000002 R09: ffffab1fc0597c1c R10: ffffab1fc0597cd3 R11: ffff99ffe375acd8 R12: 00000000e65b9000 R13: 0000000000000050 R14: 0000000000001000 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff9a06efb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565c34c37210 CR3: 00000005c7e3e000 CR4: 0000000000350ef0 ? show_regs+0x6d/0x80 ? __warn+0x89/0x150 ? iommu_dma_unmap_page+0xd5/0x100 ? report_bug+0x16a/0x190 ? handle_bug+0x51/0xa0 ? exc_invalid_op+0x18/0x80 ? iommu_dma_unmap_page+0xd5/0x100 ? iommu_dma_unmap_page+0x35/0x100 dma_unmap_page_attrs+0x55/0x220 ? bpf_prog_4d7e87c0d30db711_xdp_dispatcher+0x64/0x9f bnxt_rx_xdp+0x237/0x520 [bnxt_en] bnxt_rx_pkt+0x640/0xdd0 [bnxt_en] __bnxt_poll_work+0x1a1/0x3d0 [bnxt_en] bnxt_poll+0xaa/0x1e0 [bnxt_en] __napi_poll+0x33/0x1e0 net_rx_action+0x18a/0x2f0 Fixes: 578fcfd26e2a ("bnxt_en: Let the page pool manage the DMA mapping") Reviewed-by: Andy Gospodarek Reviewed-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240820203415.168178-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 345681d5007e..f88b641533fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, * redirect is coming from a frame received by the * bnxt_en driver. */ - rx_buf = &rxr->rx_buf_ring[cons]; - mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, - BNXT_RX_PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { From 40520283e0fd11237ed9dfc0991503b3403d5fa4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 09:16:32 -0700 Subject: [PATCH 1373/1523] drm/xe: Invalidate media_gt TLBs in PT code Testing on LNL has shown media GT's TLBs need to be invalidated via the GuC, update PT code appropriately. v2: - Do dma_fence_get before first call of invalidation_fence_init (Himal) - No need to check for valid chain fence (Himal) Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240820161632.987369-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 99 ++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 579ed31b46db..6c6714af3d5d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include + #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1849,13 +1851,20 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { @@ -1875,13 +1884,20 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present &= ~BIT(tile->id); if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); @@ -1898,7 +1914,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) + struct xe_vma_op *op, struct dma_fence *fence, + struct dma_fence *fence2) { xe_vm_assert_held(vm); @@ -1907,26 +1924,28 @@ static void op_commit(struct xe_vm *vm, if (!op->map.immediate && xe_vm_in_fault_mode(vm)) break; - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, + fence2); break; case DRM_GPUVA_OP_REMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); + gpuva_to_vma(op->base.remap.unmap->va), fence, + fence2); if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); + fence, fence2); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); + fence, fence2); break; case DRM_GPUVA_OP_UNMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); + gpuva_to_vma(op->base.unmap.va), fence, fence2); break; case DRM_GPUVA_OP_PREFETCH: bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); + gpuva_to_vma(op->base.prefetch.va), fence, fence2); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -1963,7 +1982,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; + struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence_chain *chain_fence = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -1996,6 +2016,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) err = -ENOMEM; goto kill_vm_tile1; } + if (tile->media_gt) { + mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!mfence) { + err = -ENOMEM; + goto free_ifence; + } + chain_fence = dma_fence_chain_alloc(); + if (!chain_fence) { + err = -ENOMEM; + goto free_ifence; + } + } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -2027,19 +2059,46 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) /* tlb invalidation must be done before signaling rebind */ if (ifence) { + if (mfence) + dma_fence_get(fence); invalidation_fence_init(tile->primary_gt, ifence, fence, pt_update_ops->start, pt_update_ops->last, vm->usm.asid); - fence = &ifence->base.base; + if (mfence) { + invalidation_fence_init(tile->media_gt, mfence, fence, + pt_update_ops->start, + pt_update_ops->last, vm->usm.asid); + dma_fence_chain_init(chain_fence, &ifence->base.base, + &mfence->base.base, 0); + fence = &chain_fence->base; + } else { + fence = &ifence->base.base; + } } - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); + if (!mfence) { + dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence); + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); + } else { + dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, + &ifence->base.base, &mfence->base.base); + } if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); @@ -2049,6 +2108,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) free_rfence: kfree(rfence); free_ifence: + dma_fence_chain_free(chain_fence); + kfree(mfence); kfree(ifence); kill_vm_tile1: if (err != -EAGAIN && tile->id) From 321d6b4b9cbe3dd0bc99937d5e5b4d730b5b5798 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 21 Aug 2024 18:19:18 +0100 Subject: [PATCH 1374/1523] drm/xe: fixup xe_alloc_pf_queue kzalloc expects number of bytes, therefore we should convert the number of dw into bytes, otherwise we are likely just accessing beyond the array causing all kinds of carnage. Also fixup the error handling while we are here. v2: - Prefer kcalloc (dim) Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Signed-off-by: Matthew Auld Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Nirmoy Das Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821171917.417386-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 401c0527d914..0be4687bfc20 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -417,7 +417,10 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; pf_queue->gt = gt; - pf_queue->data = kzalloc(pf_queue->num_dw, GFP_KERNEL); + pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + spin_lock_init(&pf_queue->lock); INIT_WORK(&pf_queue->worker, pf_queue_work_func); From 3f53d050416e88122d53aabbadb1fede998004da Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 12:22:23 -0400 Subject: [PATCH 1375/1523] bcachefs: bch2_data_update_init() cleanup Factor out some helpers - this function has gotten much too big. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 177 +++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 77 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1ca628e93e87..5f49f4953b19 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -20,6 +20,76 @@ #include "subvolume.h" #include "trace.h" +static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + bch2_dev_put(bch2_dev_have_ref(c, ptr->dev)); +} + +static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + if (!bch2_dev_tryget(c, ptr->dev)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); + } + return false; + } + } + return true; +} + +static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } +} + +static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + if (ctxt) { + bool locked; + + move_ctxt_wait_event(ctxt, + (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) || + list_empty(&ctxt->ios)); + + if (!locked) + bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); + } else { + if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + + bucket = PTR_BUCKET_POS(ca, ptr2); + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } + return false; + } + } + } + return true; +} + static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_finish_enabled()) { @@ -355,17 +425,11 @@ void bch2_data_update_read_done(struct data_update *m, void bch2_data_update_exit(struct data_update *update) { struct bch_fs *c = update->op.c; - struct bkey_ptrs_c ptrs = - bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); - - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - if (c->opts.nocow_enabled) - bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(ca, ptr), 0); - bch2_dev_put(ca); - } + struct bkey_s_c k = bkey_i_to_s_c(update->k.k); + if (c->opts.nocow_enabled) + bkey_nocow_unlock(c, k); + bkey_put_dev_refs(c, k); bch2_bkey_buf_exit(&update->k, c); bch2_disk_reservation_put(c, &update->op.res); bch2_bio_free_pages_pool(c, &update->op.wbio.bio); @@ -546,7 +610,6 @@ int bch2_data_update_init(struct btree_trans *trans, const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; - unsigned ptrs_locked = 0; int ret = 0; /* @@ -557,6 +620,15 @@ int bch2_data_update_init(struct btree_trans *trans, if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) return -BCH_ERR_data_update_done; + if (!bkey_get_dev_refs(c, k)) + return -BCH_ERR_data_update_done; + + if (c->opts.nocow_enabled && + !bkey_nocow_lock(c, ctxt, k)) { + bkey_put_dev_refs(c, k); + return -BCH_ERR_nocow_lock_blocked; + } + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -578,40 +650,24 @@ int bch2_data_update_init(struct btree_trans *trans, m->op.compression_opt = background_compression(io_opts); m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - bkey_for_each_ptr(ptrs, ptr) { - if (!bch2_dev_tryget(c, ptr->dev)) { - bkey_for_each_ptr(ptrs, ptr2) { - if (ptr2 == ptr) - break; - bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); - } - return -BCH_ERR_data_update_done; - } - } - unsigned durability_have = 0, durability_removing = 0; i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - bool locked; + if (!p.ptr.cached) { + rcu_read_lock(); + if (BIT(i) & m->data_opts.rewrite_ptrs) { + if (crc_is_compressed(p.crc)) + reserve_sectors += k.k->size; - rcu_read_lock(); - if (((1U << i) & m->data_opts.rewrite_ptrs)) { - BUG_ON(p.ptr.cached); - - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached && - !((1U << i) & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); + m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { + bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); + } + rcu_read_unlock(); } - rcu_read_unlock(); /* * op->csum_type is normally initialized from the fs/file's @@ -626,24 +682,6 @@ int bch2_data_update_init(struct btree_trans *trans, if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) m->op.incompressible = true; - if (c->opts.nocow_enabled) { - if (ctxt) { - move_ctxt_wait_event(ctxt, - (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, - bucket, 0)) || - list_empty(&ctxt->ios)); - - if (!locked) - bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); - } else { - if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { - ret = -BCH_ERR_nocow_lock_blocked; - goto err; - } - } - ptrs_locked |= (1U << i); - } - i++; } @@ -664,7 +702,7 @@ int bch2_data_update_init(struct btree_trans *trans, /* if iter == NULL, it's just a promote */ if (iter) ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto done; + goto out; } m->op.nr_replicas = min(durability_removing, durability_required) + @@ -684,8 +722,7 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_data_update_to_text(&buf, m); WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); printbuf_exit(&buf); - ret = -BCH_ERR_data_update_done; - goto done; + goto out; } m->op.nr_replicas_required = m->op.nr_replicas; @@ -696,30 +733,16 @@ int bch2_data_update_init(struct btree_trans *trans, ? 0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - goto err; + goto out; } if (bkey_extent_is_unwritten(k)) { bch2_update_unwritten_extent(trans, m); - goto done; + goto out; } return 0; -err: - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - if ((1U << i) & ptrs_locked) - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - bch2_dev_put(ca); - i++; - } - - bch2_bkey_buf_exit(&m->k, c); - bch2_bio_free_pages_pool(c, &m->op.wbio.bio); - return ret; -done: +out: bch2_data_update_exit(m); return ret ?: -BCH_ERR_data_update_done; } From 8cc0e50614520c6c609c6ae32a65d0591b7865a1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:13:39 -0400 Subject: [PATCH 1376/1523] bcachefs: Fix "trying to move an extent, but nr_replicas=0" data_update_init() does a bunch of complicated stuff to decide how many replicas to add, since we only want to increase an extent's durability on an explicit rereplicate, but extent pointers may be on devices with different durability settings. There was a corner case when evacuating a device that had been set to durability=0 after data had been written to it, and extents on that device had already been rereplicated - then evacuate only needs to drop pointers on that device, not move them. So the assert for !m->op.nr_replicas was spurious; this was a perfectly legitimate case that needed to be handled. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5f49f4953b19..65176d51b502 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -695,16 +695,6 @@ int bch2_data_update_init(struct btree_trans *trans, * Increasing replication is an explicit operation triggered by * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ - if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - !durability_required) { - m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; - m->data_opts.rewrite_ptrs = 0; - /* if iter == NULL, it's just a promote */ - if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto out; - } - m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; @@ -716,17 +706,22 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); - if (!m->op.nr_replicas) { - struct printbuf buf = PRINTBUF; + m->op.nr_replicas_required = m->op.nr_replicas; - bch2_data_update_to_text(&buf, m); - WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); - printbuf_exit(&buf); + /* + * It might turn out that we don't need any new replicas, if the + * replicas or durability settings have been changed since the extent + * was written: + */ + if (!m->op.nr_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); goto out; } - m->op.nr_replicas_required = m->op.nr_replicas; - if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas From 548e7f51679bf0ec3cdc2027d780c5d06a2a7ac6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:24:26 -0400 Subject: [PATCH 1377/1523] bcachefs: setting bcachefs_effective.* xattrs is a noop bcachefs_effective.* xattrs show the options inherited from parent directories (as well as explicitly set); this namespace is not for setting bcachefs options. Change the .set() handler to a noop so that if e.g. rsync is copying xattrs it'll do the right thing, and only copy xattrs in the bcachefs.* namespace. We don't want to return an error, because that will cause rsync to bail out or get spammy. Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f2b4c17a0307..331f944d73dc 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -612,10 +612,20 @@ static int bch2_xattr_bcachefs_get_effective( name, buffer, size, true); } +/* Noop - xattrs in the bcachefs_effective namespace are inherited */ +static int bch2_xattr_bcachefs_set_effective(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *vinode, + const char *name, const void *value, + size_t size, int flags) +{ + return 0; +} + static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { .prefix = "bcachefs_effective.", .get = bch2_xattr_bcachefs_get_effective, - .set = bch2_xattr_bcachefs_set, + .set = bch2_xattr_bcachefs_set_effective, }; #endif /* NO_BCACHEFS_FS */ From 49203a6b9d12bfd1a223a67847a631a78f1cd782 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 15:08:12 -0400 Subject: [PATCH 1378/1523] bcachefs: Fix failure to relock in btree_node_get() discovered by new trans->locked asserts Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index f5d85b50b6f2..cc778d7e769e 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -974,6 +974,10 @@ retry: bch2_btree_node_wait_on_read(b); + ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: From e150a7e89c4727176d07f5a0a8966fc2af05821c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:18:34 -0400 Subject: [PATCH 1379/1523] bcachefs: Fix bch2_trigger_alloc assert On testing on an old mangled filesystem, we missed a case. Fixes: bd864bc2d907 ("bcachefs: Fix bch2_trigger_alloc when upgrading from old versions") Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fd3a2522bc3e..488d0710f7b9 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (likely(new.k->type == KEY_TYPE_alloc_v4)) { new_a = bkey_s_to_alloc_v4(new).v; } else { - BUG_ON(!(flags & BTREE_TRIGGER_gc)); + BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); ret = PTR_ERR_OR_ZERO(new_ka); From c2a503f3e98e191d86738f5438a3a2b69575c830 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:38:49 -0400 Subject: [PATCH 1380/1523] bcachefs: Fix bch2_bucket_gens_init() Comparing the wrong bpos - this was missed because normally bucket_gens_init() runs on brand new filesystems, but this bug caused it to overwrite bucket_gens keys with 0s when upgrading ancient filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 488d0710f7b9..ac933142aeda 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -556,7 +556,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) struct bpos pos = alloc_gens_pos(iter.pos, &offset); int ret2 = 0; - if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { + if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) { ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret2) From b8db1bd8020d5fecb3bf46cd8b954a657c20ba14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:13:16 -0400 Subject: [PATCH 1381/1523] bcachefs: fix time_stats_to_text() Fixes: 7423330e30ab ("bcachefs: prt_printf() now respects \r\n\t") Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 138320eaa2ad..1b8554460af4 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -416,7 +416,6 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE + 2); prt_printf(out, "\tsince mount\r\trecent\r\n"); - prt_printf(out, "recent"); printbuf_tabstops_reset(out); printbuf_tabstop_push(out, out->indent + 20); From cecc328240609df17395dfd0ea03cc813d8be36d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 11:25:39 -0400 Subject: [PATCH 1382/1523] bcachefs: fix missing bch2_err_str() Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9138944c5ae6..267c2336b115 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2469,8 +2469,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino : bch2_inode_unpack(inode_k, &inode); if (ret) { /* Should have been caught in dirents pass */ - if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error looking up parent directory: %i", ret); + bch_err_msg(c, ret, "error looking up parent directory"); break; } From 1dceae4cc12aa6389d9a8706f0d2a94d1679e79d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 12:10:33 -0400 Subject: [PATCH 1383/1523] bcachefs: unlock_long() before resort in journal replay Fix another SRCU splat - this one pretty harmless. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d89eb43c5ce9..11368dfa96b2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -322,6 +322,7 @@ int bch2_journal_replay(struct bch_fs *c) } } + bch2_trans_unlock_long(trans); /* * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: From 3c5d0b72a8e8c19c960e8fefb7463067e58b6bc4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 15:22:55 -0400 Subject: [PATCH 1384/1523] bcachefs: fix failure to relock in bch2_btree_node_mem_alloc() We weren't always so strict about trans->locked state - but now we are, and new assertions are shaking some bugs out. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 17 +++++++++++ fs/bcachefs/btree_cache.h | 2 ++ fs/bcachefs/btree_update_interior.c | 46 ++++++++++++++++------------- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index cc778d7e769e..063725ecb2b3 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return b; } +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) +{ + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + /* Btree in memory cache - hash table */ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) @@ -736,6 +746,13 @@ out: start_time); memalloc_nofs_restore(flags); + + int ret = bch2_trans_relock(trans); + if (unlikely(ret)) { + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); + } + return b; err: mutex_lock(&bc->lock); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index c0eb87a057cc..f82064007127 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -12,6 +12,8 @@ struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); +void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *); + void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b3454d4619e8..8fd112026e7a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -317,6 +317,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, : 0; int ret; + b = bch2_btree_node_mem_alloc(trans, interior_node); + if (IS_ERR(b)) + return b; + + BUG_ON(b->ob.nr); + mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { struct btree_alloc *a = @@ -325,10 +331,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, obs = a->ob; bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); - goto mem_alloc; + goto out; } mutex_unlock(&c->btree_reserve_cache_lock); - retry: ret = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: @@ -341,7 +346,7 @@ retry: c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) - return ERR_PTR(ret); + goto err; if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; @@ -360,19 +365,16 @@ retry: bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); -mem_alloc: - b = bch2_btree_node_mem_alloc(trans, interior_node); +out: + bkey_copy(&b->key, &tmp.k); + b->ob = obs; six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); - /* we hold cannibalize_lock: */ - BUG_ON(IS_ERR(b)); - BUG_ON(b->ob.nr); - - bkey_copy(&b->key, &tmp.k); - b->ob = obs; - return b; +err: + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); } static struct btree *bch2_btree_node_alloc(struct btree_update *as, @@ -2439,6 +2441,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite } new_hash = bch2_btree_node_mem_alloc(trans, false); + ret = PTR_ERR_OR_ZERO(new_hash); + if (ret) + goto err; } path->intent_ref++; @@ -2446,14 +2451,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite commit_flags, skip_triggers); --path->intent_ref; - if (new_hash) { - mutex_lock(&c->btree_cache.lock); - list_move(&new_hash->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&new_hash->c.lock); - six_unlock_intent(&new_hash->c.lock); - } + if (new_hash) + bch2_btree_node_to_freelist(c, new_hash); +err: closure_sync(&cl); bch2_btree_cache_cannibalize_unlock(trans); return ret; @@ -2522,6 +2522,10 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id b = bch2_btree_node_mem_alloc(trans, false); bch2_btree_cache_cannibalize_unlock(trans); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + set_btree_node_fake(b); set_btree_node_need_rewrite(b); b->c.level = level; @@ -2553,7 +2557,7 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, bch2_btree_root_alloc_fake_trans(trans, id, level)); + bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) From 5dbfc4ef72f15508882aff58c307b8425cf037a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 15:04:15 -0400 Subject: [PATCH 1385/1523] bcachefs: fix failure to relock in btree_node_fill() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 063725ecb2b3..e52a06d3418c 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -873,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, bch2_btree_node_read(trans, b, sync); + int ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + if (!sync) return NULL; From 6575b8c9877c3dd1f7db1d0d61bd250a0bf18b6d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 19:31:20 -0400 Subject: [PATCH 1386/1523] bcachefs: Fix locking in bch2_ioc_setlabel() Fixes: 7a254053a590 ("bcachefs: support FS_IOC_SETFSLABEL") Reported-by: syzbot+7e9efdfec27fbde0141d@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index aea8132d2c40..99c7fe987c74 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -328,9 +328,8 @@ static int bch2_ioc_setlabel(struct bch_fs *c, mutex_lock(&c->sb_lock); strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); - ret = bch2_write_super(c); + mutex_unlock(&c->sb_lock); mnt_drop_write_file(file); return ret; From cab18be6957b6af8cbe3502fd5f6d7b9f02ccceb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 20:49:07 -0400 Subject: [PATCH 1387/1523] bcachefs: Fix replay_now_at() assert Journal replay, in the slowpath where we insert keys in journal order, was inserting keys in the wrong order; keys from early repair come last. Reported-by: syzbot+2c4fcb257ce2b6a29d0e@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 11368dfa96b2..36de1c6fe8c3 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -241,7 +241,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - return cmp_int(l->journal_seq, r->journal_seq); + /* + * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last + * + * journal_seq == 0 means that the key comes from early repair, and + * should be inserted last so as to avoid overflowing the journal + */ + return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } int bch2_journal_replay(struct bch_fs *c) From bdbdd4759f081ca2d0a5d9e8af21d742ffaf8439 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 21:10:45 -0400 Subject: [PATCH 1388/1523] bcachefs: Fix missing validation in bch2_sb_journal_v2_validate() Reported-by: syzbot+47ecc948aadfb2ab3efc@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_sb.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index db80e506e3ab..62b910f2fb27 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -104,6 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; + u64 sum = 0; unsigned nr; unsigned i; struct u64_range *b; @@ -119,6 +120,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f for (i = 0; i < nr; i++) { b[i].start = le64_to_cpu(journal->d[i].start); b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); + + if (b[i].end <= b[i].start) { + prt_printf(err, "journal buckets entry with bad nr: %llu+%llu", + le64_to_cpu(journal->d[i].start), + le64_to_cpu(journal->d[i].nr)); + goto err; + } + + sum += le64_to_cpu(journal->d[i].nr); } sort(b, nr, sizeof(*b), u64_range_cmp, NULL); @@ -148,6 +158,11 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f } } + if (sum > UINT_MAX) { + prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX); + goto err; + } + ret = 0; err: kfree(b); From 06f67437ab356e3140f51aea272d33ce28421f66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:06:44 -0400 Subject: [PATCH 1389/1523] fs/super.c: improve get_tree() error message seeing an odd bug where we fail to correctly return an error from .get_tree(): https://syzkaller.appspot.com/bug?extid=c0360e8367d6d8d04a66 we need to be able to distinguish between accidently returning a positive error (as implied by the log) and no error. Cc: David Howells Signed-off-by: Kent Overstreet --- fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/super.c b/fs/super.c index 38d72a3cf6fc..b7913b55debc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1802,8 +1802,8 @@ int vfs_get_tree(struct fs_context *fc) return error; if (!fc->root) { - pr_err("Filesystem %s get_tree() didn't set fc->root\n", - fc->fs_type->name); + pr_err("Filesystem %s get_tree() didn't set fc->root, returned %i\n", + fc->fs_type->name, error); /* We don't know what the locking state of the superblock is - * if there is a superblock. */ From 7f2de6947f92cfa4be8e5eaa1237e962bb8ee65f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:27:45 -0400 Subject: [PATCH 1390/1523] bcachefs: Fix warning in bch2_fs_journal_stop() j->last_empty_seq needs to match j->seq when the journal is empty Reported-by: syzbot+4093905737cf289b6b38@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 649e3a01608a..f5f7db50ca31 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1260,7 +1260,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) } if (!had_entries) - j->last_empty_seq = cur_seq; + j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); From 8ed823b19214e403ca485532f48c0e02035021ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:57:56 -0400 Subject: [PATCH 1391/1523] bcachefs: Fix compat issue with old alloc_v4 keys we allow new fields to be added to existing key types, and new versions should treat them as being zeroed; this was not handled in alloc_v4_validate. Reported-by: syzbot+3b2968fa4953885dd66a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 50 ++++++++++++++------------- fs/bcachefs/alloc_background_format.h | 1 + fs/bcachefs/btree_iter.h | 9 +++++ 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ac933142aeda..dc97b1f8bc08 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -240,71 +240,73 @@ fsck_err: int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags) { - struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + struct bch_alloc_v4 a; int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k)); + + bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k), c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k)); - bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) && + BCH_ALLOC_V4_NR_BACKPOINTERS(&a), c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type, c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + a.data_type, alloc_data_type(a, a.data_type)); for (unsigned i = 0; i < 2; i++) - bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, + bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX, c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", - a.v->io_time[i], LRU_TIME_MAX); + a.io_time[i], LRU_TIME_MAX); - unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) > + unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) > offsetof(struct bch_alloc_v4, stripe_sectors) - ? a.v->stripe_sectors + ? a.stripe_sectors : 0; - switch (a.v->data_type) { + switch (a.data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: bkey_fsck_err_on(stripe_sectors || - a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe, + a.dirty_sectors || + a.cached_sectors || + a.stripe, c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, - a.v->dirty_sectors, - a.v->cached_sectors, - a.v->stripe); + a.dirty_sectors, + a.cached_sectors, + a.stripe); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - bkey_fsck_err_on(!a.v->dirty_sectors && + bkey_fsck_err_on(!a.dirty_sectors && !stripe_sectors, c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", - bch2_data_type_str(a.v->data_type)); + bch2_data_type_str(a.data_type)); break; case BCH_DATA_cached: - bkey_fsck_err_on(!a.v->cached_sectors || - a.v->dirty_sectors || + bkey_fsck_err_on(!a.cached_sectors || + a.dirty_sectors || stripe_sectors || - a.v->stripe, + a.stripe, c, alloc_key_cached_inconsistency, "data type inconsistency"); - bkey_fsck_err_on(!a.v->io_time[READ] && + bkey_fsck_err_on(!a.io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index 47d9d006502c..f754a2951d8a 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -69,6 +69,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + /* end of fields in original version of alloc_v4 */ __u64 fragmentation_lru; __u32 stripe_sectors; __u32 pad; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index dca62375d7d3..222b7ce8a901 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +#define bkey_val_copy(_dst_v, _src_k) \ +do { \ + unsigned b = min_t(unsigned, sizeof(*_dst_v), \ + bkey_val_bytes(_src_k.k)); \ + memcpy(_dst_v, _src_k.v, b); \ + if (b < sizeof(*_dst_v)) \ + memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ +} while (0) + static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned btree_id, struct bpos pos, unsigned flags, unsigned type, From 0b50b7313ef2494926df30ce8e2ce284f1b847fc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 23:21:52 -0400 Subject: [PATCH 1392/1523] bcachefs: Fix refcounting in discard path bch_dev->io_ref does not protect against the filesystem going away; bch_fs->writes does. Thus the filesystem write ref needs to be the last ref we release. Reported-by: syzbot+9e0404b505e604f67e41@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index dc97b1f8bc08..ba46f1c1d78a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1874,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); - bch2_write_ref_put(c, BCH_WRITE_REF_discard); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_dev_do_discards(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_write_ref; if (queue_work(c->write_ref_wq, &ca->discard_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_discard); -put_ioref: percpu_ref_put(&ca->io_ref); +put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_do_discards(struct bch_fs *c) From dedb2fe37574857c84e9598b9f5272505dedf7af Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 14:21:58 +0800 Subject: [PATCH 1393/1523] bcachefs: Fix double assignment in check_dirent_to_subvol() ret was assigned twice in check_dirent_to_subvol(). Reported by cocci. Signed-off-by: Yuesong Li Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 267c2336b115..6801c37ee803 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2006,7 +2006,6 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret) { bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); ret = -BCH_ERR_fsck_repair_unimplemented; - ret = 0; goto err; } From 87313ac1f134d6ee1e7c858da8bdea9147b537a9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 02:13:02 -0400 Subject: [PATCH 1394/1523] bcachefs: clear path->should_be_locked in bch2_btree_key_cache_drop() bch2_btree_key_cache_drop() evicts the key cache entry - it's used when we're doing an update that bypasses the key cache, because for cache coherency reasons a key can't be in the key cache unless it also exists in the btree - i.e. creates have to bypass the cache. After evicting, the path no longer points to a key cache key, and relock() will always fail if should_be_locked is true. Prep for improving path->should_be_locked assertions Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 79954490627c..9b3ec2a3b8ce 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + path->should_be_locked = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, From 1d8c3c23a6bc1527e253b305b4b68c03d833b824 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 22 Aug 2024 07:17:09 +0000 Subject: [PATCH 1395/1523] KVM: arm64: Ensure canonical IPA is hugepage-aligned when handling fault Zenghui reports that VMs backed by hugetlb pages are no longer booting after commit fd276e71d1e7 ("KVM: arm64: nv: Handle shadow stage 2 page faults"). Support for shadow stage-2 MMUs introduced the concept of a fault IPA and canonical IPA to stage-2 fault handling. These are identical in the non-nested case, as the hardware stage-2 context is always that of the canonical IPA space. Both addresses need to be hugepage-aligned when preparing to install a hugepage mapping to ensure that KVM uses the correct GFN->PFN translation and installs that at the correct IPA for the current stage-2. And now I'm feeling thirsty after all this talk of IPAs... Fixes: fd276e71d1e7 ("KVM: arm64: nv: Handle shadow stage 2 page faults") Reported-by: Zenghui Yu Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240822071710.2291690-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6981b1bc0946..a509b63bd4dd 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1540,8 +1540,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = min(vma_pagesize, (long)max_map_size); } - if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) + /* + * Both the canonical IPA and fault IPA must be hugepage-aligned to + * ensure we find the right PFN and lay down the mapping in the right + * place. + */ + if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) { fault_ipa &= ~(vma_pagesize - 1); + ipa &= ~(vma_pagesize - 1); + } gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); From 3e6245ebe7ef341639e9a7e402b3ade8ad45a19f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Aug 2024 11:03:38 +0100 Subject: [PATCH 1396/1523] KVM: arm64: Make ICC_*SGI*_EL1 undef in the absence of a vGICv3 On a system with a GICv3, if a guest hasn't been configured with GICv3 and that the host is not capable of GICv2 emulation, a write to any of the ICC_*SGI*_EL1 registers is trapped to EL2. We therefore try to emulate the SGI access, only to hit a NULL pointer as no private interrupt is allocated (no GIC, remember?). The obvious fix is to give the guest what it deserves, in the shape of a UNDEF exception. Reported-by: Alexander Potapenko Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240820100349.3544850-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 6 ++++++ arch/arm64/kvm/vgic/vgic.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c90324060436..31e49da867ff 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -33,6 +33,7 @@ #include #include "sys_regs.h" +#include "vgic/vgic.h" #include "trace.h" @@ -435,6 +436,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, { bool g1; + if (!kvm_has_gicv3(vcpu->kvm)) { + kvm_inject_undefined(vcpu); + return false; + } + if (!p->is_write) return read_from_write_only(vcpu, p, r); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index ba8f790431bd..8532bfe3fed4 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -346,4 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); +static inline bool kvm_has_gicv3(struct kvm *kvm) +{ + return (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && + irqchip_in_kernel(kvm) && + kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3); +} + #endif From 6ea14ccb60c8ab829349979b22b58a941ec4a3ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 13 Aug 2024 12:39:46 +0200 Subject: [PATCH 1397/1523] netfilter: flowtable: validate vlan header Ensure there is sufficient room to access the protocol field of the VLAN header, validate it once before the flowtable lookup. ===================================================== BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] nf_ingress net/core/dev.c:5440 [inline] Fixes: 4cd91f7c290f ("netfilter: flowtable: add vlan support") Reported-by: syzbot+8407d9bb88cd4c6bf61a@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_inet.c | 3 +++ net/netfilter/nf_flow_table_ip.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88787b45e30d..8b541a080342 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return NF_ACCEPT; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); proto = veth->h_vlan_encapsulated_proto; break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index c2c005234dcd..98edcaa37b38 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return false; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; From f2916c83d746eb99f50f42c15cf4c47c2ea5f3b3 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 20 Aug 2024 11:04:25 +0800 Subject: [PATCH 1398/1523] net: ngbe: Fix phy mode set to external phy The MAC only has add the TX delay and it can not be modified. MAC and PHY are both set the TX delay cause transmission problems. So just disable TX delay in PHY, when use rgmii to attach to external phy, set PHY_INTERFACE_MODE_RGMII_RXID to phy drivers. And it is does not matter to internal phy. Fixes: bc2426d74aa3 ("net: ngbe: convert phylib to phylink") Signed-off-by: Mengyuan Lou Cc: stable@vger.kernel.org # 6.3+ Reviewed-by: Jacob Keller Link: https://patch.msgid.link/E6759CF1387CF84C+20240820030425.93003-1-mengyuanlou@net-swift.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ec54b18c5fe7..a5e9b779c44d 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -124,8 +124,12 @@ static int ngbe_phylink_init(struct wx *wx) MAC_SYM_PAUSE | MAC_ASYM_PAUSE; config->mac_managed_pm = true; - phy_mode = PHY_INTERFACE_MODE_RGMII_ID; - __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); + /* The MAC only has add the Tx delay and it can not be modified. + * So just disable TX delay in PHY, and it is does not matter to + * internal phy. + */ + phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, config->supported_interfaces); phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); if (IS_ERR(phylink)) From a2f5c505b4378cd6fc7c4a44ff3665ccef2037db Mon Sep 17 00:00:00 2001 From: Sava Jakovljev Date: Wed, 21 Aug 2024 04:16:57 +0200 Subject: [PATCH 1399/1523] net: phy: realtek: Fix setting of PHY LEDs Mode B bit on RTL8211F The current implementation incorrectly sets the mode bit of the PHY chip. Bit 15 (RTL8211F_LEDCR_MODE) should not be shifted together with the configuration nibble of a LED- it should be set independently of the index of the LED being configured. As a consequence, the RTL8211F LED control is actually operating in Mode A. Fix the error by or-ing final register value to write with a const-value of RTL8211F_LEDCR_MODE, thus setting Mode bit explicitly. Fixes: 17784801d888 ("net: phy: realtek: Add support for PHY LEDs on RTL8211F") Signed-off-by: Sava Jakovljev Reviewed-by: Marek Vasut Link: https://patch.msgid.link/PAWP192MB21287372F30C4E55B6DF6158C38E2@PAWP192MB2128.EURP192.PROD.OUTLOOK.COM Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 87865918dab6..25e5bfbb6f89 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -555,7 +555,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, unsigned long rules) { const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); - u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + u16 reg = 0; if (index >= RTL8211F_LED_COUNT) return -EINVAL; @@ -575,6 +575,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, } reg <<= RTL8211F_LEDCR_SHIFT * index; + reg |= RTL8211F_LEDCR_MODE; /* Mode B */ return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } From af688a99eb1fc7ef69774665d61e6be51cea627a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 21 Aug 2024 12:35:58 +0530 Subject: [PATCH 1400/1523] octeontx2-af: Fix CPT AF register offset calculation Some CPT AF registers are per LF and others are global. Translation of PF/VF local LF slot number to actual LF slot number is required only for accessing perf LF registers. CPT AF global registers access do not require any LF slot number. Also, there is no reason CPT PF/VF to know actual lf's register offset. Without this fix microcode loading will fail, VFs cannot be created and hardware is not usable. Fixes: bc35e28af789 ("octeontx2-af: replace cpt slot with lf id on reg write") Signed-off-by: Bharat Bhushan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240821070558.1020101-1-bbhushan2@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 3e09d2285814..daf4b951e905 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -632,7 +632,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, return ret; } -static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +static bool validate_and_update_reg_offset(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + u64 *reg_offset) { u64 offset = req->reg_offset; int blkaddr, num_lfs, lf; @@ -663,6 +665,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) if (lf < 0) return false; + /* Translate local LF's offset to global CPT LF's offset to + * access LFX register. + */ + *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); + return true; } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ @@ -697,7 +704,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *rsp) { u64 offset = req->reg_offset; - int blkaddr, lf; + int blkaddr; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -708,18 +715,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - if (!is_valid_offset(rvu, req)) + if (!validate_and_update_reg_offset(rvu, req, &offset)) return CPT_AF_ERR_ACCESS_DENIED; - /* Translate local LF used by VFs to global CPT LF */ - lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, - (offset & 0xFFF) >> 3); - - /* Translate local LF's offset to global CPT LF's offset */ - offset &= 0xFF000; - offset += lf << 3; - - rsp->reg_offset = offset; + rsp->reg_offset = req->reg_offset; rsp->ret_val = req->ret_val; rsp->is_write = req->is_write; From 9a8fc292dd93b93db30e01c94c0da4c944852f28 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Aug 2024 14:30:53 +0300 Subject: [PATCH 1401/1523] spi: pxa2xx: Do not override dev->platform_data on probe The platform_data field may be supplied by legacy board code. In other cases we override it, and module remove and probe cycle will crash the kernel since it will carry a stale pointer. Fix this by supplying a third argument to the pxa2xx_spi_probe() and avoid overriding dev->platform_data. Reported-by: Hao Ma Fixes: cc160697a576 ("spi: pxa2xx: Convert PCI driver to use spi-pxa2xx code directly") Fixes: 3d8f037fbcab ("spi: pxa2xx: Move platform driver to a separate file") Fixes: 20ade9b9771c ("spi: pxa2xx: Extract pxa2xx_spi_platform_*() callbacks") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240822113408.750831-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 2 +- drivers/spi/spi-pxa2xx-platform.c | 6 ++---- drivers/spi/spi-pxa2xx.c | 5 ++--- drivers/spi/spi-pxa2xx.h | 3 ++- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 616d032f1a89..c98bb214b6ae 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -297,7 +297,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - return pxa2xx_spi_probe(&dev->dev, ssp); + return pxa2xx_spi_probe(&dev->dev, ssp, pdata); } static void pxa2xx_spi_pci_remove(struct pci_dev *dev) diff --git a/drivers/spi/spi-pxa2xx-platform.c b/drivers/spi/spi-pxa2xx-platform.c index 98a8ceb7db6f..f9504cddc7ba 100644 --- a/drivers/spi/spi-pxa2xx-platform.c +++ b/drivers/spi/spi-pxa2xx-platform.c @@ -63,7 +63,7 @@ static struct ssp_device *pxa2xx_spi_ssp_request(struct platform_device *pdev) ssp = pxa_ssp_request(pdev->id, pdev->name); if (!ssp) - return ssp; + return NULL; status = devm_add_action_or_reset(&pdev->dev, pxa2xx_spi_ssp_release, ssp); if (status) @@ -148,8 +148,6 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) platform_info = pxa2xx_spi_init_pdata(pdev); if (IS_ERR(platform_info)) return dev_err_probe(dev, PTR_ERR(platform_info), "missing platform data\n"); - - dev->platform_data = platform_info; } ssp = pxa2xx_spi_ssp_request(pdev); @@ -158,7 +156,7 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) if (!ssp) ssp = &platform_info->ssp; - return pxa2xx_spi_probe(dev, ssp); + return pxa2xx_spi_probe(dev, ssp, platform_info); } static void pxa2xx_spi_platform_remove(struct platform_device *pdev) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 16b96eb176cd..e3a95adc5279 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1277,16 +1277,15 @@ static size_t pxa2xx_spi_max_dma_transfer_size(struct spi_device *spi) return MAX_DMA_LEN; } -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp) +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info) { - struct pxa2xx_spi_controller *platform_info; struct spi_controller *controller; struct driver_data *drv_data; const struct lpss_config *config; int status; u32 tmp; - platform_info = dev_get_platdata(dev); if (platform_info->is_target) controller = devm_spi_alloc_target(dev, sizeof(*drv_data)); else diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index a470d3d634d3..447be0369384 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -132,7 +132,8 @@ extern void pxa2xx_spi_dma_stop(struct driver_data *drv_data); extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data); extern void pxa2xx_spi_dma_release(struct driver_data *drv_data); -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp); +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info); void pxa2xx_spi_remove(struct device *dev); extern const struct dev_pm_ops pxa2xx_spi_pm_ops; From e17465f78eb92ebb4be17e35d6c0584406f643a0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Aug 2024 14:30:54 +0300 Subject: [PATCH 1402/1523] spi: pxa2xx: Move PM runtime handling to the glue drivers PCI and platform buses have different defaults for runtime PM. In particular PCI probe is assumed to be called when PM runtime is enabled by the PCI core. In this case if we try enable it again the PM runtime complaints with pxa2xx_spi_pci 0000:00:07.0: Unbalanced pm_runtime_enable! Fix this by moving PM runtime handling from the SPI PXA2xx core to the glue drivers. Fixes: cc160697a576 ("spi: pxa2xx: Convert PCI driver to use spi-pxa2xx code directly") Fixes: 3d8f037fbcab ("spi: pxa2xx: Move platform driver to a separate file") Fixes: 20ade9b9771c ("spi: pxa2xx: Extract pxa2xx_spi_platform_*() callbacks") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240822113408.750831-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 15 ++++++++++++++- drivers/spi/spi-pxa2xx-platform.c | 22 ++++++++++++++++++++-- drivers/spi/spi-pxa2xx.c | 15 +-------------- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index c98bb214b6ae..cc8dcf782399 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -297,11 +298,23 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - return pxa2xx_spi_probe(&dev->dev, ssp, pdata); + ret = pxa2xx_spi_probe(&dev->dev, ssp, pdata); + if (ret) + return ret; + + pm_runtime_set_autosuspend_delay(&dev->dev, 50); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + + return 0; } static void pxa2xx_spi_pci_remove(struct pci_dev *dev) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pxa2xx_spi_remove(&dev->dev); } diff --git a/drivers/spi/spi-pxa2xx-platform.c b/drivers/spi/spi-pxa2xx-platform.c index f9504cddc7ba..595af9fa4e0f 100644 --- a/drivers/spi/spi-pxa2xx-platform.c +++ b/drivers/spi/spi-pxa2xx-platform.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) struct pxa2xx_spi_controller *platform_info; struct device *dev = &pdev->dev; struct ssp_device *ssp; + int ret; platform_info = dev_get_platdata(dev); if (!platform_info) { @@ -156,12 +158,28 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) if (!ssp) ssp = &platform_info->ssp; - return pxa2xx_spi_probe(dev, ssp, platform_info); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + ret = pxa2xx_spi_probe(dev, ssp, platform_info); + if (ret) + pm_runtime_disable(dev); + + return ret; } static void pxa2xx_spi_platform_remove(struct platform_device *pdev) { - pxa2xx_spi_remove(&pdev->dev); + struct device *dev = &pdev->dev; + + pm_runtime_get_sync(dev); + + pxa2xx_spi_remove(dev); + + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); } static const struct acpi_device_id pxa2xx_spi_acpi_match[] = { diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index e3a95adc5279..bf1f34b0ffc8 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1449,24 +1449,16 @@ int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, } } - pm_runtime_set_autosuspend_delay(dev, 50); - pm_runtime_use_autosuspend(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - /* Register with the SPI framework */ dev_set_drvdata(dev, drv_data); status = spi_register_controller(controller); if (status) { dev_err_probe(dev, status, "problem registering SPI controller\n"); - goto out_error_pm_runtime_enabled; + goto out_error_clock_enabled; } return status; -out_error_pm_runtime_enabled: - pm_runtime_disable(dev); - out_error_clock_enabled: clk_disable_unprepare(ssp->clk); @@ -1483,8 +1475,6 @@ void pxa2xx_spi_remove(struct device *dev) struct driver_data *drv_data = dev_get_drvdata(dev); struct ssp_device *ssp = drv_data->ssp; - pm_runtime_get_sync(dev); - spi_unregister_controller(drv_data->controller); /* Disable the SSP at the peripheral and SOC level */ @@ -1495,9 +1485,6 @@ void pxa2xx_spi_remove(struct device *dev) if (drv_data->controller_info->enable_dma) pxa2xx_spi_dma_release(drv_data); - pm_runtime_put_noidle(dev); - pm_runtime_disable(dev); - /* Release IRQ */ free_irq(ssp->irq, drv_data); } From 453afb1a439994deeacb8d9ecbb48c1f2348ea0a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:34 +0300 Subject: [PATCH 1403/1523] drm/xe/display: fix compat IS_DISPLAY_STEP() range end It's supposed to be an open range at the end like in i915. Fingers crossed that nobody relies on this definition. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Reviewed-by: Lucas De Marchi Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/fe8743770694e429f6902491cdb306c97bdf701a.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 182c38905626..a7d206133922 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -83,7 +83,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 /* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) +#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) From fa509a33e34849cce6c17f969991dc87a864fbde Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:35 +0300 Subject: [PATCH 1404/1523] drm/xe/display: remove intel_display_step_name() to simplify The intel_display_step_name() is an unnecessary extra indirection. Simplify by just adding a macro to map intel_step_name() to xe_step_name(). We'll need to temporarily add a compat INTEL_DISPLAY_STEP() for this. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/cbea7def331cc9d2438da49ae344b9987f27cd12.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- drivers/gpu/drm/i915/intel_step.c | 5 ----- drivers/gpu/drm/i915/intel_step.h | 1 - drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 ++ drivers/gpu/drm/xe/compat-i915-headers/intel_step.h | 9 +-------- 5 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 73977b173898..7c756d5ba2a2 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -391,7 +391,7 @@ static const struct stepping_info * intel_get_stepping_info(struct drm_i915_private *i915, struct stepping_info *si) { - const char *step_name = intel_display_step_name(i915); + const char *step_name = intel_step_name(INTEL_DISPLAY_STEP(i915)); si->stepping = step_name[0]; si->substepping = step_name[1]; diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index a5adfb5d8fd2..80464e4edcce 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -275,8 +275,3 @@ const char *intel_step_name(enum intel_step step) return "**"; } } - -const char *intel_display_step_name(struct drm_i915_private *i915) -{ - return intel_step_name(RUNTIME_INFO(i915)->step.display_step); -} diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index b6f43b624774..96dfca4cba73 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -78,6 +78,5 @@ enum intel_step { void intel_step_init(struct drm_i915_private *i915); const char *intel_step_name(enum intel_step step); -const char *intel_display_step_name(struct drm_i915_private *i915); #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index a7d206133922..3132a328e42c 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -82,6 +82,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 +#define INTEL_DISPLAY_STEP(xe) ((xe)->info.step.display) + /* Workarounds not handled yet */ #define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h index 0006ef812346..ee3f45b668b9 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -6,15 +6,8 @@ #ifndef __INTEL_STEP_H__ #define __INTEL_STEP_H__ -#include "xe_device_types.h" #include "xe_step.h" -#define intel_display_step_name xe_display_step_name - -static inline -const char *xe_display_step_name(struct xe_device *xe) -{ - return xe_step_name(xe->info.step.display); -} +#define intel_step_name xe_step_name #endif /* __INTEL_STEP_H__ */ From 7bb98ca6542e19a9da7cc322114bb4867acd9614 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:36 +0300 Subject: [PATCH 1405/1523] drm/xe/display: remove the unused compat HAS_GMD_ID() The display code no longer needs or uses HAS_GMD_ID(). Remove it from the compat header. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/67571bb0b10fb5c99dba57757f2213ef39bdedbb.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 3132a328e42c..cd2cb9c74981 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -80,8 +80,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_MOBILE(xe) (xe && 0) -#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 - #define INTEL_DISPLAY_STEP(xe) ((xe)->info.step.display) /* Workarounds not handled yet */ From 85878978fcd2812eaca309ead436bcd38777f6f7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:37 +0300 Subject: [PATCH 1406/1523] drm/xe/step: define more steppings E-J These are primarily needed for compat reasons with display code in upcoming changes. There's no harm in having them. While at it, add a comment about the requirement to match against GMD ID value spacing. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/88074394509d4849f8ec6ab5776394b961032cd0.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_step_types.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h index ccc9b4795e95..95b38d2d6c50 100644 --- a/drivers/gpu/drm/xe/xe_step_types.h +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -17,6 +17,10 @@ struct xe_step_info { #define STEP_ENUM_VAL(name) STEP_##name, +/* + * Always define four minor steppings 0-3 for each stepping to match GMD ID + * spacing of values. See xe_step_gmdid_get(). + */ #define STEP_NAME_LIST(func) \ func(A0) \ func(A1) \ @@ -34,7 +38,30 @@ struct xe_step_info { func(D1) \ func(D2) \ func(D3) \ - func(E0) + func(E0) \ + func(E1) \ + func(E2) \ + func(E3) \ + func(F0) \ + func(F1) \ + func(F2) \ + func(F3) \ + func(G0) \ + func(G1) \ + func(G2) \ + func(G3) \ + func(H0) \ + func(H1) \ + func(H2) \ + func(H3) \ + func(I0) \ + func(I1) \ + func(I2) \ + func(I3) \ + func(J0) \ + func(J1) \ + func(J2) \ + func(J3) /* * Symbolic steppings that do not match the hardware. These are valid both as gt From c8fc8346bdd097d624771e606aef93644c072ef2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:38 +0300 Subject: [PATCH 1407/1523] drm/i915/display: rename IS_DISPLAY_IP_RANGE() to IS_DISPLAY_VER_FULL() Unify macro naming. Be more in line with DISPLAY_VER() and IS_DISPLAY_VER(). Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/adb43f45ad0b1846c2cb9a5861ba1f727c41ae83.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_device.h | 4 ++-- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index ad60c676c84d..73d6c865693b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -161,7 +161,7 @@ enum intel_display_subplatform { #define SUPPORTS_TV(i915) (DISPLAY_INFO(i915)->supports_tv) /* Check that device has a display IP version within the specific range. */ -#define IS_DISPLAY_IP_RANGE(__i915, from, until) ( \ +#define IS_DISPLAY_VER_FULL(__i915, from, until) ( \ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ (DISPLAY_VER_FULL(__i915) >= (from) && \ DISPLAY_VER_FULL(__i915) <= (until))) @@ -182,7 +182,7 @@ enum intel_display_subplatform { * stepping bound for the specified IP version. */ #define IS_DISPLAY_IP_STEP(__i915, ipver, from, until) \ - (IS_DISPLAY_IP_RANGE((__i915), (ipver), (ipver)) && \ + (IS_DISPLAY_VER_FULL((__i915), (ipver), (ipver)) && \ IS_DISPLAY_STEP((__i915), (from), (until))) #define DISPLAY_INFO(i915) (__to_intel_display(i915)->info.__device_info) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 39ab3117265c..ef2fdbf97346 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1684,7 +1684,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, intel_dmc_load_program(dev_priv); /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */ - if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) + if (IS_DISPLAY_VER_FULL(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0, DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR); From 5a48d67ac336436ed4a79c1c9e036a409b502329 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:39 +0300 Subject: [PATCH 1408/1523] drm/i915/display: rename IS_DISPLAY_IP_STEP() to IS_DISPLAY_VER_STEP() Unify macro naming on VER. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/6861e02f3adf15d56e89890000eb195070c33c9b.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_device.h | 6 +++--- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- drivers/gpu/drm/i915/display/intel_hdcp.c | 6 +++--- drivers/gpu/drm/i915/display/intel_pmdemand.c | 2 +- drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 73d6c865693b..ccf1710cb9df 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -175,13 +175,13 @@ enum intel_display_subplatform { * hardware fix is present and the software workaround is no longer necessary. * E.g., * - * IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_B2) - * IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_C0, STEP_FOREVER) + * IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_B2) + * IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_C0, STEP_FOREVER) * * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper * stepping bound for the specified IP version. */ -#define IS_DISPLAY_IP_STEP(__i915, ipver, from, until) \ +#define IS_DISPLAY_VER_STEP(__i915, ipver, from, until) \ (IS_DISPLAY_VER_FULL((__i915), (ipver), (ipver)) && \ IS_DISPLAY_STEP((__i915), (from), (until))) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 4c91a2b69a09..65d8d48d6a9a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1340,7 +1340,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, /* Wa_14016291713 */ if ((IS_DISPLAY_VER(display, 12, 13) || - IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) && + IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) && crtc_state->has_psr && !crtc_state->has_panel_replay) { plane_state->no_fbc_reason = "PSR1 enabled (Wa_14016291713)"; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 05402ae6b569..94418f218448 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -42,11 +42,11 @@ intel_hdcp_disable_hdcp_line_rekeying(struct intel_encoder *encoder, return; if (DISPLAY_VER(dev_priv) >= 14) { - if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_D0, STEP_FOREVER)) + if (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_D0, STEP_FOREVER)) intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(hdcp->cpu_transcoder), 0, HDCP_LINE_REKEY_DISABLE); - else if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 1), STEP_B0, STEP_FOREVER) || - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(20, 0), STEP_B0, STEP_FOREVER)) + else if (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 1), STEP_B0, STEP_FOREVER) || + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(20, 0), STEP_B0, STEP_FOREVER)) intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, hdcp->cpu_transcoder), 0, TRANS_DDI_HDCP_LINE_REKEY_DISABLE); diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index 9ca981b7a12c..ceaf9e3147da 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -92,7 +92,7 @@ int intel_pmdemand_init(struct drm_i915_private *i915) &pmdemand_state->base, &intel_pmdemand_funcs); - if (IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) + if (IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) /* Wa_14016740474 */ intel_de_rmw(i915, XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 572dcdf77453..257526362b39 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1868,14 +1868,14 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * cause issues if non-supported panels are used. */ if (!intel_dp->psr.panel_replay_enabled && - (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || + (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || IS_ALDERLAKE_P(dev_priv))) intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), 0, ADLP_1_BASED_X_GRANULARITY); /* Wa_16012604467:adlp,mtl[a0,b0] */ if (!intel_dp->psr.panel_replay_enabled && - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, MTL_CLKGATE_DIS_TRANS(dev_priv, cpu_transcoder), 0, @@ -2057,7 +2057,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) if (intel_dp->psr.sel_update_enabled) { /* Wa_16012604467:adlp,mtl[a0,b0] */ if (!intel_dp->psr.panel_replay_enabled && - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, MTL_CLKGATE_DIS_TRANS(dev_priv, cpu_transcoder), MTL_CLKGATE_DIS_TRANS_DMASC_GATING_DIS, 0); @@ -2542,7 +2542,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, /* Wa_14014971492 */ if (!crtc_state->has_panel_replay && - ((IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || + ((IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || IS_ALDERLAKE_P(dev_priv) || IS_TIGERLAKE(dev_priv))) && crtc_state->splitter.enable) crtc_state->psr2_su_area.y1 = 0; From 4446387572fb7952a387b8e45e8a96b4f08a2a53 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 21 Aug 2024 12:50:36 +0300 Subject: [PATCH 1409/1523] drm/i915/display: identify display steppings in display probe Both i915 and xe have code to identify display steppings. Start deduplicating this by, uh, adding a third copy in display code. This is not yet used for anything other than debug logging. We'll switch over later. For platforms before GMD ID, attach the mapping from PCI revision to stepping in the platform and subplatform descriptors. This is a considerably cleaner approach than having it completely separate. Also add a separate field for stepping in display runtime info, preserving the value from GMD ID. v2: Handle NULL subdesc (Matt) Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821095036.2044654-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_display_device.c | 225 +++++++++++++++++- .../drm/i915/display/intel_display_device.h | 3 +- .../drm/xe/compat-i915-headers/intel_step.h | 1 + 3 files changed, 217 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index b28d55fa0c3a..dfa923672492 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -16,14 +16,25 @@ #include "intel_display_power.h" #include "intel_display_reg_defs.h" #include "intel_fbc.h" +#include "intel_step.h" __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field initialization overrides for display info"); +struct stepping_desc { + const enum intel_step *map; /* revid to step map */ + size_t size; /* map size */ +}; + +#define STEP_INFO(_map) \ + .step_info.map = _map, \ + .step_info.size = ARRAY_SIZE(_map) + struct subplatform_desc { enum intel_display_subplatform subplatform; const char *name; const u16 *pciidlist; + struct stepping_desc step_info; }; struct platform_desc { @@ -31,6 +42,7 @@ struct platform_desc { const char *name; const struct subplatform_desc *subplatforms; const struct intel_display_device_info *info; /* NULL for GMD ID */ + struct stepping_desc step_info; }; #define PLATFORM(_platform) \ @@ -610,6 +622,13 @@ static const u16 skl_ulx_ids[] = { 0 }; +static const enum intel_step skl_steppings[] = { + [0x6] = STEP_G0, + [0x7] = STEP_H0, + [0x9] = STEP_J0, + [0xA] = STEP_I1, +}; + static const struct platform_desc skl_desc = { PLATFORM(SKYLAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -618,6 +637,7 @@ static const struct platform_desc skl_desc = { {}, }, .info = &skl_display, + STEP_INFO(skl_steppings), }; static const u16 kbl_ult_ids[] = { @@ -634,6 +654,16 @@ static const u16 kbl_ulx_ids[] = { 0 }; +static const enum intel_step kbl_steppings[] = { + [1] = STEP_B0, + [2] = STEP_B0, + [3] = STEP_B0, + [4] = STEP_C0, + [5] = STEP_B1, + [6] = STEP_B1, + [7] = STEP_C0, +}; + static const struct platform_desc kbl_desc = { PLATFORM(KABYLAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -642,6 +672,7 @@ static const struct platform_desc kbl_desc = { {}, }, .info = &skl_display, + STEP_INFO(kbl_steppings), }; static const u16 cfl_ult_ids[] = { @@ -706,6 +737,13 @@ static const struct platform_desc cml_desc = { BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) +static const enum intel_step bxt_steppings[] = { + [0xA] = STEP_C0, + [0xB] = STEP_C0, + [0xC] = STEP_D0, + [0xD] = STEP_E0, +}; + static const struct platform_desc bxt_desc = { PLATFORM(BROXTON), .info = &(const struct intel_display_device_info) { @@ -714,6 +752,11 @@ static const struct platform_desc bxt_desc = { .__runtime_defaults.ip.ver = 9, }, + STEP_INFO(bxt_steppings), +}; + +static const enum intel_step glk_steppings[] = { + [3] = STEP_B0, }; static const struct platform_desc glk_desc = { @@ -725,6 +768,7 @@ static const struct platform_desc glk_desc = { .__runtime_defaults.ip.ver = 10, }, + STEP_INFO(glk_steppings), }; #define ICL_DISPLAY \ @@ -773,6 +817,10 @@ static const u16 icl_port_f_ids[] = { 0 }; +static const enum intel_step icl_steppings[] = { + [7] = STEP_D0, +}; + static const struct platform_desc icl_desc = { PLATFORM(ICELAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -784,6 +832,7 @@ static const struct platform_desc icl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D) | BIT(PORT_E), }, + STEP_INFO(icl_steppings), }; static const struct intel_display_device_info jsl_ehl_display = { @@ -792,14 +841,21 @@ static const struct intel_display_device_info jsl_ehl_display = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), }; +static const enum intel_step jsl_ehl_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, +}; + static const struct platform_desc jsl_desc = { PLATFORM(JASPERLAKE), .info = &jsl_ehl_display, + STEP_INFO(jsl_ehl_steppings), }; static const struct platform_desc ehl_desc = { PLATFORM(ELKHARTLAKE), .info = &jsl_ehl_display, + STEP_INFO(jsl_ehl_steppings), }; #define XE_D_DISPLAY \ @@ -850,10 +906,23 @@ static const u16 tgl_uy_ids[] = { 0 }; +static const enum intel_step tgl_steppings[] = { + [0] = STEP_B0, + [1] = STEP_D0, +}; + +static const enum intel_step tgl_uy_steppings[] = { + [0] = STEP_A0, + [1] = STEP_C0, + [2] = STEP_C0, + [3] = STEP_D0, +}; + static const struct platform_desc tgl_desc = { PLATFORM(TIGERLAKE), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_TIGERLAKE_UY, "UY", tgl_uy_ids }, + { INTEL_DISPLAY_TIGERLAKE_UY, "UY", tgl_uy_ids, + STEP_INFO(tgl_uy_steppings) }, {}, }, .info = &(const struct intel_display_device_info) { @@ -866,6 +935,12 @@ static const struct platform_desc tgl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2) | BIT(PORT_TC3) | BIT(PORT_TC4) | BIT(PORT_TC5) | BIT(PORT_TC6), }, + STEP_INFO(tgl_steppings), +}; + +static const enum intel_step dg1_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, }; static const struct platform_desc dg1_desc = { @@ -876,6 +951,13 @@ static const struct platform_desc dg1_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), }, + STEP_INFO(dg1_steppings), +}; + +static const enum intel_step rkl_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, + [4] = STEP_C0, }; static const struct platform_desc rkl_desc = { @@ -892,6 +974,7 @@ static const struct platform_desc rkl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), }, + STEP_INFO(rkl_steppings), }; static const u16 adls_rpls_ids[] = { @@ -899,10 +982,24 @@ static const u16 adls_rpls_ids[] = { 0 }; +static const enum intel_step adl_s_steppings[] = { + [0x0] = STEP_A0, + [0x1] = STEP_A2, + [0x4] = STEP_B0, + [0x8] = STEP_B0, + [0xC] = STEP_C0, +}; + +static const enum intel_step adl_s_rpl_s_steppings[] = { + [0x4] = STEP_D0, + [0xC] = STEP_C0, +}; + static const struct platform_desc adl_s_desc = { PLATFORM(ALDERLAKE_S), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_ALDERLAKE_S_RAPTORLAKE_S, "RPL-S", adls_rpls_ids }, + { INTEL_DISPLAY_ALDERLAKE_S_RAPTORLAKE_S, "RPL-S", adls_rpls_ids, + STEP_INFO(adl_s_rpl_s_steppings) }, {}, }, .info = &(const struct intel_display_device_info) { @@ -913,6 +1010,7 @@ static const struct platform_desc adl_s_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_TC1) | BIT(PORT_TC2) | BIT(PORT_TC3) | BIT(PORT_TC4), }, + STEP_INFO(adl_s_steppings), }; #define XE_LPD_FEATURES \ @@ -986,15 +1084,34 @@ static const u16 adlp_rplp_ids[] = { 0 }; +static const enum intel_step adl_p_steppings[] = { + [0x0] = STEP_A0, + [0x4] = STEP_B0, + [0x8] = STEP_C0, + [0xC] = STEP_D0, +}; + +static const enum intel_step adl_p_adl_n_steppings[] = { + [0x0] = STEP_D0, +}; + +static const enum intel_step adl_p_rpl_pu_steppings[] = { + [0x4] = STEP_E0, +}; + static const struct platform_desc adl_p_desc = { PLATFORM(ALDERLAKE_P), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_ALDERLAKE_P_ALDERLAKE_N, "ADL-N", adlp_adln_ids }, - { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_U, "RPL-U", adlp_rplu_ids }, - { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_P, "RPL-P", adlp_rplp_ids }, + { INTEL_DISPLAY_ALDERLAKE_P_ALDERLAKE_N, "ADL-N", adlp_adln_ids, + STEP_INFO(adl_p_adl_n_steppings) }, + { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_P, "RPL-P", adlp_rplp_ids, + STEP_INFO(adl_p_rpl_pu_steppings) }, + { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_U, "RPL-U", adlp_rplu_ids, + STEP_INFO(adl_p_rpl_pu_steppings) }, {}, }, .info = &xe_lpd_display, + STEP_INFO(adl_p_steppings), }; static const struct intel_display_device_info xe_hpd_display = { @@ -1023,12 +1140,33 @@ static const u16 dg2_g12_ids[] = { 0 }; +static const enum intel_step dg2_g10_steppings[] = { + [0x0] = STEP_A0, + [0x1] = STEP_A0, + [0x4] = STEP_B0, + [0x8] = STEP_C0, +}; + +static const enum intel_step dg2_g11_steppings[] = { + [0x0] = STEP_B0, + [0x4] = STEP_C0, + [0x5] = STEP_C0, +}; + +static const enum intel_step dg2_g12_steppings[] = { + [0x0] = STEP_C0, + [0x1] = STEP_C0, +}; + static const struct platform_desc dg2_desc = { PLATFORM(DG2), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_DG2_G10, "G10", dg2_g10_ids }, - { INTEL_DISPLAY_DG2_G11, "G11", dg2_g11_ids }, - { INTEL_DISPLAY_DG2_G12, "G12", dg2_g12_ids }, + { INTEL_DISPLAY_DG2_G10, "G10", dg2_g10_ids, + STEP_INFO(dg2_g10_steppings) }, + { INTEL_DISPLAY_DG2_G11, "G11", dg2_g11_ids, + STEP_INFO(dg2_g11_steppings) }, + { INTEL_DISPLAY_DG2_G12, "G12", dg2_g12_ids, + STEP_INFO(dg2_g12_steppings) }, {}, }, .info = &xe_hpd_display, @@ -1261,13 +1399,66 @@ find_subplatform_desc(struct pci_dev *pdev, const struct platform_desc *desc) return NULL; } +static enum intel_step get_pre_gmdid_step(struct intel_display *display, + const struct stepping_desc *main, + const struct stepping_desc *sub) +{ + struct pci_dev *pdev = to_pci_dev(display->drm->dev); + const enum intel_step *map = main->map; + int size = main->size; + int revision = pdev->revision; + enum intel_step step; + + /* subplatform stepping info trumps main platform info */ + if (sub && sub->map && sub->size) { + map = sub->map; + size = sub->size; + } + + /* not all platforms define steppings, and it's fine */ + if (!map || !size) + return STEP_NONE; + + if (revision < size && map[revision] != STEP_NONE) { + step = map[revision]; + } else { + drm_warn(display->drm, "Unknown revision 0x%02x\n", revision); + + /* + * If we hit a gap in the revision to step map, use the information + * for the next revision. + * + * This may be wrong in all sorts of ways, especially if the + * steppings in the array are not monotonically increasing, but + * it's better than defaulting to 0. + */ + while (revision < size && map[revision] == STEP_NONE) + revision++; + + if (revision < size) { + drm_dbg_kms(display->drm, "Using display stepping for revision 0x%02x\n", + revision); + step = map[revision]; + } else { + drm_dbg_kms(display->drm, "Using future display stepping\n"); + step = STEP_FUTURE; + } + } + + drm_WARN_ON(display->drm, step == STEP_NONE); + + return step; +} + void intel_display_device_probe(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); const struct intel_display_device_info *info; struct intel_display_ip_ver ip_ver = {}; const struct platform_desc *desc; const struct subplatform_desc *subdesc; + enum intel_step step; /* Add drm device backpointer as early as possible. */ i915->display.drm = &i915->drm; @@ -1307,13 +1498,25 @@ void intel_display_device_probe(struct drm_i915_private *i915) DISPLAY_RUNTIME_INFO(i915)->subplatform = subdesc->subplatform; } - if (ip_ver.ver || ip_ver.rel || ip_ver.step) + if (ip_ver.ver || ip_ver.rel || ip_ver.step) { DISPLAY_RUNTIME_INFO(i915)->ip = ip_ver; + step = STEP_A0 + ip_ver.step; + if (step > STEP_FUTURE) { + drm_dbg_kms(display->drm, "Using future display stepping\n"); + step = STEP_FUTURE; + } + } else { + step = get_pre_gmdid_step(display, &desc->step_info, + subdesc ? &subdesc->step_info : NULL); + } - drm_info(&i915->drm, "Found %s%s%s (device ID %04x) display version %u.%02u\n", + DISPLAY_RUNTIME_INFO(i915)->step = step; + + drm_info(&i915->drm, "Found %s%s%s (device ID %04x) display version %u.%02u stepping %s\n", desc->name, subdesc ? "/" : "", subdesc ? subdesc->name : "", pdev->device, DISPLAY_RUNTIME_INFO(i915)->ip.ver, - DISPLAY_RUNTIME_INFO(i915)->ip.rel); + DISPLAY_RUNTIME_INFO(i915)->ip.rel, + step != STEP_NONE ? intel_step_name(step) : "N/A"); return; diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index ccf1710cb9df..4615c3ba60aa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -201,8 +201,9 @@ struct intel_display_runtime_info { struct intel_display_ip_ver { u16 ver; u16 rel; - u16 step; + u16 step; /* hardware */ } ip; + int step; /* symbolic */ u32 rawclk_freq; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h index ee3f45b668b9..2cf13a572ab0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -8,6 +8,7 @@ #include "xe_step.h" +#define intel_step xe_step #define intel_step_name xe_step_name #endif /* __INTEL_STEP_H__ */ From dfc88eac97e2177844e5dfa877f2ff60c73bf507 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:41 +0300 Subject: [PATCH 1410/1523] drm/i915/display: switch to display detected steppings Move the stepping related macros over to display. We can proceed to remove the compat macros from xe. Note: Looks like we've failed to actually initialize the display stepping for GMD ID based platforms in the xe driver. It does get set in display runtime info, but until now the compat macro used xe->info.step.display which was not set for GMD ID. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/ce8bb94e1a801d3c345f1810837bdd1964c3af75.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_device.c | 2 ++ drivers/gpu/drm/i915/display/intel_display_device.h | 6 ++++++ drivers/gpu/drm/i915/i915_drv.h | 5 ----- drivers/gpu/drm/i915/intel_device_info.c | 1 - drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 5 ----- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index dfa923672492..1b46ba985580 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1715,6 +1715,8 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "display version: %u\n", runtime->ip.ver); + drm_printf(p, "display stepping: %s\n", intel_step_name(runtime->step)); + #define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, str_yes_no(info->name)) DEV_INFO_DISPLAY_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 4615c3ba60aa..dfb0c8bf5ca2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -194,6 +194,12 @@ enum intel_display_subplatform { #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) +#define INTEL_DISPLAY_STEP(__i915) (DISPLAY_RUNTIME_INFO(__i915)->step) + +#define IS_DISPLAY_STEP(__i915, since, until) \ + (drm_WARN_ON(__to_intel_display(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ + INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) + struct intel_display_runtime_info { enum intel_display_platform platform; enum intel_display_subplatform subplatform; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 94f7f6cc444c..3b1b16e71cf9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -408,15 +408,10 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) #define INTEL_REVID(i915) (to_pci_dev((i915)->drm.dev)->revision) -#define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) #define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step) -#define IS_DISPLAY_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ - INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) - #define IS_GRAPHICS_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \ INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until)) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 91acbf99574c..d92b3050ac78 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -108,7 +108,6 @@ void intel_device_info_print(const struct intel_device_info *info, drm_printf(p, "graphics stepping: %s\n", intel_step_name(runtime->step.graphics_step)); drm_printf(p, "media stepping: %s\n", intel_step_name(runtime->step.media_step)); - drm_printf(p, "display stepping: %s\n", intel_step_name(runtime->step.display_step)); drm_printf(p, "base die stepping: %s\n", intel_step_name(runtime->step.basedie_step)); drm_printf(p, "gt: %d\n", info->gt); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index cd2cb9c74981..b7b12b20e390 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -80,11 +80,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_MOBILE(xe) (xe && 0) -#define INTEL_DISPLAY_STEP(xe) ((xe)->info.step.display) - -/* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) - #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) #define IS_GEN9_BC(xe) (0) From e0446e122eae43c173d0c99375e1b2cc8da009a7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:42 +0300 Subject: [PATCH 1411/1523] drm/i915: remove display stepping handling The code is now unused. Remove. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/d64c5e8563dcb8858569d5578230f6b675acdafb.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_step.c | 79 ++++++++++++++----------------- drivers/gpu/drm/i915/intel_step.h | 1 - 2 files changed, 36 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 80464e4edcce..285b96fadfd5 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -23,8 +23,7 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x -#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x +#define COMMON_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x static const struct intel_step_info skl_revids[] = { [0x6] = { COMMON_STEP(G0) }, @@ -34,13 +33,13 @@ static const struct intel_step_info skl_revids[] = { }; static const struct intel_step_info kbl_revids[] = { - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, - [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 }, - [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 }, - [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 }, - [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 }, - [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 }, + [1] = { COMMON_STEP(B0) }, + [2] = { COMMON_STEP(C0) }, + [3] = { COMMON_STEP(D0) }, + [4] = { COMMON_STEP(F0) }, + [5] = { COMMON_STEP(C0) }, + [6] = { COMMON_STEP(D1) }, + [7] = { COMMON_STEP(G0) }, }; static const struct intel_step_info bxt_revids[] = { @@ -64,16 +63,16 @@ static const struct intel_step_info jsl_ehl_revids[] = { }; static const struct intel_step_info tgl_uy_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, - [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, - [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, + [2] = { COMMON_STEP(B1) }, + [3] = { COMMON_STEP(C0) }, }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct intel_step_info tgl_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, }; static const struct intel_step_info rkl_revids[] = { @@ -88,49 +87,49 @@ static const struct intel_step_info dg1_revids[] = { }; static const struct intel_step_info adls_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct intel_step_info adlp_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(C0) }, }; static const struct intel_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, }; static const struct intel_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, - [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x5] = { COMMON_STEP(B1) }, }; static const struct intel_step_info dg2_g12_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_C0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, }; static const struct intel_step_info adls_rpls_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_D0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, + [0x4] = { COMMON_STEP(D0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct intel_step_info adlp_rplp_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_E0 }, + [0x4] = { COMMON_STEP(C0) }, }; static const struct intel_step_info adlp_n_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, }; static u8 gmd_to_intel_step(struct drm_i915_private *i915, @@ -158,11 +157,6 @@ void intel_step_init(struct drm_i915_private *i915) &RUNTIME_INFO(i915)->graphics.ip); step.media_step = gmd_to_intel_step(i915, &RUNTIME_INFO(i915)->media.ip); - step.display_step = STEP_A0 + DISPLAY_RUNTIME_INFO(i915)->ip.step; - if (step.display_step >= STEP_FUTURE) { - drm_dbg(&i915->drm, "Using future display steppings\n"); - step.display_step = STEP_FUTURE; - } RUNTIME_INFO(i915)->step = step; @@ -252,7 +246,6 @@ void intel_step_init(struct drm_i915_private *i915) } else { drm_dbg(&i915->drm, "Using future steppings\n"); step.graphics_step = STEP_FUTURE; - step.display_step = STEP_FUTURE; } } diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 96dfca4cba73..83bd1190edf5 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -16,7 +16,6 @@ struct intel_step_info { * the expectation breaks gmd_to_intel_step(). */ u8 graphics_step; /* Represents the compute tile on Xe_HPC */ - u8 display_step; u8 media_step; u8 basedie_step; }; From 168448111279e639e3296ee5c19e0cd96756422e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:43 +0300 Subject: [PATCH 1412/1523] drm/xe: remove display stepping handling The code is now unused. Remove. Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/19bc7a3197f2bc6f3c0d337487ab19f3b7f5612a.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_debugfs.c | 3 +- drivers/gpu/drm/xe/xe_pci.c | 3 +- drivers/gpu/drm/xe/xe_step.c | 57 +++++++++++++----------------- drivers/gpu/drm/xe/xe_step_types.h | 1 - 4 files changed, 27 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1011e5d281fa..a64bae36e0e3 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -47,10 +47,9 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); - drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + drm_printf(&p, "stepping G:%s M:%s B:%s\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); drm_printf(&p, "platform %d\n", xe->info.platform); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 732ee0d02124..da090f0e592f 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -830,10 +830,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi); - drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, B:%s)\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n", diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index eaf1b718f26c..c77b5c317fa0 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -28,23 +28,17 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_GT_MEDIA_STEP(x_) \ +#define COMMON_STEP(x_) \ .graphics = STEP_##x_, \ .media = STEP_##x_ -#define COMMON_STEP(x_) \ - COMMON_GT_MEDIA_STEP(x_), \ - .graphics = STEP_##x_, \ - .media = STEP_##x_, \ - .display = STEP_##x_ - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct xe_step_info tgl_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, }; static const struct xe_step_info dg1_revids[] = { @@ -53,49 +47,49 @@ static const struct xe_step_info dg1_revids[] = { }; static const struct xe_step_info adls_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adls_rpls_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x4] = { COMMON_STEP(D0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adlp_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adlp_rpl_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, + [0x4] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adln_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, }; static const struct xe_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, }; static const struct xe_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, - [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x5] = { COMMON_STEP(B1) }, }; static const struct xe_step_info dg2_g12_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, }; static const struct xe_step_info pvc_revid_step_tbl[] = { @@ -195,7 +189,6 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) } else { drm_dbg(&xe->drm, "Using future steppings\n"); step.graphics = STEP_FUTURE; - step.display = STEP_FUTURE; } } diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h index 95b38d2d6c50..d978cc2512f2 100644 --- a/drivers/gpu/drm/xe/xe_step_types.h +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -11,7 +11,6 @@ struct xe_step_info { u8 graphics; u8 media; - u8 display; u8 basedie; }; From 1ac66c4960e1c735eb6edfd3e6d52bebb2aa347e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:44 +0100 Subject: [PATCH 1413/1523] MAINTAINERS: Add sonet.h to ATM section of MAINTAINERS This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" in their name. It seems that sonet.h is included in ATM related source files, and thus that ATM is the most relevant section for these files. Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a964a34651f5..c682203915a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3504,7 +3504,9 @@ S: Maintained W: http://linux-atm.sourceforge.net F: drivers/atm/ F: include/linux/atm* +F: include/linux/sonet.h F: include/uapi/linux/atm* +F: include/uapi/linux/sonet.h ATMEL MACB ETHERNET DRIVER M: Nicolas Ferre From eb208fecd77d898709c25af680487289fd5f3e16 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:45 +0100 Subject: [PATCH 1414/1523] MAINTAINERS: Add net_tstamp.h to SOCKET TIMESTAMPING section This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" in their name. Cc: Richard Cochran Cc: Willem de Bruijn Signed-off-by: Simon Horman Acked-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c682203915a2..e5b9a4d9bc21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21057,6 +21057,7 @@ SOCKET TIMESTAMPING M: Willem de Bruijn S: Maintained F: Documentation/networking/timestamping.rst +F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h F: tools/testing/selftests/net/so_txtime.c From 8cb0a938d90b25f123fcb2e24bbda9eaabd79c9e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:46 +0100 Subject: [PATCH 1415/1523] MAINTAINERS: Add limited globs for Networking headers This aims to add limited globs to improve the coverage of header files in the NETWORKING DRIVERS and NETWORKING [GENERAL] sections. It is done so in a minimal way to exclude overlap with other sections. And so as not to require "X" entries to exclude files otherwise matched by these new globs. While imperfect, due to it's limited nature, this does extend coverage of header files by these sections. And aims to automatically cover new files that seem very likely belong to these sections. The include/linux/netdev* glob (both sections) + Subsumes the entries for: - include/linux/netdevice.h + Extends the sections to cover - include/linux/netdevice_xmit.h - include/linux/netdev_features.h The include/uapi/linux/netdev* globs: (both sections) + Subsumes the entries for: - include/linux/netdevice.h + Extends the sections to cover - include/linux/netdev.h The include/linux/skbuff* glob (NETWORKING [GENERAL] section only): + Subsumes the entry for: - include/linux/skbuff.h + Extends the section to cover - include/linux/skbuff_ref.h A include/uapi/linux/net_* glob was not added to the NETWORKING [GENERAL] section. Although it would subsume the entry for include/uapi/linux/net_namespace.h, which is fine, it would also extend coverage to: - include/uapi/linux/net_dropmon.h, which belongs to the NETWORK DROP MONITOR section - include/uapi/linux/net_tstamp.h which, as per an earlier patch in this series, belongs to the SOCKET TIMESTAMPING section Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e5b9a4d9bc21..03d571b131eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15884,10 +15884,10 @@ F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h -F: include/linux/netdevice.h +F: include/linux/netdev* F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ @@ -15940,13 +15940,13 @@ F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h -F: include/linux/netdevice.h -F: include/linux/skbuff.h +F: include/linux/netdev* +F: include/linux/skbuff* F: include/net/ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* F: lib/net_utils.c F: lib/random32.c F: net/ From f2d20c9b97f0df64841b89fa1ad3e9c92f7377ae Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:47 +0100 Subject: [PATCH 1416/1523] MAINTAINERS: Add header files to NETWORKING sections This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" or "skbuff" in their name. This patch adds a number of such files to the NETWORKING DRIVERS and NETWORKING [GENERAL] sections. Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 03d571b131eb..798f1ffcbbaa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15879,13 +15879,16 @@ F: drivers/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h +F: include/linux/ethtool_netlink.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdev* +F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h +F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ @@ -15939,14 +15942,28 @@ F: include/linux/framer/framer-provider.h F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h +F: include/linux/inet.h +F: include/linux/inet_diag.h F: include/linux/net.h F: include/linux/netdev* +F: include/linux/netlink.h +F: include/linux/netpoll.h +F: include/linux/rtnetlink.h +F: include/linux/seq_file_net.h F: include/linux/skbuff* F: include/net/ +F: include/uapi/linux/genetlink.h +F: include/uapi/linux/hsr_netlink.h F: include/uapi/linux/in.h +F: include/uapi/linux/inet_diag.h +F: include/uapi/linux/nbd-netlink.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h +F: include/uapi/linux/netconf.h F: include/uapi/linux/netdev* +F: include/uapi/linux/netlink.h +F: include/uapi/linux/netlink_diag.h +F: include/uapi/linux/rtnetlink.h F: lib/net_utils.c F: lib/random32.c F: net/ From 46097a92662496394628cb41138e681d6074cce7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:48 +0100 Subject: [PATCH 1417/1523] MAINTAINERS: Mark JME Network Driver as Odd Fixes This driver only appears to have received sporadic clean-ups, typically part of some tree-wide activity, and fixes for quite some time. And according to the maintainer, Guo-Fu Tseng, the device has been EOLed for a long time (see Link). Accordingly, it seems appropriate to mark this driver as odd fixes. Cc: Moon Yeounsu Cc: Guo-Fu Tseng Link: https://lore.kernel.org/netdev/20240805003139.M94125@cooldavid.org/ Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 798f1ffcbbaa..0c94ec0ca478 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11995,7 +11995,7 @@ F: fs/jfs/ JME NETWORK DRIVER M: Guo-Fu Tseng L: netdev@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/jme.* JOURNALLING FLASH FILE SYSTEM V2 (JFFS2) From 3e878fe5a0b139838a65f50a3df3caf3299dbc24 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 03:57:39 -0400 Subject: [PATCH 1418/1523] bcachefs: add missing inode_walker_exit() fix a small leak Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 6801c37ee803..83bd31b44aad 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2215,6 +2215,8 @@ int bch2_check_xattrs(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); + + inode_walker_exit(&inode); bch_err_fn(c, ret); return ret; } From a592cdf5164d3feb821085df71f63e70e8b8b08c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:41:00 -0400 Subject: [PATCH 1419/1523] bcachefs: don't use rht_bucket() in btree_key_cache_scan() rht_bucket() does strange complicated things when a rehash is in progress. Instead, just skip scanning when a rehash is in progress: scanning is going to be more expensive (many more empty slots to cover), and some sort of infinite loop is being observed Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 9b3ec2a3b8ce..fda7998734cb 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -778,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); + + /* + * Scanning is expensive while a rehash is in progress - most elements + * will be on the new hashtable, if it's in progress + * + * A rehash could still start while we're scanning - that's ok, we'll + * still see most elements. + */ + if (unlikely(tbl->nest)) { + rcu_read_unlock(); + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + return SHRINK_STOP; + } + if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; @@ -785,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, do { struct rhash_head *pos, *next; - pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); + pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); @@ -866,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); - if (tbl) + if (tbl) { + if (tbl->nest) { + /* wait for in progress rehash */ + rcu_read_unlock(); + mutex_lock(&bc->table.mutex); + mutex_unlock(&bc->table.mutex); + rcu_read_lock(); + continue; + } for (i = 0; i < tbl->size; i++) - rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { + while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) { + ck = container_of(pos, struct bkey_cached, hash); bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } + } rcu_read_unlock(); } From ce61b605a00502c59311d0a4b1f58d62b48272d0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 20 Aug 2024 22:07:38 +0900 Subject: [PATCH 1420/1523] ksmbd: the buffer of smb2 query dir response has at least 1 byte When STATUS_NO_MORE_FILES status is set to smb2 query dir response, ->StructureSize is set to 9, which mean buffer has 1 byte. This issue occurs because ->Buffer[1] in smb2_query_directory_rsp to flex-array. Fixes: eb3e28c1e89b ("smb3: Replace smb2pdu 1-element arrays with flex-arrays") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 0bc9edf22ba4..e9204180919e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4409,7 +4409,8 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->OutputBufferLength = cpu_to_le32(0); rsp->Buffer[0] = 0; rc = ksmbd_iov_pin_rsp(work, (void *)rsp, - sizeof(struct smb2_query_directory_rsp)); + offsetof(struct smb2_query_directory_rsp, Buffer) + + 1); if (rc) goto err_out; } else { From 2186a116538a715b20e15f84fdd3545e5fe0a39b Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:50 +0000 Subject: [PATCH 1421/1523] smb/server: fix return value of smb2_open() In most error cases, error code is not returned in smb2_open(), __process_request() will not print error message. Fix this by returning the correct value at the end of smb2_open(). Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e9204180919e..55d4e69bd9c9 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3713,7 +3713,7 @@ err_out2: kfree(name); kfree(lc); - return 0; + return rc; } static int readdir_info_level_struct_sz(int info_level) From 4e8771a3666c8f216eefd6bd2fd50121c6c437db Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:51 +0000 Subject: [PATCH 1422/1523] smb/server: fix potential null-ptr-deref of lease_ctx_info in smb2_open() null-ptr-deref will occur when (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) and parse_lease_state() return NULL. Fix this by check if 'lease_ctx_info' is NULL. Additionally, remove the redundant parentheses in parse_durable_handle_context(). Signed-off-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/server/oplock.c | 2 +- fs/smb/server/smb2pdu.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index a8f52c4ebbda..e546ffa57b55 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1510,7 +1510,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request * - * Return: oplock state, -ENOENT if create lease context not found + * Return: allocated lease context object on success, otherwise NULL */ struct lease_ctx_info *parse_lease_state(void *open_req) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 55d4e69bd9c9..5d170ab0817d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2770,8 +2770,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, } } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { dh_info->CreateGuid = durable_v2_blob->CreateGuid; dh_info->persistent = @@ -2791,8 +2791,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { ksmbd_debug(SMB, "Request for durable open\n"); dh_info->type = dh_idx; } @@ -3414,7 +3414,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } } else { - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && lc) { if (S_ISDIR(file_inode(filp)->i_mode)) { lc->req_state &= ~SMB2_LEASE_WRITE_CACHING_LE; lc->is_dir = true; From 0dd771b7d60b8281f10f6721783c60716d22075f Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:52 +0000 Subject: [PATCH 1423/1523] smb/server: remove useless assignment of 'file_present' in smb2_open() The variable is already true here. Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5d170ab0817d..cd23517d9640 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3096,7 +3096,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - file_present = true; idmap = mnt_idmap(path.mnt); } else { if (rc != -ENOENT) From 2b7e0573a49064d9c94c114b4471327cd96ae39c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:54 +0000 Subject: [PATCH 1424/1523] smb/server: update misguided comment of smb2_allocate_rsp_buf() smb2_allocate_rsp_buf() will return other error code except -ENOMEM. Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cd23517d9640..20846a4d3031 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -519,7 +519,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work) * smb2_allocate_rsp_buf() - allocate smb2 response buffer * @work: smb work containing smb request buffer * - * Return: 0 on success, otherwise -ENOMEM + * Return: 0 on success, otherwise error */ int smb2_allocate_rsp_buf(struct ksmbd_work *work) { From d7fd2941ae9a67423d1c7bee985f240e4686634f Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 21 Aug 2024 18:55:06 +0200 Subject: [PATCH 1425/1523] s390/boot: Avoid possible physmem_info segment corruption When physical memory for the kernel image is allocated it does not consider extra memory required for offsetting the image start to match it with the lower 20 bits of KASLR virtual base address. That might lead to kernel access beyond its memory range. Suggested-by: Vasily Gorbik Fixes: 693d41f7c938 ("s390/mm: Restore mapping of kernel image using large pages") Signed-off-by: Alexander Gordeev Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cff34744b5a9..d69f1dfd3b1e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -457,9 +457,9 @@ void startup_kernel(void) */ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - unsigned long end = ident_map_size - kaslr_large_page_offset; + unsigned long size = kernel_size + kaslr_large_page_offset; - __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end); + __kaslr_offset_phys = randomize_within_range(size, _SEGMENT_SIZE, 0, ident_map_size); } if (!__kaslr_offset_phys) __kaslr_offset_phys = nokaslr_offset_phys; From 1642285e511c2a40b14e87a41aa8feace6123036 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 21 Aug 2024 18:55:07 +0200 Subject: [PATCH 1426/1523] s390/boot: Fix KASLR base offset off by __START_KERNEL bytes Symbol offsets to the KASLR base do not match symbol address in the vmlinux image. That is the result of setting the KASLR base to the beginning of .text section as result of an optimization. Revert that optimization and allocate virtual memory for the whole kernel image including __START_KERNEL bytes as per the linker script. That allows keeping the semantics of the KASLR base offset in sync with other architectures. Rename __START_KERNEL to TEXT_OFFSET, since it represents the offset of the .text section within the kernel image, rather than a virtual address. Still skip mapping TEXT_OFFSET bytes to save memory on pgtables and provoke exceptions in case an attempt to access this area is made, as no kernel symbol may reside there. In case CONFIG_KASAN is enabled the location counter might exceed the value of TEXT_OFFSET, while the decompressor linker script forcefully resets it to TEXT_OFFSET, which leads to a sections overlap link failure. Use MAX() expression to avoid that. Reported-by: Omar Sandoval Closes: https://lore.kernel.org/linux-s390/ZnS8dycxhtXBZVky@telecaster.dhcp.thefacebook.com/ Fixes: 56b1069c40c7 ("s390/boot: Rework deployment of the kernel image") Signed-off-by: Alexander Gordeev Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 55 ++++++++++++++++++---------------- arch/s390/boot/vmem.c | 14 +++++++-- arch/s390/boot/vmlinux.lds.S | 7 ++++- arch/s390/include/asm/page.h | 3 +- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/s390/tools/relocs.c | 2 +- 6 files changed, 52 insertions(+), 31 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d69f1dfd3b1e..c73b5118ad42 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -162,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, loc = (long)*reloc + phys_offset; if (loc < min_addr || loc > max_addr) error("64-bit relocation outside of kernel!\n"); - *(u64 *)loc += offset - __START_KERNEL; + *(u64 *)loc += offset; } } @@ -177,7 +177,7 @@ static void kaslr_adjust_got(unsigned long offset) */ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { if (*entry) - *entry += offset - __START_KERNEL; + *entry += offset; } } @@ -252,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); @@ -389,31 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset) #endif } -static void fixup_vmlinux_info(void) -{ - vmlinux.entry -= __START_KERNEL; - kaslr_adjust_vmlinux_info(-__START_KERNEL); -} - void startup_kernel(void) { - unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size; - unsigned long nokaslr_offset_phys, kaslr_large_page_offset; - unsigned long amode31_lma = 0; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0; + unsigned long kernel_size = TEXT_OFFSET + vmlinux_size; + unsigned long kaslr_large_page_offset; unsigned long max_physmem_end; unsigned long asce_limit; unsigned long safe_addr; psw_t psw; - fixup_vmlinux_info(); setup_lpp(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE * aligned (see blow). */ - nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); - safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size); + nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); + safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size); /* * Reserve decompressor memory together with decompression heap, @@ -457,16 +451,27 @@ void startup_kernel(void) */ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - unsigned long size = kernel_size + kaslr_large_page_offset; + unsigned long size = vmlinux_size + kaslr_large_page_offset; - __kaslr_offset_phys = randomize_within_range(size, _SEGMENT_SIZE, 0, ident_map_size); + text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size); } - if (!__kaslr_offset_phys) - __kaslr_offset_phys = nokaslr_offset_phys; - __kaslr_offset_phys |= kaslr_large_page_offset; + if (!text_lma) + text_lma = nokaslr_text_lma; + text_lma |= kaslr_large_page_offset; + + /* + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is + * never accessed via the kernel image mapping as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, this region could be used for something else and does + * not need to be reserved. See how it is skipped in setup_vmem(). + */ + __kaslr_offset_phys = text_lma - TEXT_OFFSET; kaslr_adjust_vmlinux_info(__kaslr_offset_phys); - physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size); - deploy_kernel((void *)__kaslr_offset_phys); + physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size); + deploy_kernel((void *)text_lma); /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); @@ -489,7 +494,7 @@ void startup_kernel(void) amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); } if (!amode31_lma) - amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; + amode31_lma = text_lma - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); /* @@ -505,8 +510,8 @@ void startup_kernel(void) * - copy_bootdata() must follow setup_vmem() to propagate changes * to bootdata made by setup_vmem() */ - clear_bss_section(__kaslr_offset_phys); - kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size, + clear_bss_section(text_lma); + kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size, __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 2847cc059ab7..145035f84a0e 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -90,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern } memgap_start = end; } - kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW); + kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { @@ -475,7 +475,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l (unsigned long)__identity_va(end), POPULATE_IDENTITY); } - pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL); + + /* + * [kernel_start..kernel_start + TEXT_OFFSET] region is never + * accessed as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region. + */ + pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL); pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index a750711d44c8..66670212a361 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -109,7 +109,12 @@ SECTIONS #ifdef CONFIG_KERNEL_UNCOMPRESSED . = ALIGN(PAGE_SIZE); . += AMODE31_SIZE; /* .amode31 section */ - . = ALIGN(1 << 20); /* _SEGMENT_SIZE */ + + /* + * Make sure the location counter is not less than TEXT_OFFSET. + * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead. + */ + . = MAX(TEXT_OFFSET, ALIGN(1 << 20)); #else . = ALIGN(8); #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 06416b3f94f5..16e4caa931f1 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -279,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define AMODE31_SIZE (3 * PAGE_SIZE) #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#define __START_KERNEL 0x100000 #define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE #define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE) +#define TEXT_OFFSET 0x100000 + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index e67cd409b858..ae5d0a9d6911 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ PHDRS { SECTIONS { - . = __START_KERNEL; + . = TEXT_OFFSET; .text : { _stext = .; /* Start of text section */ _text = .; /* Text and read-only data */ diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c index a74dbd5c9896..30a732c808f3 100644 --- a/arch/s390/tools/relocs.c +++ b/arch/s390/tools/relocs.c @@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel) case R_390_GOTOFF64: break; case R_390_64: - add_reloc(&relocs64, offset - ehdr.e_entry); + add_reloc(&relocs64, offset); break; default: die("Unsupported relocation type: %d\n", r_type); From 15179cf2806f91685410e598f82813a7fcf90f6c Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 16 Aug 2024 16:47:39 -0500 Subject: [PATCH 1427/1523] smb3: fix problem unloading module due to leaked refcount on shutdown The shutdown ioctl can leak a refcount on the tlink which can prevent rmmod (unloading the cifs.ko) module from working. Found while debugging xfstest generic/043 Fixes: 69ca1f57555f ("smb3: add dynamic tracepoints for shutdown ioctl") Reviewed-by: Meetakshi Setiya Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 3 +++ fs/smb/client/ioctl.c | 2 ++ fs/smb/client/link.c | 1 + 3 files changed, 6 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d2307162a2de..c1c14274930a 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4194,6 +4194,9 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink) * * If one doesn't exist then insert a new tcon_link struct into the tree and * try to construct a new one. + * + * REMEMBER to call cifs_put_tlink() after successful calls to cifs_sb_tlink, + * to avoid refcount issues */ struct tcon_link * cifs_sb_tlink(struct cifs_sb_info *cifs_sb) diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 44dbaf9929a4..9bb5c869f4db 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -229,9 +229,11 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) shutdown_good: trace_smb3_shutdown_done(flags, tcon->tid); + cifs_put_tlink(tlink); return 0; shutdown_out_err: trace_smb3_shutdown_err(rc, flags, tcon->tid); + cifs_put_tlink(tlink); return rc; } diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index d86da949a919..80099bbb333b 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -588,6 +588,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); + /* BB could be clearer if skipped put_tlink on error here, but harmless */ goto symlink_exit; } pTcon = tlink_tcon(tlink); From ec686804117a0421cf31d54427768aaf93aa0069 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 21 Aug 2024 00:45:03 -0300 Subject: [PATCH 1428/1523] smb: client: ignore unhandled reparse tags Just ignore reparse points that the client can't parse rather than bailing out and not opening the file or directory. Reported-by: Marc <1marc1@gmail.com> Closes: https://lore.kernel.org/r/CAMHwNVv-B+Q6wa0FEXrAuzdchzcJRsPKDDRrNaYZJd6X-+iJzw@mail.gmail.com Fixes: 539aad7f14da ("smb: client: introduce ->parse_reparse_point()") Tested-by: Anthony Nandaa (Microsoft) Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/reparse.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 689d8a506d45..48c27581ec51 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -378,6 +378,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, bool unicode, struct cifs_open_info_data *data) { + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + data->reparse.buf = buf; /* See MS-FSCC 2.1.2 */ @@ -394,12 +396,13 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_LX_CHR: case IO_REPARSE_TAG_LX_BLK: - return 0; + break; default: - cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", - __func__, le32_to_cpu(buf->ReparseTag)); - return -EOPNOTSUPP; + cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", + le32_to_cpu(buf->ReparseTag)); + break; } + return 0; } int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, From 8fb4ac1cee88a57e7a56faba49b408a41a4af4db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 18 Aug 2024 16:07:11 +0900 Subject: [PATCH 1429/1523] kbuild: fix typos "prequisites" to "prerequisites" This typo in scripts/Makefile.build has been present for more than 20 years. It was accidentally copy-pasted to other scripts/Makefile.* files. Fix them all. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/Makefile.build | 2 +- scripts/Makefile.modfinal | 2 +- scripts/Makefile.vmlinux | 2 +- scripts/Makefile.vmlinux_o | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index efacca63c897..a5ac8ed1936f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -487,7 +487,7 @@ $(subdir-ym): need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 1fa98b5e952b..306a6bb86e4d 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -62,7 +62,7 @@ endif targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..5ceecbed31eb 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -33,7 +33,7 @@ targets += vmlinux vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6de297916ce6..d64070b6b4bc 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -87,7 +87,7 @@ targets += modules.builtin modules.builtin: modules.builtin.modinfo FORCE $(call if_changed,modules_builtin) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE From 69f0925c67c21c1e3ccda152d2b6ce21c363e563 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:31 -0400 Subject: [PATCH 1430/1523] drm/xe: Removed unused xe_ggtt_printk Apparently this was only useful when enabling ggtt support for the very first time and never used again. It is also not useful now that we have the ggtt_dump available through debugfs. Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 20 -------------------- drivers/gpu/drm/xe/xe_ggtt.h | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0cdbc1296e88..add14f3dea1f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -314,26 +314,6 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) -{ - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; - u64 addr, scratch_pte; - - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); - - printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { - unsigned int i = addr / XE_PAGE_SIZE; - - xe_tile_assert(ggtt->tile, addr <= U32_MAX); - if (ggtt->gsm[i] == scratch_pte) - continue; - - printk("%s ggtt[0x%08x] = 0x%016llx", - prefix, (u32)addr, ggtt->gsm[i]); - } -} - static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, const struct drm_mm_node *node, const char *description) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 6a96fd54bf60..2546bab97507 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,7 +12,6 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); From 244fe1666364865154930f34d8df5489df1922b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:32 -0400 Subject: [PATCH 1431/1523] drm/xe: Introduce GGTT documentation Document xe_ggtt and ensure it is part of the built kernel docs. v2: - Accepted all Michal's suggestions - Rebased on top of new set_pte per platform/wa function pointer v3: - Typos and other acronym fixes (Michal) Cc: Matthew Brost Cc: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray #v1 Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/xe_mm.rst | 15 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 136 +++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_ggtt_types.h | 34 ++++++-- 3 files changed, 150 insertions(+), 35 deletions(-) diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst index 6c8fd8b4a466..95864a4502dd 100644 --- a/Documentation/gpu/xe/xe_mm.rst +++ b/Documentation/gpu/xe/xe_mm.rst @@ -7,6 +7,21 @@ Memory Management .. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h :doc: Buffer Objects (BO) +GGTT +==== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c + :doc: Global Graphics Translation Table (GGTT) + +GGTT Internal API +----------------- + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c + :internal: + Pagetable building ================== diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index add14f3dea1f..dd5cd0df705c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,6 +30,39 @@ #include "xe_wa.h" #include "xe_wopcm.h" +/** + * DOC: Global Graphics Translation Table (GGTT) + * + * Xe GGTT implements the support for a Global Virtual Address space that is used + * for resources that are accessible to privileged (i.e. kernel-mode) processes, + * and not tied to a specific user-level process. For example, the Graphics + * micro-Controller (GuC) and Display Engine (if present) utilize this Global + * address space. + * + * The Global GTT (GGTT) translates from the Global virtual address to a physical + * address that can be accessed by HW. The GGTT is a flat, single-level table. + * + * Xe implements a simplified version of the GGTT specifically managing only a + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space + * is limited on both ends of the GGTT, because the GuC shim HW redirects + * accesses to those addresses to other HW areas instead of going through the + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, + * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things + * simple, instead of checking each object to see if they are accessed by GuC or + * not, we just exclude those areas from the allocator. Additionally, to simplify + * the driver load, we use the maximum WOPCM size in this logic instead of the + * programmed one, so we don't need to wait until the actual size to be + * programmed is determined (which requires FW fetch) before initializing the + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs + * depending on the platform) but we can live with this. Another benefit of this + * is the GuC bootrom can't access anything below the WOPCM max size so anything + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max + * give us the correct placement for free. + */ + static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -164,12 +197,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; -/* - * Early GGTT initialization, which allows to create new mappings usable by the - * GuC. - * Mappings are not usable by the HW engines, as it doesn't have scratch / +/** + * xe_ggtt_init_early - Early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * It allows to create new mappings usable by the GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch nor * initial clear done to it yet. That will happen in the regular, non-early - * GGTT init. + * GGTT initialization. + * + * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_init_early(struct xe_ggtt *ggtt) { @@ -194,29 +231,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; - /* - * 8B per entry, each points to a 4KB page. - * - * The GuC address space is limited on both ends of the GGTT, because - * the GuC shim HW redirects accesses to those addresses to other HW - * areas instead of going through the GGTT. On the bottom end, the GuC - * can't access offsets below the WOPCM size, while on the top side the - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of - * checking each object to see if they are accessed by GuC or not, we - * just exclude those areas from the allocator. Additionally, to - * simplify the driver load, we use the maximum WOPCM size in this logic - * instead of the programmed one, so we don't need to wait until the - * actual size to be programmed is determined (which requires FW fetch) - * before initializing the GGTT. These simplifications might waste space - * in the GGTT (about 20-25 MBs depending on the platform) but we can - * live with this. - * - * Another benifit of this is the GuC bootrom can't access anything - * below the WOPCM max size so anything the bootom needs to access (e.g. - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. - * Starting the GGTT allocations above the WOPCM max give us the correct - * placement for free. - */ if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; @@ -262,6 +276,12 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +/** + * xe_ggtt_init - Regular non-early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -382,6 +402,18 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) mutex_unlock(&ggtt->lock); } +/** + * xe_ggtt_insert_special_node_locked - Locked version to insert a &drm_mm_node into the GGTT + * @ggtt: the &xe_ggtt where node will be inserted + * @node: the &drm_mm_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * @mm_flags: flags to control the node behavior + * + * To be used in cases where ggtt->lock is already taken. + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags) { @@ -389,6 +421,15 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node mm_flags); } +/** + * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT + * @ggtt: the &xe_ggtt where node will be inserted + * @node: the &drm_mm_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align) { @@ -402,6 +443,11 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, return ret; } +/** + * xe_ggtt_map_bo - Map the BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + */ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; @@ -449,17 +495,39 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; } +/** + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * @start: address where it will be inserted + * @end: end of the range where it will be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } +/** + * xe_ggtt_insert_bo - Insert BO into GGTT + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } +/** + * xe_ggtt_remove_node - Remove a &drm_mm_node from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @node: the &drm_mm_node to be removed + * @invalidate: if node needs invalidation upon removal + */ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool invalidate) { @@ -488,6 +556,11 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, drm_dev_exit(idx); } +/** + * xe_ggtt_remove_bo - Remove a BO from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @bo: the &xe_bo to be removed + */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { if (XE_WARN_ON(!bo->ggtt_node.size)) @@ -544,6 +617,13 @@ void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vf } #endif +/** + * xe_ggtt_dump - Dump GGTT for debug + * @ggtt: the &xe_ggtt to be dumped + * @p: the &drm_mm_printer helper handle to be used to dump the information + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..154de298a4e3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,30 +13,50 @@ struct xe_bo; struct xe_gt; +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. + */ struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - + /** @size: Total size of this GGTT */ u64 size; #define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ unsigned int flags; - + /** @scratch: Internal object allocation used as a scratch page */ struct xe_bo *scratch; - + /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; - + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ u64 __iomem *gsm; - + /** @pt_ops: Page Table operations per platform */ const struct xe_ggtt_pt_ops *pt_ops; - + /** @mm: The memory manager used to manage individual GGTT allocations */ struct drm_mm mm; - /** @access_count: counts GGTT writes */ unsigned int access_count; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ struct xe_ggtt_pt_ops { + /** @pte_encode_bo: Encode PTE address for a given BO */ u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; From df99acc7ba1be9f111debdb75e00539fed8ad21a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:33 -0400 Subject: [PATCH 1432/1523] drm/xe: Remove unnecessary drm_mm.h includes These includes are no longer necessary, and where appropriate are replaced by the linux/types.h one. Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.h | 2 +- drivers/gpu/drm/xe/xe_res_cursor.h | 1 - drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 7929cc2425e8..0109866e398a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -6,7 +6,7 @@ #ifndef _XE_MIGRATE_ #define _XE_MIGRATE_ -#include +#include struct dma_fence; struct iosys_map; diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 655af89b31a9..dca374b6521c 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -26,7 +26,6 @@ #include -#include #include #include #include diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f46fd2df84de..f7113cf6109d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -5,7 +5,6 @@ */ #include -#include #include #include From 6dbd43dcedf3b58a18eb3518e5c19e38a97aa68a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:34 -0400 Subject: [PATCH 1433/1523] drm/{i915, xe}: Avoid direct inspection of dpt_vma from outside dpt DPT code is so dependent on i915 vma implementation and it is not ported yet to Xe. This patch limits inspection to DPT's VMA struct to intel_dpt component only, so the Xe GGTT code can evolve. Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Juha-Pekka Heikkila Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-4-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dpt.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dpt.h | 3 +++ drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 ++- drivers/gpu/drm/xe/display/xe_fb_pin.c | 9 +++++++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 73a1918e2537..3a6d99044828 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -317,3 +317,7 @@ void intel_dpt_destroy(struct i915_address_space *vm) i915_vm_put(&dpt->vm); } +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return dpt_vma->node.start; +} diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h index ff18a525bfbe..1f88b0ee17e7 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.h +++ b/drivers/gpu/drm/i915/display/intel_dpt.h @@ -6,6 +6,8 @@ #ifndef __INTEL_DPT_H__ #define __INTEL_DPT_H__ +#include + struct drm_i915_private; struct i915_address_space; @@ -20,5 +22,6 @@ void intel_dpt_suspend(struct drm_i915_private *i915); void intel_dpt_resume(struct drm_i915_private *i915); struct i915_address_space * intel_dpt_create(struct intel_framebuffer *fb); +u64 intel_dpt_offset(struct i915_vma *dpt_vma); #endif /* __INTEL_DPT_H__ */ diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba5a628b4757..1cf1d5c8b9dc 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -14,6 +14,7 @@ #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_types.h" +#include "intel_dpt.h" #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -1162,7 +1163,7 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, * within the DPT is always 0. */ drm_WARN_ON(&i915->drm, plane_state->dpt_vma && - plane_state->dpt_vma->node.start); + intel_dpt_offset(plane_state->dpt_vma)); drm_WARN_ON(&i915->drm, offset & 0x1fffff); return offset >> 9; } else { diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d7db44e79eaf..42d431ff14e7 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -377,8 +377,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) } /* - * For Xe introduce dummy intel_dpt_create which just return NULL and - * intel_dpt_destroy which does nothing. + * For Xe introduce dummy intel_dpt_create which just return NULL, + * intel_dpt_destroy which does nothing, and fake intel_dpt_ofsset returning 0; */ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) { @@ -389,3 +389,8 @@ void intel_dpt_destroy(struct i915_address_space *vm) { return; } + +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return 0; +} From 6062ea9398d3ec09c521774f9d81f604d1a85fbd Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:35 -0400 Subject: [PATCH 1434/1523] drm/xe: Encapsulate drm_mm_node inside xe_ggtt_node The xe_ggtt component uses drm_mm to manage the GGTT. The drm_mm_node is just a node inside drm_mm, but in Xe we use that only in the GGTT context. So, this patch encapsulates the drm_mm_node into a xe_ggtt's new struct. This is the first step towards limiting all the drm_mm access through xe_ggtt. The ultimate goal is to have a better control of the node insertion and removal, so the removal can be delegated to a delayed workqueue. v2: Fix includes and typos (Michal and Brost) Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-5-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/compat-i915-headers/i915_vma.h | 7 +- drivers/gpu/drm/xe/display/xe_fb_pin.c | 10 +-- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 6 +- drivers/gpu/drm/xe/xe_bo_types.h | 5 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 72 +++++++++---------- drivers/gpu/drm/xe/xe_ggtt.h | 12 ++-- drivers/gpu/drm/xe/xe_ggtt_types.h | 8 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 39 +++++----- .../gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 5 +- 11 files changed, 90 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index a20d2638ea7a..3028ac1ba72f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -7,7 +7,8 @@ #define I915_VMA_H #include -#include + +#include "xe_ggtt_types.h" /* We don't want these from i915_drm.h in case of Xe */ #undef I915_TILING_X @@ -19,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct drm_mm_node node; + struct xe_ggtt_node node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -28,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.start; + return vma->node.base.start; } #endif diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 42d431ff14e7..a93923fb8721 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,7 +204,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node.base.size && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; @@ -218,7 +218,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -232,7 +232,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (ret) goto out_unlock; - ggtt_ofs = vma->node.start; + ggtt_ofs = vma->node.base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -325,8 +325,8 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.start != vma->node.start) + else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || + vma->bo->ggtt_node.base.start != vma->node.base.start) xe_ggtt_remove_node(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6ed0e1955215..1faef9903b87 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1120,7 +1120,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.size) + if (bo->ggtt_node.base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 935a94279026..9904b410be37 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -194,9 +194,9 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.size > bo->size); - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); - return bo->ggtt_node.start; + XE_WARN_ON(bo->ggtt_node.base.size > bo->size); + XE_WARN_ON(bo->ggtt_node.base.start + bo->ggtt_node.base.size > (1ull << 32)); + return bo->ggtt_node.base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ebc8abf7930a..4b1de9f5be00 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -8,12 +8,13 @@ #include -#include #include #include #include #include +#include "xe_ggtt_types.h" + struct xe_device; struct xe_vm; @@ -39,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct drm_mm_node ggtt_node; + struct xe_ggtt_node ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index cb60bc5ec21b..9f7d46a3260c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct drm_mm_node ggtt_balloon[2]; + struct xe_ggtt_node ggtt_balloon[2]; } vf; } sriov; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index dd5cd0df705c..1cf682d15253 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -351,61 +351,61 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, * @ggtt: the &xe_ggtt where we want to make reservation * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &drm_mm_node to hold reserved GGTT node + * @node: the &xe_ggtt_node to hold reserved GGTT node * * Use xe_ggtt_deballoon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_node *node) { int err; xe_tile_assert(ggtt->tile, start < end); xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - node->color = 0; - node->start = start; - node->size = end - start; + node->base.color = 0; + node->base.start = start; + node->base.size = end - start; mutex_lock(&ggtt->lock); - err = drm_mm_reserve_node(&ggtt->mm, node); + err = drm_mm_reserve_node(&ggtt->mm, &node->base); mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->start, node->start + node->size, ERR_PTR(err))) + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; - xe_ggtt_dump_node(ggtt, node, "balloon"); + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); return 0; } /** * xe_ggtt_deballoon - release a reserved GGTT region * @ggtt: the &xe_ggtt where reserved node belongs - * @node: the &drm_mm_node with reserved GGTT region + * @node: the &xe_ggtt_node with reserved GGTT region * * See xe_ggtt_balloon() for details. */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(node)) + if (!drm_mm_node_allocated(&node->base)) return; - xe_ggtt_dump_node(ggtt, node, "deballoon"); + xe_ggtt_dump_node(ggtt, &node->base, "deballoon"); mutex_lock(&ggtt->lock); - drm_mm_remove_node(node); + drm_mm_remove_node(&node->base); mutex_unlock(&ggtt->lock); } /** - * xe_ggtt_insert_special_node_locked - Locked version to insert a &drm_mm_node into the GGTT + * xe_ggtt_insert_special_node_locked - Locked version to insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted - * @node: the &drm_mm_node to be inserted + * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * @mm_flags: flags to control the node behavior @@ -414,23 +414,23 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, mm_flags); } /** - * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT + * xe_ggtt_insert_special_node - Insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted - * @node: the &drm_mm_node to be inserted + * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align) { int ret; @@ -452,7 +452,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.start; + u64 start = bo->ggtt_node.base.start; u64 offset, pte; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { @@ -470,9 +470,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.size)) { + if (XE_WARN_ON(bo->ggtt_node.base.size)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); return 0; } @@ -482,7 +482,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node.base, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); @@ -523,12 +523,12 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) } /** - * xe_ggtt_remove_node - Remove a &drm_mm_node from the GGTT + * xe_ggtt_remove_node - Remove a &xe_ggtt_node from the GGTT * @ggtt: the &xe_ggtt where node will be removed - * @node: the &drm_mm_node to be removed + * @node: the &xe_ggtt_node to be removed * @invalidate: if node needs invalidation upon removal */ -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -541,9 +541,9 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, mutex_lock(&ggtt->lock); if (bound) - xe_ggtt_clear(ggtt, node->start, node->size); - drm_mm_remove_node(node); - node->size = 0; + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; mutex_unlock(&ggtt->lock); if (!bound) @@ -563,11 +563,11 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node.size)) + if (XE_WARN_ON(!bo->ggtt_node.base.size)) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); xe_ggtt_remove_node(ggtt, &bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); @@ -602,17 +602,17 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF * @ggtt: the &xe_ggtt where the node belongs - * @node: the &drm_mm_node to update + * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * * This function is used by the PF driver to assign a GGTT region to the VF. * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. */ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid) { mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, node, vfid); + xe_ggtt_assign_locked(ggtt, &node->base, vfid); mutex_unlock(&ggtt->lock); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 2546bab97507..30a521f7b075 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,15 +13,15 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align); int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct drm_mm_node *node, + struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); @@ -32,7 +32,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 154de298a4e3..af312a7d1031 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -49,6 +49,14 @@ struct xe_ggtt { unsigned int access_count; }; +/** + * struct xe_ggtt_node - A node in GGTT + */ +struct xe_ggtt_node { + /** @base: A drm_mm_node */ + struct drm_mm_node base; +}; + /** * struct xe_ggtt_pt_ops - GGTT Page table operations * Which can vary from platform to platform. diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 227527785afd..f107ae550c0f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -6,6 +6,9 @@ #include #include +/* FIXME: remove this after encapsulating all drm_mm_node access into xe_ggtt */ +#include + #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" @@ -232,14 +235,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region)) { + if (drm_mm_node_allocated(&config->ggtt_region.base)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.start); - cfg[n++] = upper_32_bits(config->ggtt_region.start); + cfg[n++] = lower_32_bits(config->ggtt_region.base.start); + cfg[n++] = upper_32_bits(config->ggtt_region.base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.size); - cfg[n++] = upper_32_bits(config->ggtt_region.size); + cfg[n++] = lower_32_bits(config->ggtt_region.base.size); + cfg[n++] = upper_32_bits(config->ggtt_region.base.size); } return n; @@ -369,11 +372,11 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = tile->mem.ggtt; - if (drm_mm_node_allocated(node)) { + if (drm_mm_node_allocated(&node->base)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by @@ -391,7 +394,7 @@ static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_confi static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = &config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -403,14 +406,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(node)) { + if (drm_mm_node_allocated(&node->base)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(node)); + xe_gt_assert(gt, !drm_mm_node_allocated(&node->base)); if (!size) return 0; @@ -421,9 +424,9 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) xe_ggtt_assign(ggtt, node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", - vfid, node->start, node->start + node->size - 1); + vfid, node->base.start, node->base.start + node->base.size - 1); - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) return err; @@ -433,10 +436,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = &config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(node) ? node->size : 0; + return drm_mm_node_allocated(&node->base) ? node->base.size : 0; } /** @@ -2025,13 +2028,13 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region)) + if (!drm_mm_node_allocated(&config->ggtt_region.base)) continue; - string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.start, - config->ggtt_region.start + config->ggtt_region.size - 1, buf); + n, config->ggtt_region.base.start, + config->ggtt_region.base.start + config->ggtt_region.base.size - 1, buf); } return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 7bc66656fcc7..a73d9a4b9e64 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -6,8 +6,7 @@ #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ -#include - +#include "xe_ggtt_types.h" #include "xe_guc_klv_thresholds_set_types.h" struct xe_bo; @@ -19,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct drm_mm_node ggtt_region; + struct xe_ggtt_node ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ From 0567f18e0757a260031e59487fe01f402c16c0de Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:36 -0400 Subject: [PATCH 1435/1523] drm/xe: Rename xe_ggtt_node related functions Bring some consistency and prepare for more xe_ggtt_node related functions to be introduced. Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-6-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 8 +- drivers/gpu/drm/xe/xe_ggtt.c | 86 +++++++++++----------- drivers/gpu/drm/xe/xe_ggtt.h | 12 +-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 4 +- 4 files changed, 54 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index a93923fb8721..db74c3395ef8 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -209,8 +209,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); + ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); if (ret) goto out_unlock; @@ -227,8 +226,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back.. */ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); + ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); if (ret) goto out_unlock; @@ -327,7 +325,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) xe_bo_unpin_map_no_vm(vma->dpt); else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || vma->bo->ggtt_node.base.start != vma->node.base.start) - xe_ggtt_remove_node(ggtt, &vma->node, false); + xe_ggtt_node_remove(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 1cf682d15253..e0da24bb5774 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -403,7 +403,7 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) } /** - * xe_ggtt_insert_special_node_locked - Locked version to insert a &xe_ggtt_node into the GGTT + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node @@ -414,15 +414,15 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align, u32 mm_flags) +int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags) { return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, mm_flags); } /** - * xe_ggtt_insert_special_node - Insert a &xe_ggtt_node into the GGTT + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node @@ -430,19 +430,53 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align) +int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align) { int ret; mutex_lock(&ggtt->lock); - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); + ret = xe_ggtt_node_insert_locked(ggtt, node, size, + align, DRM_MM_INSERT_HIGH); mutex_unlock(&ggtt->lock); return ret; } +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + bool invalidate) +{ + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + if (bound) + xe_pm_runtime_get_noresume(xe); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + return; + + if (invalidate) + xe_ggtt_invalidate(ggtt); + + xe_pm_runtime_put(xe); + drm_dev_exit(idx); +} + /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped @@ -522,40 +556,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -/** - * xe_ggtt_remove_node - Remove a &xe_ggtt_node from the GGTT - * @ggtt: the &xe_ggtt where node will be removed - * @node: the &xe_ggtt_node to be removed - * @invalidate: if node needs invalidation upon removal - */ -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate) -{ - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); - drm_mm_remove_node(&node->base); - node->base.size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) - return; - - if (invalidate) - xe_ggtt_invalidate(ggtt); - - xe_pm_runtime_put(xe); - drm_dev_exit(idx); -} - /** * xe_ggtt_remove_bo - Remove a BO from the GGTT * @ggtt: the &xe_ggtt where node will be removed @@ -569,7 +569,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) /* This BO is not currently in the GGTT */ xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, + xe_ggtt_node_remove(ggtt, &bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 30a521f7b075..5147930357de 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -16,12 +16,12 @@ int xe_ggtt_init(struct xe_ggtt *ggtt); int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align); -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct xe_ggtt_node *node, - u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, + struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f107ae550c0f..3ca98f8a0eae 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -382,7 +382,7 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_remove_node(ggtt, node, false); + xe_ggtt_node_remove(ggtt, node, false); } } @@ -418,7 +418,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + err = xe_ggtt_node_insert(ggtt, node, size, alignment); if (unlikely(err)) return err; From 8b5ccc9743ab026b12075eb5e3883cc9e42bc683 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:37 -0400 Subject: [PATCH 1436/1523] drm/xe: Limit drm_mm_node_allocated access to xe_ggtt_node Continue with the encapsulation of drm_mm_node inside xe_ggtt. Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-7-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 12 ++++++------ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index db74c3395ef8..de4930b67a29 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -323,7 +323,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || + else if (!xe_ggtt_node_allocated(&vma->bo->ggtt_node) || vma->bo->ggtt_node.base.start != vma->node.base.start) xe_ggtt_node_remove(ggtt, &vma->node, false); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index e0da24bb5774..7c8bbaa30fca 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -477,6 +477,17 @@ void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, drm_dev_exit(idx); } +/** + * xe_ggtt_node_allocated - Check if node is allocated + * @node: the &xe_ggtt_node to be inspected + * + * Return: True if allocated, False otherwise. + */ +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +{ + return drm_mm_node_allocated(&node->base); +} + /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 5147930357de..f816b3c0732b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -23,6 +23,7 @@ int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3ca98f8a0eae..947750d97d7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -235,7 +235,7 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region.base)) { + if (xe_ggtt_node_allocated(&config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); cfg[n++] = lower_32_bits(config->ggtt_region.base.start); cfg[n++] = upper_32_bits(config->ggtt_region.base.start); @@ -376,7 +376,7 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = tile->mem.ggtt; - if (drm_mm_node_allocated(&node->base)) { + if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by @@ -406,14 +406,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(&node->base)) { + if (xe_ggtt_node_allocated(node)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(&node->base)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); if (!size) return 0; @@ -439,7 +439,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_ggtt_node *node = &config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(&node->base) ? node->base.size : 0; + return xe_ggtt_node_allocated(node) ? node->base.size : 0; } /** @@ -2028,7 +2028,7 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region.base)) + if (!xe_ggtt_node_allocated(&config->ggtt_region)) continue; string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf)); From 1144e0dff5e68907cb8d3e2d64d1c00e2a96d1b2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:38 -0400 Subject: [PATCH 1437/1523] drm/xe: Introduce xe_ggtt_largest_hole Introduce a new xe_ggtt_largest_hole helper that attends the SRIOV demand and continue with the goal of limiting drm_mm access to xe_ggtt. v2: Fix a typo (Michal) Cc: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-8-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 35 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 23 ++------------ 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7c8bbaa30fca..2d055f489879 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -584,6 +584,41 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } +/** + * xe_ggtt_largest_hole - Largest GGTT hole + * @ggtt: the &xe_ggtt that will be inspected + * @alignment: minimum alignment + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare + * + * Return: size of the largest continuous GGTT region + */ +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + if (spare) + *spare -= min3(*spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + return max_hole; +} + #ifdef CONFIG_PCI_IOV static u64 xe_encode_vfid_pte(u16 vfid) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index f816b3c0732b..31060fe7644b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -29,6 +29,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 947750d97d7d..1852ff45bea4 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -590,30 +590,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_max_ggtt(struct xe_gt *gt) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); u64 spare = pf_get_spare_ggtt(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 max_hole = 0; + u64 max_hole; - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", - hole_start, hole_size / SZ_1K); - spare -= min3(spare, hole_size, max_hole); - max_hole = max(max_hole, hole_size); - } - - mutex_unlock(&ggtt->lock); + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", max_hole / SZ_1K, spare / SZ_1K); From 136367290ea5d7b5d05696189e9fd6162b9d9742 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:39 -0400 Subject: [PATCH 1438/1523] drm/xe: Introduce xe_ggtt_print_holes Introduce a new xe_ggtt_print_holes helper that attends the SRIOV demand and finishes the goal of limiting drm_mm access to xe_ggtt. Cc: Michal Wajdeczko Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-9-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 40 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 25 +------------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2d055f489879..960f5a28b7ed 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -682,3 +682,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) mutex_unlock(&ggtt->lock); return err; } + +/** + * xe_ggtt_print_holes - Print holes + * @ggtt: the &xe_ggtt to be inspected + * @alignment: min alignment + * @p: the &drm_printer + * + * Print GGTT ranges that are available and return total size available. + * + * Return: Total available size. + */ +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 total; + char buf[10]; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + return total; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 31060fe7644b..67ae5f1602a3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -32,6 +32,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 1852ff45bea4..e133594cc6bd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -6,9 +6,6 @@ #include #include -/* FIXME: remove this after encapsulating all drm_mm_node access into xe_ggtt */ -#include - #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" @@ -2103,11 +2100,7 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; u64 spare, avail, total = 0; char buf[10]; @@ -2116,24 +2109,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); spare = pf_get_spare_ggtt(gt); + total = xe_ggtt_print_holes(ggtt, alignment, p); - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - total += hole_size; - - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", - hole_start, hole_end - 1, buf); - } - - mutex_unlock(&ggtt->lock); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); From 15ca09499bc669b600dcdaf01fc0bf8c55e15b35 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:40 -0400 Subject: [PATCH 1439/1523] drm/xe: Refactor xe_ggtt balloon functions to make the node clear These operations are related to node. Convert them to the new appropriate name space xe_ggtt_node. v2: Also move arguments around for consistency (Lucas). v3: s/node_balloon/node_insert_balloon and s/node_deballoon/node_remove_balloon (Michal). Reviewed-by: Lucas De Marchi Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-10-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_ggtt.h | 5 +++-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 12 +++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 960f5a28b7ed..d21474715bdb 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -347,17 +347,17 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses * @ggtt: the &xe_ggtt where we want to make reservation + * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &xe_ggtt_node to hold reserved GGTT node * - * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_node *node) +int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 start, u64 end) { int err; @@ -384,18 +384,18 @@ int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_nod } /** - * xe_ggtt_deballoon - release a reserved GGTT region + * xe_ggtt_node_remove_balloon - release a reserved GGTT region * @ggtt: the &xe_ggtt where reserved node belongs * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_balloon() for details. + * See xe_ggtt_node_insert_balloon() for details. */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) { if (!drm_mm_node_allocated(&node->base)) return; - xe_ggtt_dump_node(ggtt, &node->base, "deballoon"); + xe_ggtt_dump_node(ggtt, &node->base, "remove-balloon"); mutex_lock(&ggtt->lock); drm_mm_remove_node(&node->base); diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 67ae5f1602a3..e258a4f381b4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,8 +13,9 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 47222bd9988d..39ff4e7f902e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -528,7 +528,8 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); + err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0], + start, end); if (err) goto failed; } @@ -536,7 +537,8 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); + err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1], + start, end); if (err) goto deballoon; } @@ -544,7 +546,7 @@ static int vf_balloon_ggtt(struct xe_gt *gt) return 0; deballoon: - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); failed: return err; } @@ -555,8 +557,8 @@ static void deballoon_ggtt(struct drm_device *drm, void *arg) struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); } /** From 34e804220f693fe6bb1c4e50f27dd855e9313f0c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:41 -0400 Subject: [PATCH 1440/1523] drm/xe: Make xe_ggtt_node struct independent In some rare cases, the drm_mm node cannot be removed synchronously due to runtime PM conditions. In this situation, the node removal will be delegated to a workqueue that will be able to wake up the device before removing the node. However, in this situation, the lifetime of the xe_ggtt_node cannot be restricted to the lifetime of the parent object. So, this patch introduces the infrastructure so the xe_ggtt_node struct can be allocated in advance and freed when needed. By having the ggtt backpointer, it also ensure that the init function is always called before any attempt to insert or reserve the node in the GGTT. v2: s/xe_ggtt_node_force_fini/xe_ggtt_node_fini and use it internaly (Brost) v3: - Use GF_NOFS for node allocation (CI) - Avoid ggtt argument, now that we have it inside the node (Lucas) - Fix some missed fini cases (CI) v4: - Fix SRIOV critical case where config->ggtt_region was lost (Michal) - Avoid ggtt argument also on removal (missed case on v3) (Michal) - Remove useless checks (Michal) - Return 0 instead of negative errno on a u32 addr. (Michal) - s/xe_ggtt_assign/xe_ggtt_node_assign for coherence, while we are touching it (Michal) v5: - Fix VFs' ggtt_balloon Cc: Matthew Auld Cc: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-11-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/compat-i915-headers/i915_vma.h | 4 +- drivers/gpu/drm/xe/display/xe_fb_pin.c | 39 +++-- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 9 +- drivers/gpu/drm/xe/xe_bo_types.h | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 140 +++++++++++++----- drivers/gpu/drm/xe/xe_ggtt.h | 17 +-- drivers/gpu/drm/xe/xe_ggtt_types.h | 8 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 49 +++--- .../gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 46 +++--- 12 files changed, 215 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index 3028ac1ba72f..bdae8392e125 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -20,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct xe_ggtt_node node; + struct xe_ggtt_node *node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -29,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.base.start; + return vma->node->base.start; } #endif diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index de4930b67a29..d650c5ac41a4 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,20 +204,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.base.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.base.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -226,11 +234,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back.. */ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } - ggtt_ofs = vma->node.base.start; + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } + + ggtt_ofs = vma->node->base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -318,14 +334,11 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.base.start != vma->node.base.start) - xe_ggtt_node_remove(ggtt, &vma->node, false); + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || + vma->bo->ggtt_node->base.start != vma->node->base.start) + xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1faef9903b87..cbe7bf098970 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1120,7 +1120,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.base.size) + if (bo->ggtt_node && bo->ggtt_node->base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9904b410be37..dbfb3209615d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -194,9 +194,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.base.size > bo->size); - XE_WARN_ON(bo->ggtt_node.base.start + bo->ggtt_node.base.size > (1ull << 32)); - return bo->ggtt_node.base.start; + if (XE_WARN_ON(!bo->ggtt_node)) + return 0; + + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); + return bo->ggtt_node->base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 4b1de9f5be00..2ed558ac2264 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -40,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct xe_ggtt_node ggtt_node; + struct xe_ggtt_node *ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f7d46a3260c..121a403a1478 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node ggtt_balloon[2]; + struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index d21474715bdb..58d7cad2425b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -348,7 +348,6 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, /** * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses - * @ggtt: the &xe_ggtt where we want to make reservation * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region @@ -357,8 +356,9 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 start, u64 end) +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) { + struct xe_ggtt *ggtt = node->ggtt; int err; xe_tile_assert(ggtt->tile, start < end); @@ -385,77 +385,124 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, /** * xe_ggtt_node_remove_balloon - release a reserved GGTT region - * @ggtt: the &xe_ggtt where reserved node belongs * @node: the &xe_ggtt_node with reserved GGTT region * * See xe_ggtt_node_insert_balloon() for details. */ -void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(&node->base)) + if (!node || !node->ggtt) return; - xe_ggtt_dump_node(ggtt, &node->base, "remove-balloon"); + if (!drm_mm_node_allocated(&node->base)) + goto free_node; - mutex_lock(&ggtt->lock); + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); + + mutex_lock(&node->ggtt->lock); drm_mm_remove_node(&node->base); - mutex_unlock(&ggtt->lock); + mutex_unlock(&node->ggtt->lock); + +free_node: + xe_ggtt_node_fini(node); } /** * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT - * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * @mm_flags: flags to control the node behavior * + * It cannot be called without first having called xe_ggtt_init() once. * To be used in cases where ggtt->lock is already taken. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } /** * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT - * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * + * It cannot be called without first having called xe_ggtt_init() once. + * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align) +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) { int ret; - mutex_lock(&ggtt->lock); - ret = xe_ggtt_node_insert_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); - mutex_unlock(&ggtt->lock); + if (!node || !node->ggtt) + return -ENOENT; + + mutex_lock(&node->ggtt->lock); + ret = xe_ggtt_node_insert_locked(node, size, align, + DRM_MM_INSERT_HIGH); + mutex_unlock(&node->ggtt->lock); return ret; } +/** + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. + * + * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() + * or xe_ggtt_node_remove_balloon(). + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. + **/ +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +{ + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); + + if (!node) + return ERR_PTR(-ENOMEM); + + node->ggtt = ggtt; + return node; +} + +/** + * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct + * @node: the &xe_ggtt_node to be freed + * + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * this function needs to be called to free the %xe_ggtt_node struct + **/ +void xe_ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + /** * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT - * @ggtt: the &xe_ggtt where node will be removed * @node: the &xe_ggtt_node to be removed * @invalidate: if node needs invalidation upon removal */ -void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate) +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) { + struct xe_ggtt *ggtt = node->ggtt; struct xe_device *xe = tile_to_xe(ggtt->tile); bool bound; int idx; + if (!node || !node->ggtt) + return; + bound = drm_dev_enter(&xe->drm, &idx); if (bound) xe_pm_runtime_get_noresume(xe); @@ -468,23 +515,29 @@ void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, mutex_unlock(&ggtt->lock); if (!bound) - return; + goto free_node; if (invalidate) xe_ggtt_invalidate(ggtt); xe_pm_runtime_put(xe); drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); } /** - * xe_ggtt_node_allocated - Check if node is allocated + * xe_ggtt_node_allocated - Check if node is allocated in GGTT * @node: the &xe_ggtt_node to be inspected * * Return: True if allocated, False otherwise. */ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) { + if (!node || !node->ggtt) + return false; + return drm_mm_node_allocated(&node->base); } @@ -497,9 +550,14 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.base.start; + u64 start; u64 offset, pte; + if (XE_WARN_ON(!bo->ggtt_node)) + return; + + start = bo->ggtt_node->base.start; + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); @@ -515,9 +573,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.base.size)) { + if (XE_WARN_ON(bo->ggtt_node)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); return 0; } @@ -526,15 +584,26 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + + bo->ggtt_node = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node)) { + err = PTR_ERR(bo->ggtt_node); + goto out; + } + mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node.base, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (!err) + if (err) + xe_ggtt_node_fini(bo->ggtt_node); + else xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); + +out: xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; @@ -574,13 +643,13 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node.base.size)) + if (XE_WARN_ON(!bo->ggtt_node)) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); - xe_ggtt_node_remove(ggtt, &bo->ggtt_node, + xe_ggtt_node_remove(bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } @@ -647,7 +716,6 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF - * @ggtt: the &xe_ggtt where the node belongs * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * @@ -655,11 +723,11 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. */ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid) +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, &node->base, vfid); - mutex_unlock(&ggtt->lock); + mutex_lock(&node->ggtt->lock); + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); + mutex_unlock(&node->ggtt->lock); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index e258a4f381b4..27e7d67de004 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,17 +13,16 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); +void xe_ggtt_node_fini(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 size); -void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); -int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align); -int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, - struct xe_ggtt_node *node, +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate); +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); @@ -36,7 +35,7 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid); +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index af312a7d1031..0e8822ae13fc 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -50,9 +50,15 @@ struct xe_ggtt { }; /** - * struct xe_ggtt_node - A node in GGTT + * struct xe_ggtt_node - A node in GGTT. + * + * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node + * insertion, reservation, or 'ballooning'. + * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). */ struct xe_ggtt_node { + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ + struct xe_ggtt *ggtt; /** @base: A drm_mm_node */ struct drm_mm_node base; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index e133594cc6bd..c8835d9eead6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -232,14 +232,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (xe_ggtt_node_allocated(&config->ggtt_region)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.base.start); - cfg[n++] = upper_32_bits(config->ggtt_region.base.start); + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.base.size); - cfg[n++] = upper_32_bits(config->ggtt_region.base.size); + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); } return n; @@ -371,27 +371,26 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { - struct xe_ggtt *ggtt = tile->mem.ggtt; - if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_node_remove(ggtt, node, false); + xe_ggtt_node_remove(node, false); } } static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + config->ggtt_region = NULL; } static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -415,25 +414,33 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - err = xe_ggtt_node_insert(ggtt, node, size, alignment); - if (unlikely(err)) - return err; + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return PTR_ERR(node); - xe_ggtt_assign(ggtt, node, vfid); + err = xe_ggtt_node_insert(node, size, alignment); + if (unlikely(err)) + goto err; + + xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", vfid, node->base.start, node->base.start + node->base.size - 1); err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) - return err; + goto err; + config->ggtt_region = node; return 0; +err: + xe_ggtt_node_fini(node); + return err; } static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; @@ -2006,13 +2013,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!xe_ggtt_node_allocated(&config->ggtt_region)) + if (!xe_ggtt_node_allocated(config->ggtt_region)) continue; - string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, + buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.base.start, - config->ggtt_region.base.start + config->ggtt_region.base.size - 1, buf); + n, config->ggtt_region->base.start, + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, + buf); } return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index a73d9a4b9e64..2d3b73d78f14 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -18,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct xe_ggtt_node ggtt_region; + struct xe_ggtt_node *ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 39ff4e7f902e..4ebc82e607af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } +static struct xe_ggtt_node * +vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) +{ + struct xe_ggtt_node *node; + int err; + + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + err = xe_ggtt_node_insert_balloon(node, start, end); + if (err) { + xe_ggtt_node_fini(node); + return ERR_PTR(err); + } + + return node; +} + static int vf_balloon_ggtt(struct xe_gt *gt) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; @@ -502,7 +521,6 @@ static int vf_balloon_ggtt(struct xe_gt *gt) struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_device *xe = gt_to_xe(gt); u64 start, end; - int err; xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); @@ -528,37 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0], - start, end); - if (err) - goto failed; + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); } start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1], - start, end); - if (err) - goto deballoon; + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } } return 0; - -deballoon: - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); -failed: - return err; } static void deballoon_ggtt(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; - struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); } /** From 919bb54e989c1edef87e9797be125c94c450fc65 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:42 -0400 Subject: [PATCH 1441/1523] drm/xe: Fix missing runtime outer protection for ggtt_remove_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defer the ggtt node removal to a thread if runtime_pm is not active. The ggtt node removal can be called from multiple places, including places where we cannot protect with outer callers and places we are within other locks. So, try to grab the runtime reference if the device is already active, otherwise defer the removal to a separate thread from where we are sure we can wake the device up. v2: - use xe wq instead of system wq (Matt and CI) - Avoid GFP_KERNEL to be future proof since this removal can be called from outside our drivers and we don't want to block if atomic is needed. (Brost) v3: amend forgot chunk declaring xe_device. v4: Use a xe_ggtt_region to encapsulate the node and remova info, wihtout the need for any memory allocation at runtime. v5: Actually fill the delayed_removal.invalidate (Brost) v6: - Ensure that ggtt_region is not freed before work finishes (Auld) - Own wq to ensures that the queued works are flushed before ggtt_fini (Brost) v7: also free ggtt_region on early !bound return (Auld) v8: Address the null deref (CI) v9: Based on the new xe_ggtt_node for the proper care of the lifetime of the object. v10: Redo the lost v5 change. (Brost) v11: Simplify the invalidate_on_remove (Lucas) Cc: Matthew Auld Cc: Paulo Zanoni Cc: Francois Dugast Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-12-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 106 ++++++++++++++++++----------- drivers/gpu/drm/xe/xe_ggtt_types.h | 6 ++ 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 58d7cad2425b..b4a0cd2b62ed 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -161,6 +161,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; + destroy_workqueue(ggtt->wq); mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } @@ -242,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -276,6 +279,68 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_remove(struct xe_ggtt_node *node) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + if (!node || !node->ggtt) + return; + + bound = drm_dev_enter(&xe->drm, &idx); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + goto free_node; + + if (node->invalidate_on_remove) + xe_ggtt_invalidate(ggtt); + + drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); +} + +static void ggtt_node_remove_work_func(struct work_struct *work) +{ + struct xe_ggtt_node *node = container_of(work, typeof(*node), + delayed_removal_work); + struct xe_device *xe = tile_to_xe(node->ggtt->tile); + + xe_pm_runtime_get(xe); + ggtt_node_remove(node); + xe_pm_runtime_put(xe); +} + +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + + node->invalidate_on_remove = invalidate; + + if (xe_pm_runtime_get_if_active(xe)) { + ggtt_node_remove(node); + xe_pm_runtime_put(xe); + } else { + queue_work(ggtt->wq, &node->delayed_removal_work); + } +} + /** * xe_ggtt_init - Regular non-early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -471,7 +536,9 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) if (!node) return ERR_PTR(-ENOMEM); + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); node->ggtt = ggtt; + return node; } @@ -488,45 +555,6 @@ void xe_ggtt_node_fini(struct xe_ggtt_node *node) kfree(node); } -/** - * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT - * @node: the &xe_ggtt_node to be removed - * @invalidate: if node needs invalidation upon removal - */ -void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) -{ - struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - if (!node || !node->ggtt) - return; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); - drm_mm_remove_node(&node->base); - node->base.size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) - goto free_node; - - if (invalidate) - xe_ggtt_invalidate(ggtt); - - xe_pm_runtime_put(xe); - drm_dev_exit(idx); - -free_node: - xe_ggtt_node_fini(node); -} - /** * xe_ggtt_node_allocated - Check if node is allocated in GGTT * @node: the &xe_ggtt_node to be inspected diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 0e8822ae13fc..cb02b7994a9a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -47,6 +47,8 @@ struct xe_ggtt { struct drm_mm mm; /** @access_count: counts GGTT writes */ unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; }; /** @@ -61,6 +63,10 @@ struct xe_ggtt_node { struct xe_ggtt *ggtt; /** @base: A drm_mm_node */ struct drm_mm_node base; + /** @delayed_removal_work: The work struct for the delayed removal */ + struct work_struct delayed_removal_work; + /** @invalidate_on_remove: If it needs invalidation upon removal */ + bool invalidate_on_remove; }; /** From afc954fd223ded70b1fa000767e2531db55cce58 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:21 +0200 Subject: [PATCH 1442/1523] thermal: of: Fix OF node leak in thermal_of_trips_init() error path Terminating for_each_child_of_node() loop requires dropping OF node reference, so bailing out after thermal_of_populate_trip() error misses this. Solve the OF node reference leak with scoped for_each_child_of_node_scoped(). Fixes: d0c75fa2c17f ("thermal/of: Initialize trip points separately") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index aa34b6e82e26..30f8d6e70484 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -125,7 +125,7 @@ static int thermal_of_populate_trip(struct device_node *np, static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *ntrips) { struct thermal_trip *tt; - struct device_node *trips, *trip; + struct device_node *trips; int ret, count; trips = of_get_child_by_name(np, "trips"); @@ -150,7 +150,7 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n *ntrips = count; count = 0; - for_each_child_of_node(trips, trip) { + for_each_child_of_node_scoped(trips, trip) { ret = thermal_of_populate_trip(trip, &tt[count++]); if (ret) goto out_kfree; From 662b52b761bfe0ba970e5823759798faf809b896 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:22 +0200 Subject: [PATCH 1443/1523] thermal: of: Fix OF node leak in thermal_of_zone_register() thermal_of_zone_register() calls of_thermal_zone_find() which will iterate over OF nodes with for_each_available_child_of_node() to find matching thermal zone node. When it finds such, it exits the loop and returns the node. Prematurely ending for_each_available_child_of_node() loops requires dropping OF node reference, thus success of of_thermal_zone_find() means that caller must drop the reference. Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-2-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 30f8d6e70484..b08a9b64718d 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -491,7 +491,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); - return ERR_CAST(trips); + ret = PTR_ERR(trips); + goto out_of_node_put; } ret = thermal_of_monitor_init(np, &delay, &pdelay); @@ -519,6 +520,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * goto out_kfree_trips; } + of_node_put(np); kfree(trips); ret = thermal_zone_device_enable(tz); @@ -533,6 +535,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * out_kfree_trips: kfree(trips); +out_of_node_put: + of_node_put(np); return ERR_PTR(ret); } From c0a1ef9c5be72ff28a5413deb1b3e1a066593c13 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:23 +0200 Subject: [PATCH 1444/1523] thermal: of: Fix OF node leak in of_thermal_zone_find() error paths Terminating for_each_available_child_of_node() loop requires dropping OF node reference, so bailing out on errors misses this. Solve the OF node reference leak with scoped for_each_available_child_of_node_scoped(). Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-3-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index b08a9b64718d..1f252692815a 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -184,14 +184,14 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int * Search for each thermal zone, a defined sensor * corresponding to the one passed as parameter */ - for_each_available_child_of_node(np, tz) { + for_each_available_child_of_node_scoped(np, child) { int count, i; - count = of_count_phandle_with_args(tz, "thermal-sensors", + count = of_count_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells"); if (count <= 0) { - pr_err("%pOFn: missing thermal sensor\n", tz); + pr_err("%pOFn: missing thermal sensor\n", child); tz = ERR_PTR(-EINVAL); goto out; } @@ -200,18 +200,19 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int int ret; - ret = of_parse_phandle_with_args(tz, "thermal-sensors", + ret = of_parse_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells", i, &sensor_specs); if (ret < 0) { - pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", tz, ret); + pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", child, ret); tz = ERR_PTR(ret); goto out; } if ((sensor == sensor_specs.np) && id == (sensor_specs.args_count ? sensor_specs.args[0] : 0)) { - pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, tz); + pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, child); + tz = no_free_ptr(child); goto out; } } From 4ae738dfef2c0323752ab81786e2d298c9939321 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 22 Aug 2024 11:40:55 -0400 Subject: [PATCH 1445/1523] net: xilinx: axienet: Always disable promiscuous mode If promiscuous mode is disabled when there are fewer than four multicast addresses, then it will not be reflected in the hardware. Fix this by always clearing the promiscuous mode flag even when we program multicast addresses. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240822154059.1066595-2-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 02fdf66e07fa..163d05248007 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -450,6 +450,10 @@ static void axienet_set_multicast_list(struct net_device *ndev) } else if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; + reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + i = 0; netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) From 797a68c9de0f5a5447baf4bd3bb9c10a3993435b Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 22 Aug 2024 11:40:56 -0400 Subject: [PATCH 1446/1523] net: xilinx: axienet: Fix dangling multicast addresses If a multicast address is removed but there are still some multicast addresses, that address would remain programmed into the frame filter. Fix this by explicitly setting the enable bit for each filter. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240822154059.1066595-3-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 1 + .../net/ethernet/xilinx/xilinx_axienet_main.c | 21 ++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index c7d9221fafdc..09c9f9787180 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -170,6 +170,7 @@ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ #define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ +#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 163d05248007..9aeb7b9f3ae4 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -432,7 +432,7 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) */ static void axienet_set_multicast_list(struct net_device *ndev) { - int i; + int i = 0; u32 reg, af0reg, af1reg; struct axienet_local *lp = netdev_priv(ndev); @@ -454,7 +454,6 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - i = 0; netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) break; @@ -473,6 +472,7 @@ static void axienet_set_multicast_list(struct net_device *ndev) axienet_iow(lp, XAE_FMI_OFFSET, reg); axienet_iow(lp, XAE_AF0_OFFSET, af0reg); axienet_iow(lp, XAE_AF1_OFFSET, af1reg); + axienet_iow(lp, XAE_FFE_OFFSET, 1); i++; } } else { @@ -480,18 +480,15 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - - for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; - reg |= i; - - axienet_iow(lp, XAE_FMI_OFFSET, reg); - axienet_iow(lp, XAE_AF0_OFFSET, 0); - axienet_iow(lp, XAE_AF1_OFFSET, 0); - } - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } + + for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { + reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; + reg |= i; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + axienet_iow(lp, XAE_FFE_OFFSET, 0); + } } /** From 57fb67783c4011581882f32e656d738da1f82042 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 21 Aug 2024 20:32:52 +0800 Subject: [PATCH 1447/1523] net: ovs: fix ovs_drop_reasons error There is something wrong with ovs_drop_reasons. ovs_drop_reasons[0] is "OVS_DROP_LAST_ACTION", but OVS_DROP_LAST_ACTION == __OVS_DROP_REASON + 1, which means that ovs_drop_reasons[1] should be "OVS_DROP_LAST_ACTION". And as Adrian tested, without the patch, adding flow to drop packets results in: drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) origin: software input port ifindex: 8 timestamp: Tue Aug 20 10:19:17 2024 859853461 nsec protocol: 0x800 length: 98 original length: 98 drop reason: OVS_DROP_ACTION_ERROR With the patch, the same results in: drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) origin: software input port ifindex: 8 timestamp: Tue Aug 20 10:16:13 2024 475856608 nsec protocol: 0x800 length: 98 original length: 98 drop reason: OVS_DROP_LAST_ACTION Fix this by initializing ovs_drop_reasons with index. Fixes: 9d802da40b7c ("net: openvswitch: add last-action drop reason") Signed-off-by: Menglong Dong Tested-by: Adrian Moreno Reviewed-by: Adrian Moreno Link: https://patch.msgid.link/20240821123252.186305-1-dongml2@chinatelecom.cn Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99d72543abd3..78d9961fcd44 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = { }; static const char * const ovs_drop_reasons[] = { -#define S(x) (#x), +#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; From 0124fb0ebf3b0ef89892d42147c9387be3105318 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 21 Aug 2024 11:13:37 +0200 Subject: [PATCH 1448/1523] s390/iucv: Fix vargs handling in iucv_alloc_device() iucv_alloc_device() gets a format string and a varying number of arguments. This is incorrectly forwarded by calling dev_set_name() with the format string and a va_list, while dev_set_name() expects also a varying number of arguments. Symptoms: Corrupted iucv device names, which can result in log messages like: sysfs: cannot create duplicate filename '/devices/iucv/hvc_iucv1827699952' Fixes: 4452e8ef8c36 ("s390/iucv: Provide iucv_alloc_device() / iucv_release_device()") Link: https://bugzilla.suse.com/show_bug.cgi?id=1228425 Signed-off-by: Alexandra Winter Reviewed-by: Thorsten Winkler Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20240821091337.3627068-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1e42e13ad24e..d3e9efab7f4b 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, { struct device *dev; va_list vargs; + char buf[20]; int rc; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto out_error; va_start(vargs, fmt); - rc = dev_set_name(dev, fmt, vargs); + vsnprintf(buf, sizeof(buf), fmt, vargs); + rc = dev_set_name(dev, "%s", buf); va_end(vargs); if (rc) goto out_error; From a54a93d0e3599b05856971734e15418ac551a14c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Aug 2024 09:35:27 +0800 Subject: [PATCH 1449/1523] nvme: move stopping keep-alive into nvme_uninit_ctrl() Commit 4733b65d82bd ("nvme: start keep-alive after admin queue setup") moves starting keep-alive from nvme_start_ctrl() into nvme_init_ctrl_finish(), but don't move stopping keep-alive into nvme_uninit_ctrl(), so keep-alive work can be started and keep pending after failing to start controller, finally use-after-free is triggered if nvme host driver is unloaded. This patch fixes kernel panic when running nvme/004 in case that connection failure is triggered, by moving stopping keep-alive into nvme_uninit_ctrl(). This way is reasonable because keep-alive is now started in nvme_init_ctrl_finish(). Fixes: 3af755a46881 ("nvme: move nvme_stop_keep_alive() back to original position") Cc: Hannes Reinecke Cc: Mark O'Donovan Reported-by: Changhui Zhong Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 33fa01c599ad..0dc8bcc664f2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4612,7 +4612,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4648,6 +4647,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); nvme_hwmon_exit(ctrl); nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); From fe01751347359862c65c715d51c0b3f4fa8ee2f0 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 14 Aug 2024 19:26:50 +0530 Subject: [PATCH 1450/1523] nvme: Remove unused field The "name" field in struct nvme_ctrl is unsued so removing it. This would help save 12 bytes of space for each nvme_ctrl instance created. Signed-off-by: Nilay Shroff Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ae5314d32943..da57947130cc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -301,7 +301,6 @@ struct nvme_ctrl { struct opal_dev *opal_dev; - char name[12]; u16 cntlid; u16 mtfa; From 5e51224d2afbda57f33f47485871ee5532145e18 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 20 Aug 2024 14:33:15 +0000 Subject: [PATCH 1451/1523] smb/client: fix typo: GlobalMid_Sem -> GlobalMid_Lock The comments have typos, fix that to not confuse readers. Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon --- fs/smb/client/cifsfs.c | 6 +++--- fs/smb/client/cifsglob.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2c4b357d85e2..d89485235425 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -75,9 +75,9 @@ unsigned int sign_CIFS_PDUs = 1; /* * Global transaction id (XID) information */ -unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 5c9b3e6cd95f..7ebe80a25d04 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2017,9 +2017,9 @@ extern spinlock_t cifs_tcp_ses_lock; /* * Global transaction id (XID) information */ -extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* From cb78f9b7d0c0c9f86d8c0ac9c46b8b684d8785a9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Aug 2024 10:18:41 -0400 Subject: [PATCH 1452/1523] nfs: fix the fetch of FATTR4_OPEN_ARGUMENTS The client doesn't properly request FATTR4_OPEN_ARGUMENTS in the initial SERVER_CAPS getattr. Add FATTR4_WORD2_OPEN_ARGUMENTS to the initial request. Fixes: 707f13b3d081 (NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute) Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8883016c551c..06df74362e94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3931,7 +3931,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_CASE_INSENSITIVE | FATTR4_WORD0_CASE_PRESERVING; if (minorversion) - bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { From 95832998fb6edc50d4f2f6a958d9f90142d4be48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 21 Aug 2024 08:28:25 -0400 Subject: [PATCH 1453/1523] nfs: fix bitmap decoder to handle a 3rd word It only decodes the first two words at this point. Have it decode the third word as well. Without this, the client doesn't send delegated timestamps in the CB_GETATTR response. With this change we also need to expand the on-stack bitmap in decode_recallany_args to 3 elements, in case the server sends a larger bitmap than expected. Fixes: 43df7110f4a9 ("NFSv4: Add CB_GETATTR support for delegated attributes") Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 29c49a7e5fe1..6df77f008d3f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -118,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) if (likely(attrlen > 0)) bitmap[0] = ntohl(*p++); if (attrlen > 1) - bitmap[1] = ntohl(*p); + bitmap[1] = ntohl(*p++); + if (attrlen > 2) + bitmap[2] = ntohl(*p); return 0; } @@ -446,7 +448,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, void *argp) { struct cb_recallanyargs *args = argp; - uint32_t bitmap[2]; + uint32_t bitmap[3]; __be32 *p, status; p = xdr_inline_decode(xdr, 4); From a017ad1313fc91bdf235097fd0a02f673fc7bb11 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:00 -0400 Subject: [PATCH 1454/1523] NFSv4: Add missing rescheduling points in nfs_client_return_marked_delegations We're seeing reports of soft lockups when iterating through the loops, so let's add rescheduling points. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index cbbd4866b0b7..97b386032b71 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -228,6 +229,7 @@ static int __nfs_list_for_each_server(struct list_head *head, ret = fn(server, data); if (ret) goto out; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); From d72b7963115bea971a28eaa2cb76722c023f9fdf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:01 -0400 Subject: [PATCH 1455/1523] NFSv4: Fix clearing of layout segments in layoutreturn Make sure that we clear the layout segments in cases where we see a fatal error, and also in the case where the layout is invalid. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++++--- fs/nfs/pnfs.c | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 06df74362e94..b8ffbe52ba15 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9998,6 +9998,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; default: task->tk_status = 0; + lrp->res.lrs_present = 0; fallthrough; case 0: break; @@ -10011,9 +10012,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) - break; - goto out_restart; + if (nfs4_async_handle_error(task, server, NULL, NULL) == + -EAGAIN) + goto out_restart; + lrp->res.lrs_present = 0; + break; } return; out_restart: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index aa698481bec8..0d16b383a452 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1284,10 +1284,9 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || - !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; - if (stateid) { + if (stateid && pnfs_layout_is_valid(lo)) { u32 seq = be32_to_cpu(arg_stateid->seqid); pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); From f92214e4c312f6ea9d78650cc6291d200f17abb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:02 -0400 Subject: [PATCH 1456/1523] NFS: Avoid unnecessary rescanning of the per-server delegation list If the call to nfs_delegation_grab_inode() fails, we will not have dropped any locks that require us to rescan the list. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d5edb3b3eeef..20cb2008f9e4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -647,6 +647,9 @@ restart: prev = delegation; continue; } + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; if (prev) { struct inode *tmp = nfs_delegation_grab_inode(prev); @@ -657,12 +660,6 @@ restart: } } - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) { - rcu_read_unlock(); - iput(to_put); - goto restart; - } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); @@ -1184,7 +1181,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, struct inode *inode; restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1195,7 +1191,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); if (delegation != NULL) { @@ -1318,7 +1314,6 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1330,7 +1325,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; spin_lock(&delegation->lock); cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); From dd6ded6e87a97a32e06c10b7f6639eebb779257d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:49 +0300 Subject: [PATCH 1457/1523] drm/i915/vblank: use drm_crtc_vblank_crtc() instead of open-coding There's a helper for drm->vblank[drm_crtc_index(crtc)], use it. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/78198ade92da1d978f2032c50b3b50e3f8f01701.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vblank.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index f183e0d4b2ba..551e9ca9bb99 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -68,7 +68,7 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[drm_crtc_index(crtc)]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); const struct drm_display_mode *mode = &vblank->hwmode; enum pipe pipe = to_intel_crtc(crtc)->pipe; u32 pixel, vbl_start, hsync_start, htotal; @@ -120,7 +120,7 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) u32 g4x_get_vblank_counter(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[drm_crtc_index(crtc)]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); enum pipe pipe = to_intel_crtc(crtc)->pipe; if (!vblank->max_vblank_count) From 58cfea648ff3b3bd283c03f094f24d379fe6fa82 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:50 +0300 Subject: [PATCH 1458/1523] drm/i915/vblank: fix context imbalance warnings When building for xe, we get the context imbalance warning as the actual locking/unlocking is not compiled: ../drivers/gpu/drm/i915/display/intel_vblank.c:306:13: warning: context imbalance in 'intel_vblank_section_enter' - wrong count at exit ../drivers/gpu/drm/i915/display/intel_vblank.c:314:13: warning: context imbalance in 'intel_vblank_section_exit' - wrong count at exit Fix by adding separata stubs for xe without __acquires/__releases annotation. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/111de5bee15f408de65b19ece4b68a7ac66b30cf.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vblank.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 551e9ca9bb99..2073e8075af4 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -303,21 +303,27 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) * all register accesses to the same cacheline to be serialized, * otherwise they may hang. */ +#ifdef I915 static void intel_vblank_section_enter(struct drm_i915_private *i915) __acquires(i915->uncore.lock) { -#ifdef I915 spin_lock(&i915->uncore.lock); -#endif } static void intel_vblank_section_exit(struct drm_i915_private *i915) __releases(i915->uncore.lock) { -#ifdef I915 spin_unlock(&i915->uncore.lock); -#endif } +#else +static void intel_vblank_section_enter(struct drm_i915_private *i915) +{ +} + +static void intel_vblank_section_exit(struct drm_i915_private *i915) +{ +} +#endif static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, bool in_vblank_irq, From aa451ae76fda24f919174a17df31c290f91a953c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:51 +0300 Subject: [PATCH 1459/1523] drm/i915/vblank: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_vblank.[ch] to struct intel_display. Some stragglers are left behind where needed. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/40430651a45ddd9e350a1fd7938fe4054492f6ea.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vblank.c | 97 +++++++++++---------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 2073e8075af4..838b55ecb1d8 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -67,7 +67,7 @@ */ u32 i915_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_display *display = to_intel_display(crtc->dev); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); const struct drm_display_mode *mode = &vblank->hwmode; enum pipe pipe = to_intel_crtc(crtc)->pipe; @@ -103,8 +103,8 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) * we get a low value that's stable across two reads of the high * register. */ - frame = intel_de_read64_2x32(dev_priv, PIPEFRAMEPIXEL(dev_priv, pipe), - PIPEFRAME(dev_priv, pipe)); + frame = intel_de_read64_2x32(display, PIPEFRAMEPIXEL(display, pipe), + PIPEFRAME(display, pipe)); pixel = frame & PIPE_PIXEL_MASK; frame = (frame >> PIPE_FRAME_LOW_SHIFT) & 0xffffff; @@ -119,19 +119,19 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) u32 g4x_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_display *display = to_intel_display(crtc->dev); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); enum pipe pipe = to_intel_crtc(crtc)->pipe; if (!vblank->max_vblank_count) return 0; - return intel_de_read(dev_priv, PIPE_FRMCOUNT_G4X(dev_priv, pipe)); + return intel_de_read(display, PIPE_FRMCOUNT_G4X(display, pipe)); } static u32 intel_crtc_scanlines_since_frame_timestamp(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); const struct drm_display_mode *mode = &vblank->hwmode; u32 htotal = mode->crtc_htotal; @@ -150,16 +150,16 @@ static u32 intel_crtc_scanlines_since_frame_timestamp(struct intel_crtc *crtc) * pipe frame time stamp. The time stamp value * is sampled at every start of vertical blank. */ - scan_prev_time = intel_de_read_fw(dev_priv, + scan_prev_time = intel_de_read_fw(display, PIPE_FRMTMSTMP(crtc->pipe)); /* * The TIMESTAMP_CTR register has the current * time stamp value. */ - scan_curr_time = intel_de_read_fw(dev_priv, IVB_TIMESTAMP_CTR); + scan_curr_time = intel_de_read_fw(display, IVB_TIMESTAMP_CTR); - scan_post_time = intel_de_read_fw(dev_priv, + scan_post_time = intel_de_read_fw(display, PIPE_FRMTMSTMP(crtc->pipe)); } while (scan_post_time != scan_prev_time); @@ -192,6 +192,7 @@ static u32 __intel_get_crtc_scanline_from_timestamp(struct intel_crtc *crtc) static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); /* @@ -220,7 +221,7 @@ static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) * However if queried just before the start of vblank we'll get an * answer that's slightly in the future. */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) return -1; else if (HAS_DDI(i915) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return 2; @@ -234,8 +235,7 @@ static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) */ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(crtc); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); const struct drm_display_mode *mode = &vblank->hwmode; enum pipe pipe = crtc->pipe; @@ -249,7 +249,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) vtotal = intel_mode_vtotal(mode); - position = intel_de_read_fw(dev_priv, PIPEDSL(dev_priv, pipe)) & PIPEDSL_LINE_MASK; + position = intel_de_read_fw(display, PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; /* * On HSW, the DSL reg (0x70000) appears to return 0 if we @@ -263,13 +263,13 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) * problem. We may need to extend this to include other platforms, * but so far testing only shows the problem on HSW. */ - if (HAS_DDI(dev_priv) && !position) { + if (HAS_DDI(display) && !position) { int i, temp; for (i = 0; i < 100; i++) { udelay(1); - temp = intel_de_read_fw(dev_priv, - PIPEDSL(dev_priv, pipe)) & PIPEDSL_LINE_MASK; + temp = intel_de_read_fw(display, + PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; if (temp != position) { position = temp; break; @@ -304,23 +304,25 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) * otherwise they may hang. */ #ifdef I915 -static void intel_vblank_section_enter(struct drm_i915_private *i915) +static void intel_vblank_section_enter(struct intel_display *display) __acquires(i915->uncore.lock) { + struct drm_i915_private *i915 = to_i915(display->drm); spin_lock(&i915->uncore.lock); } -static void intel_vblank_section_exit(struct drm_i915_private *i915) +static void intel_vblank_section_exit(struct intel_display *display) __releases(i915->uncore.lock) { + struct drm_i915_private *i915 = to_i915(display->drm); spin_unlock(&i915->uncore.lock); } #else -static void intel_vblank_section_enter(struct drm_i915_private *i915) +static void intel_vblank_section_enter(struct intel_display *display) { } -static void intel_vblank_section_exit(struct drm_i915_private *i915) +static void intel_vblank_section_exit(struct intel_display *display) { } #endif @@ -331,19 +333,19 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { - struct drm_device *dev = _crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(_crtc->dev); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = to_intel_crtc(_crtc); enum pipe pipe = crtc->pipe; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; - bool use_scanline_counter = DISPLAY_VER(dev_priv) >= 5 || - IS_G4X(dev_priv) || DISPLAY_VER(dev_priv) == 2 || + bool use_scanline_counter = DISPLAY_VER(display) >= 5 || + IS_G4X(dev_priv) || DISPLAY_VER(display) == 2 || crtc->mode_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; - if (drm_WARN_ON(&dev_priv->drm, !mode->crtc_clock)) { - drm_dbg(&dev_priv->drm, + if (drm_WARN_ON(display->drm, !mode->crtc_clock)) { + drm_dbg(display->drm, "trying to get scanoutpos for disabled pipe %c\n", pipe_name(pipe)); return false; @@ -361,7 +363,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, * preemption disabled, so the following code must not block. */ local_irq_save(irqflags); - intel_vblank_section_enter(dev_priv); + intel_vblank_section_enter(display); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -393,7 +395,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, * We can split this into vertical and horizontal * scanout position. */ - position = (intel_de_read_fw(dev_priv, PIPEFRAMEPIXEL(dev_priv, pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT; + position = (intel_de_read_fw(display, PIPEFRAMEPIXEL(display, pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT; /* convert to pixel counts */ vbl_start *= htotal; @@ -429,7 +431,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ - intel_vblank_section_exit(dev_priv); + intel_vblank_section_exit(display); local_irq_restore(irqflags); /* @@ -464,42 +466,42 @@ bool intel_crtc_get_vblank_timestamp(struct drm_crtc *crtc, int *max_error, int intel_get_crtc_scanline(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); unsigned long irqflags; int position; local_irq_save(irqflags); - intel_vblank_section_enter(dev_priv); + intel_vblank_section_enter(display); position = __intel_get_crtc_scanline(crtc); - intel_vblank_section_exit(dev_priv); + intel_vblank_section_exit(display); local_irq_restore(irqflags); return position; } -static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, +static bool pipe_scanline_is_moving(struct intel_display *display, enum pipe pipe) { - i915_reg_t reg = PIPEDSL(dev_priv, pipe); + i915_reg_t reg = PIPEDSL(display, pipe); u32 line1, line2; - line1 = intel_de_read(dev_priv, reg) & PIPEDSL_LINE_MASK; + line1 = intel_de_read(display, reg) & PIPEDSL_LINE_MASK; msleep(5); - line2 = intel_de_read(dev_priv, reg) & PIPEDSL_LINE_MASK; + line2 = intel_de_read(display, reg) & PIPEDSL_LINE_MASK; return line1 != line2; } static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); enum pipe pipe = crtc->pipe; /* Wait for the display line to settle/start moving */ - if (wait_for(pipe_scanline_is_moving(dev_priv, pipe) == state, 100)) - drm_err(&dev_priv->drm, + if (wait_for(pipe_scanline_is_moving(display, pipe) == state, 100)) + drm_err(display->drm, "pipe %c scanline %s wait timed out\n", pipe_name(pipe), str_on_off(state)); } @@ -517,8 +519,8 @@ void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc) void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, bool vrr_enable) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); u8 mode_flags = crtc_state->mode_flags; struct drm_display_mode adjusted_mode; int vmax_vblank_start = 0; @@ -527,7 +529,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, drm_mode_init(&adjusted_mode, &crtc_state->hw.adjusted_mode); if (vrr_enable) { - drm_WARN_ON(&i915->drm, (mode_flags & I915_MODE_FLAG_VRR) == 0); + drm_WARN_ON(display->drm, + (mode_flags & I915_MODE_FLAG_VRR) == 0); adjusted_mode.crtc_vtotal = crtc_state->vrr.vmax; adjusted_mode.crtc_vblank_end = crtc_state->vrr.vmax; @@ -549,8 +552,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, * __intel_get_crtc_scanline()) with vblank_time_lock? * Need to audit everything to make sure it's safe. */ - spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags); - intel_vblank_section_enter(i915); + spin_lock_irqsave(&display->drm->vblank_time_lock, irqflags); + intel_vblank_section_enter(display); drm_calc_timestamping_constants(&crtc->base, &adjusted_mode); @@ -559,8 +562,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, crtc->mode_flags = mode_flags; crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state); - intel_vblank_section_exit(i915); - spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags); + intel_vblank_section_exit(display); + spin_unlock_irqrestore(&display->drm->vblank_time_lock, irqflags); } int intel_mode_vdisplay(const struct drm_display_mode *mode) @@ -666,7 +669,7 @@ void intel_vblank_evade_init(const struct intel_crtc_state *old_crtc_state, int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) { struct intel_crtc *crtc = evade->crtc; - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); long timeout = msecs_to_jiffies_timeout(1); wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); DEFINE_WAIT(wait); @@ -688,7 +691,7 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) break; if (!timeout) { - drm_err(&i915->drm, + drm_err(display->drm, "Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; From 852791985af63bcfd66d7f6ec160cc5f9fcb020a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:52 +0300 Subject: [PATCH 1460/1523] drm/i915/vrr: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_vrr.[ch] to struct intel_display. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1d25a08c62a320133fbb0a89dac3dd1081139487.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vrr.c | 127 +++++++++++------------ 1 file changed, 61 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 7e1d9c718214..9a51f5bac307 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -17,8 +17,8 @@ bool intel_vrr_is_capable(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); const struct drm_display_info *info = &connector->base.display_info; - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dp *intel_dp; /* @@ -43,7 +43,7 @@ bool intel_vrr_is_capable(struct intel_connector *connector) return false; } - return HAS_VRR(i915) && + return HAS_VRR(display) && info->monitor_range.max_vfreq - info->monitor_range.min_vfreq > 10; } @@ -89,10 +89,9 @@ intel_vrr_check_modeset(struct intel_atomic_state *state) */ static int intel_vrr_vblank_exit_length(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return crtc_state->vrr.guardband; else /* The hw imposes the extra scanline before frame start */ @@ -113,11 +112,11 @@ int intel_vrr_vmax_vblank_start(const struct intel_crtc_state *crtc_state) static bool is_cmrr_frac_required(struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); int calculated_refresh_k, actual_refresh_k, pixel_clock_per_line; struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (!HAS_CMRR(i915)) + if (!HAS_CMRR(display)) return false; actual_refresh_k = @@ -161,8 +160,7 @@ void intel_vrr_compute_config(struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_dp *intel_dp = intel_attached_dp(connector); @@ -186,7 +184,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, if (!crtc_state->vrr.in_range) return; - if (HAS_LRR(i915)) + if (HAS_LRR(display)) crtc_state->update_lrr = true; vmin = DIV_ROUND_UP(adjusted_mode->crtc_clock * 1000, @@ -246,7 +244,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, * For XE_LPD+, we use guardband and pipeline override * is deprecated. */ - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { crtc_state->vrr.guardband = crtc_state->vrr.vmin + 1 - adjusted_mode->crtc_vblank_start; } else { @@ -258,9 +256,9 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return VRR_CTL_IGN_MAX_SHIFT | VRR_CTL_FLIP_LINE_EN | XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); else @@ -271,7 +269,7 @@ static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; /* @@ -279,133 +277,130 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) * TGL: generate VRR "safe window" for DSB vblank waits * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ - if (IS_DISPLAY_VER(dev_priv, 12, 13)) - intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), + if (IS_DISPLAY_VER(display, 12, 13)) + intel_de_rmw(display, CHICKEN_TRANS(cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); if (!crtc_state->vrr.flipline) { - intel_de_write(dev_priv, - TRANS_VRR_CTL(dev_priv, cpu_transcoder), 0); + intel_de_write(display, + TRANS_VRR_CTL(display, cpu_transcoder), 0); return; } if (crtc_state->cmrr.enable) { - intel_de_write(dev_priv, TRANS_CMRR_M_HI(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_M_HI(display, cpu_transcoder), upper_32_bits(crtc_state->cmrr.cmrr_m)); - intel_de_write(dev_priv, TRANS_CMRR_M_LO(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_M_LO(display, cpu_transcoder), lower_32_bits(crtc_state->cmrr.cmrr_m)); - intel_de_write(dev_priv, TRANS_CMRR_N_HI(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_N_HI(display, cpu_transcoder), upper_32_bits(crtc_state->cmrr.cmrr_n)); - intel_de_write(dev_priv, TRANS_CMRR_N_LO(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_N_LO(display, cpu_transcoder), lower_32_bits(crtc_state->cmrr.cmrr_n)); } - intel_de_write(dev_priv, TRANS_VRR_VMIN(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_VMIN(display, cpu_transcoder), crtc_state->vrr.vmin - 1); - intel_de_write(dev_priv, TRANS_VRR_VMAX(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_VMAX(display, cpu_transcoder), crtc_state->vrr.vmax - 1); - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(crtc_state)); - intel_de_write(dev_priv, TRANS_VRR_FLIPLINE(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder), crtc_state->vrr.flipline - 1); } void intel_vrr_send_push(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN | TRANS_PUSH_SEND); } bool intel_vrr_is_push_sent(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return false; - return intel_de_read(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder)) & TRANS_PUSH_SEND; + return intel_de_read(display, TRANS_PUSH(display, cpu_transcoder)) & TRANS_PUSH_SEND; } void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); - if (HAS_AS_SDP(dev_priv)) - intel_de_write(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder), + if (HAS_AS_SDP(display)) + intel_de_write(display, + TRANS_VRR_VSYNC(display, cpu_transcoder), VRR_VSYNC_END(crtc_state->vrr.vsync_end) | VRR_VSYNC_START(crtc_state->vrr.vsync_start)); if (crtc_state->cmrr.enable) { - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | VRR_CTL_CMRR_ENABLE | trans_vrr_ctl(crtc_state)); } else { - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); } } void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(old_crtc_state); enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; if (!old_crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(old_crtc_state)); - intel_de_wait_for_clear(dev_priv, - TRANS_VRR_STATUS(dev_priv, cpu_transcoder), + intel_de_wait_for_clear(display, + TRANS_VRR_STATUS(display, cpu_transcoder), VRR_STATUS_VRR_EN_LIVE, 1000); - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), 0); + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); - if (HAS_AS_SDP(dev_priv)) - intel_de_write(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder), 0); + if (HAS_AS_SDP(display)) + intel_de_write(display, + TRANS_VRR_VSYNC(display, cpu_transcoder), 0); } void intel_vrr_get_config(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 trans_vrr_ctl, trans_vrr_vsync; - trans_vrr_ctl = intel_de_read(dev_priv, - TRANS_VRR_CTL(dev_priv, cpu_transcoder)); + trans_vrr_ctl = intel_de_read(display, + TRANS_VRR_CTL(display, cpu_transcoder)); crtc_state->vrr.enable = trans_vrr_ctl & VRR_CTL_VRR_ENABLE; - if (HAS_CMRR(dev_priv)) + if (HAS_CMRR(display)) crtc_state->cmrr.enable = (trans_vrr_ctl & VRR_CTL_CMRR_ENABLE); if (crtc_state->cmrr.enable) { crtc_state->cmrr.cmrr_n = - intel_de_read64_2x32(dev_priv, TRANS_CMRR_N_LO(dev_priv, cpu_transcoder), - TRANS_CMRR_N_HI(dev_priv, cpu_transcoder)); + intel_de_read64_2x32(display, TRANS_CMRR_N_LO(display, cpu_transcoder), + TRANS_CMRR_N_HI(display, cpu_transcoder)); crtc_state->cmrr.cmrr_m = - intel_de_read64_2x32(dev_priv, TRANS_CMRR_M_LO(dev_priv, cpu_transcoder), - TRANS_CMRR_M_HI(dev_priv, cpu_transcoder)); + intel_de_read64_2x32(display, TRANS_CMRR_M_LO(display, cpu_transcoder), + TRANS_CMRR_M_HI(display, cpu_transcoder)); } - if (DISPLAY_VER(dev_priv) >= 13) + if (DISPLAY_VER(display) >= 13) crtc_state->vrr.guardband = REG_FIELD_GET(XELPD_VRR_CTL_VRR_GUARDBAND_MASK, trans_vrr_ctl); else @@ -414,21 +409,21 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) REG_FIELD_GET(VRR_CTL_PIPELINE_FULL_MASK, trans_vrr_ctl); if (trans_vrr_ctl & VRR_CTL_FLIP_LINE_EN) { - crtc_state->vrr.flipline = intel_de_read(dev_priv, - TRANS_VRR_FLIPLINE(dev_priv, cpu_transcoder)) + 1; - crtc_state->vrr.vmax = intel_de_read(dev_priv, - TRANS_VRR_VMAX(dev_priv, cpu_transcoder)) + 1; - crtc_state->vrr.vmin = intel_de_read(dev_priv, - TRANS_VRR_VMIN(dev_priv, cpu_transcoder)) + 1; + crtc_state->vrr.flipline = intel_de_read(display, + TRANS_VRR_FLIPLINE(display, cpu_transcoder)) + 1; + crtc_state->vrr.vmax = intel_de_read(display, + TRANS_VRR_VMAX(display, cpu_transcoder)) + 1; + crtc_state->vrr.vmin = intel_de_read(display, + TRANS_VRR_VMIN(display, cpu_transcoder)) + 1; } if (crtc_state->vrr.enable) { crtc_state->mode_flags |= I915_MODE_FLAG_VRR; - if (HAS_AS_SDP(dev_priv)) { + if (HAS_AS_SDP(display)) { trans_vrr_vsync = - intel_de_read(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder)); + intel_de_read(display, + TRANS_VRR_VSYNC(display, cpu_transcoder)); crtc_state->vrr.vsync_start = REG_FIELD_GET(VRR_VSYNC_START_MASK, trans_vrr_vsync); crtc_state->vrr.vsync_end = From ab0b0eb5c85c5961913bdb9b8011cc8f5c14978a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:53 +0300 Subject: [PATCH 1461/1523] drm/i915/tv: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_tv.[ch] to struct intel_display. Some stragglers are left behind where needed. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/04b1c8d095a52fb817876acdab4e9139d909f306.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_tv.c | 203 ++++++++++--------- drivers/gpu/drm/i915/display/intel_tv.h | 6 +- 3 files changed, 108 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1042f65967ba..9049b9a1209d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7935,7 +7935,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) g4x_dp_init(dev_priv, DP_D, PORT_D); if (SUPPORTS_TV(dev_priv)) - intel_tv_init(dev_priv); + intel_tv_init(display); } else if (DISPLAY_VER(dev_priv) == 2) { if (IS_I85X(dev_priv)) intel_lvds_init(dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index bfc43bda8532..581844d1db9a 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -914,8 +914,8 @@ static struct intel_tv *intel_attached_tv(struct intel_connector *connector) static bool intel_tv_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 tmp = intel_de_read(dev_priv, TV_CTL); + struct intel_display *display = to_intel_display(encoder); + u32 tmp = intel_de_read(display, TV_CTL); *pipe = (tmp & TV_ENC_PIPE_SEL_MASK) >> TV_ENC_PIPE_SEL_SHIFT; @@ -928,13 +928,12 @@ intel_enable_tv(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(state); /* Prevents vblank waits from timing out in intel_tv_detect_type() */ intel_crtc_wait_for_next_vblank(to_intel_crtc(pipe_config->uapi.crtc)); - intel_de_rmw(dev_priv, TV_CTL, 0, TV_ENC_ENABLE); + intel_de_rmw(display, TV_CTL, 0, TV_ENC_ENABLE); } static void @@ -943,10 +942,9 @@ intel_disable_tv(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(state); - intel_de_rmw(dev_priv, TV_CTL, TV_ENC_ENABLE, 0); + intel_de_rmw(display, TV_CTL, TV_ENC_ENABLE, 0); } static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) @@ -960,9 +958,10 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = i915->display.cdclk.max_dotclk_freq; + int max_dotclk = display->cdclk.max_dotclk_freq; enum drm_mode_status status; status = intel_cpu_transcoder_mode_valid(i915, mode); @@ -1092,6 +1091,7 @@ static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -1104,11 +1104,11 @@ intel_tv_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - tv_ctl = intel_de_read(dev_priv, TV_CTL); - hctl1 = intel_de_read(dev_priv, TV_H_CTL_1); - hctl3 = intel_de_read(dev_priv, TV_H_CTL_3); - vctl1 = intel_de_read(dev_priv, TV_V_CTL_1); - vctl2 = intel_de_read(dev_priv, TV_V_CTL_2); + tv_ctl = intel_de_read(display, TV_CTL); + hctl1 = intel_de_read(display, TV_H_CTL_1); + hctl3 = intel_de_read(display, TV_H_CTL_3); + vctl1 = intel_de_read(display, TV_V_CTL_1); + vctl2 = intel_de_read(display, TV_V_CTL_2); tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; @@ -1143,17 +1143,17 @@ intel_tv_get_config(struct intel_encoder *encoder, break; } - tmp = intel_de_read(dev_priv, TV_WIN_POS); + tmp = intel_de_read(display, TV_WIN_POS); xpos = tmp >> 16; ypos = tmp & 0xffff; - tmp = intel_de_read(dev_priv, TV_WIN_SIZE); + tmp = intel_de_read(display, TV_WIN_SIZE); xsize = tmp >> 16; ysize = tmp & 0xffff; intel_tv_mode_to_mode(&mode, &tv_mode, pipe_config->port_clock); - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(&mode)); intel_tv_scale_mode_horiz(&mode, hdisplay, @@ -1171,10 +1171,10 @@ intel_tv_get_config(struct intel_encoder *encoder, I915_MODE_FLAG_USE_SCANLINE_COUNTER; } -static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, +static bool intel_tv_source_too_wide(struct intel_display *display, int hdisplay) { - return DISPLAY_VER(dev_priv) == 3 && hdisplay > 1024; + return DISPLAY_VER(display) == 3 && hdisplay > 1024; } static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, @@ -1192,6 +1192,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(pipe_config->uapi.state); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); @@ -1214,7 +1215,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, pipe_config->sink_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - drm_dbg_kms(&dev_priv->drm, "forcing bpc to 8 for TV\n"); + drm_dbg_kms(display->drm, "forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; pipe_config->port_clock = tv_mode->clock; @@ -1228,14 +1229,14 @@ intel_tv_compute_config(struct intel_encoder *encoder, intel_tv_mode_to_mode(adjusted_mode, tv_mode, pipe_config->port_clock); drm_mode_set_crtcinfo(adjusted_mode, 0); - if (intel_tv_source_too_wide(dev_priv, hdisplay) || + if (intel_tv_source_too_wide(display, hdisplay) || !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { int extra, top, bottom; extra = adjusted_mode->crtc_vdisplay - vdisplay; if (extra < 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "No vertical scaling for >1024 pixel wide modes\n"); return -EINVAL; } @@ -1269,7 +1270,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, tv_conn_state->bypass_vfilter = false; } - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(adjusted_mode)); /* @@ -1355,7 +1356,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, } static void -set_tv_mode_timings(struct drm_i915_private *dev_priv, +set_tv_mode_timings(struct intel_display *display, const struct tv_mode *tv_mode, bool burst_ena) { @@ -1401,32 +1402,32 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv, vctl7 = (tv_mode->vburst_start_f4 << TV_VBURST_START_F4_SHIFT) | (tv_mode->vburst_end_f4 << TV_VBURST_END_F4_SHIFT); - intel_de_write(dev_priv, TV_H_CTL_1, hctl1); - intel_de_write(dev_priv, TV_H_CTL_2, hctl2); - intel_de_write(dev_priv, TV_H_CTL_3, hctl3); - intel_de_write(dev_priv, TV_V_CTL_1, vctl1); - intel_de_write(dev_priv, TV_V_CTL_2, vctl2); - intel_de_write(dev_priv, TV_V_CTL_3, vctl3); - intel_de_write(dev_priv, TV_V_CTL_4, vctl4); - intel_de_write(dev_priv, TV_V_CTL_5, vctl5); - intel_de_write(dev_priv, TV_V_CTL_6, vctl6); - intel_de_write(dev_priv, TV_V_CTL_7, vctl7); + intel_de_write(display, TV_H_CTL_1, hctl1); + intel_de_write(display, TV_H_CTL_2, hctl2); + intel_de_write(display, TV_H_CTL_3, hctl3); + intel_de_write(display, TV_V_CTL_1, vctl1); + intel_de_write(display, TV_V_CTL_2, vctl2); + intel_de_write(display, TV_V_CTL_3, vctl3); + intel_de_write(display, TV_V_CTL_4, vctl4); + intel_de_write(display, TV_V_CTL_5, vctl5); + intel_de_write(display, TV_V_CTL_6, vctl6); + intel_de_write(display, TV_V_CTL_7, vctl7); } -static void set_color_conversion(struct drm_i915_private *dev_priv, +static void set_color_conversion(struct intel_display *display, const struct color_conversion *color_conversion) { - intel_de_write(dev_priv, TV_CSC_Y, + intel_de_write(display, TV_CSC_Y, (color_conversion->ry << 16) | color_conversion->gy); - intel_de_write(dev_priv, TV_CSC_Y2, + intel_de_write(display, TV_CSC_Y2, (color_conversion->by << 16) | color_conversion->ay); - intel_de_write(dev_priv, TV_CSC_U, + intel_de_write(display, TV_CSC_U, (color_conversion->ru << 16) | color_conversion->gu); - intel_de_write(dev_priv, TV_CSC_U2, + intel_de_write(display, TV_CSC_U2, (color_conversion->bu << 16) | color_conversion->au); - intel_de_write(dev_priv, TV_CSC_V, + intel_de_write(display, TV_CSC_V, (color_conversion->rv << 16) | color_conversion->gv); - intel_de_write(dev_priv, TV_CSC_V2, + intel_de_write(display, TV_CSC_V2, (color_conversion->bv << 16) | color_conversion->av); } @@ -1435,6 +1436,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); @@ -1450,7 +1452,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, int xpos, ypos; unsigned int xsize, ysize; - tv_ctl = intel_de_read(dev_priv, TV_CTL); + tv_ctl = intel_de_read(display, TV_CTL); tv_ctl &= TV_CTL_SAVE; switch (intel_tv->type) { @@ -1525,21 +1527,21 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, if (IS_I915GM(dev_priv)) tv_ctl |= TV_ENC_C0_FIX | TV_ENC_SDP_FIX; - set_tv_mode_timings(dev_priv, tv_mode, burst_ena); + set_tv_mode_timings(display, tv_mode, burst_ena); - intel_de_write(dev_priv, TV_SC_CTL_1, scctl1); - intel_de_write(dev_priv, TV_SC_CTL_2, scctl2); - intel_de_write(dev_priv, TV_SC_CTL_3, scctl3); + intel_de_write(display, TV_SC_CTL_1, scctl1); + intel_de_write(display, TV_SC_CTL_2, scctl2); + intel_de_write(display, TV_SC_CTL_3, scctl3); - set_color_conversion(dev_priv, color_conversion); + set_color_conversion(display, color_conversion); - if (DISPLAY_VER(dev_priv) >= 4) - intel_de_write(dev_priv, TV_CLR_KNOBS, 0x00404000); + if (DISPLAY_VER(display) >= 4) + intel_de_write(display, TV_CLR_KNOBS, 0x00404000); else - intel_de_write(dev_priv, TV_CLR_KNOBS, 0x00606000); + intel_de_write(display, TV_CLR_KNOBS, 0x00606000); if (video_levels) - intel_de_write(dev_priv, TV_CLR_LEVEL, + intel_de_write(display, TV_CLR_LEVEL, ((video_levels->black << TV_BLACK_LEVEL_SHIFT) | (video_levels->blank << TV_BLANK_LEVEL_SHIFT))); assert_transcoder_disabled(dev_priv, pipe_config->cpu_transcoder); @@ -1548,7 +1550,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, tv_filter_ctl = TV_AUTO_SCALE; if (tv_conn_state->bypass_vfilter) tv_filter_ctl |= TV_V_FILTER_BYPASS; - intel_de_write(dev_priv, TV_FILTER_CTL_1, tv_filter_ctl); + intel_de_write(display, TV_FILTER_CTL_1, tv_filter_ctl); xsize = tv_mode->hblank_start - tv_mode->hblank_end; ysize = intel_tv_mode_vdisplay(tv_mode); @@ -1559,31 +1561,32 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, conn_state->tv.margins.right); ysize -= (tv_conn_state->margins.top + tv_conn_state->margins.bottom); - intel_de_write(dev_priv, TV_WIN_POS, (xpos << 16) | ypos); - intel_de_write(dev_priv, TV_WIN_SIZE, (xsize << 16) | ysize); + intel_de_write(display, TV_WIN_POS, (xpos << 16) | ypos); + intel_de_write(display, TV_WIN_SIZE, (xsize << 16) | ysize); j = 0; for (i = 0; i < 60; i++) - intel_de_write(dev_priv, TV_H_LUMA(i), + intel_de_write(display, TV_H_LUMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 60; i++) - intel_de_write(dev_priv, TV_H_CHROMA(i), + intel_de_write(display, TV_H_CHROMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 43; i++) - intel_de_write(dev_priv, TV_V_LUMA(i), + intel_de_write(display, TV_V_LUMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 43; i++) - intel_de_write(dev_priv, TV_V_CHROMA(i), + intel_de_write(display, TV_V_CHROMA(i), tv_mode->filter_table[j++]); - intel_de_write(dev_priv, TV_DAC, - intel_de_read(dev_priv, TV_DAC) & TV_DAC_SAVE); - intel_de_write(dev_priv, TV_CTL, tv_ctl); + intel_de_write(display, TV_DAC, + intel_de_read(display, TV_DAC) & TV_DAC_SAVE); + intel_de_write(display, TV_CTL, tv_ctl); } static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_crtc *crtc = to_intel_crtc(connector->state->crtc); struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1600,8 +1603,8 @@ intel_tv_detect_type(struct intel_tv *intel_tv, spin_unlock_irq(&dev_priv->irq_lock); } - save_tv_dac = tv_dac = intel_de_read(dev_priv, TV_DAC); - save_tv_ctl = tv_ctl = intel_de_read(dev_priv, TV_CTL); + save_tv_dac = tv_dac = intel_de_read(display, TV_DAC); + save_tv_ctl = tv_ctl = intel_de_read(display, TV_CTL); /* Poll for TV detection */ tv_ctl &= ~(TV_ENC_ENABLE | TV_ENC_PIPE_SEL_MASK | TV_TEST_MODE_MASK); @@ -1627,15 +1630,15 @@ intel_tv_detect_type(struct intel_tv *intel_tv, tv_dac &= ~(TVDAC_STATE_CHG_EN | TVDAC_A_SENSE_CTL | TVDAC_B_SENSE_CTL | TVDAC_C_SENSE_CTL); - intel_de_write(dev_priv, TV_CTL, tv_ctl); - intel_de_write(dev_priv, TV_DAC, tv_dac); - intel_de_posting_read(dev_priv, TV_DAC); + intel_de_write(display, TV_CTL, tv_ctl); + intel_de_write(display, TV_DAC, tv_dac); + intel_de_posting_read(display, TV_DAC); intel_crtc_wait_for_next_vblank(crtc); type = -1; - tv_dac = intel_de_read(dev_priv, TV_DAC); - drm_dbg_kms(&dev_priv->drm, "TV detected: %x, %x\n", tv_ctl, tv_dac); + tv_dac = intel_de_read(display, TV_DAC); + drm_dbg_kms(display->drm, "TV detected: %x, %x\n", tv_ctl, tv_dac); /* * A B C * 0 1 1 Composite @@ -1643,25 +1646,25 @@ intel_tv_detect_type(struct intel_tv *intel_tv, * 0 0 0 Component */ if ((tv_dac & TVDAC_SENSE_MASK) == (TVDAC_B_SENSE | TVDAC_C_SENSE)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected Composite TV connection\n"); type = DRM_MODE_CONNECTOR_Composite; } else if ((tv_dac & (TVDAC_A_SENSE|TVDAC_B_SENSE)) == TVDAC_A_SENSE) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected S-Video TV connection\n"); type = DRM_MODE_CONNECTOR_SVIDEO; } else if ((tv_dac & TVDAC_SENSE_MASK) == 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected Component TV connection\n"); type = DRM_MODE_CONNECTOR_Component; } else { - drm_dbg_kms(&dev_priv->drm, "Unrecognised TV connection\n"); + drm_dbg_kms(display->drm, "Unrecognised TV connection\n"); type = -1; } - intel_de_write(dev_priv, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); - intel_de_write(dev_priv, TV_CTL, save_tv_ctl); - intel_de_posting_read(dev_priv, TV_CTL); + intel_de_write(display, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); + intel_de_write(display, TV_CTL, save_tv_ctl); + intel_de_posting_read(display, TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ intel_crtc_wait_for_next_vblank(crtc); @@ -1711,12 +1714,13 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); enum drm_connector_status status; int type; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] force=%d\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, force); if (!intel_display_device_enabled(i915)) @@ -1791,7 +1795,7 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i, count = 0; @@ -1805,7 +1809,7 @@ intel_tv_get_modes(struct drm_connector *connector) continue; /* no vertical scaling with wide sources on gen3 */ - if (DISPLAY_VER(dev_priv) == 3 && input->w > 1024 && + if (DISPLAY_VER(display) == 3 && input->w > 1024 && input->h > intel_tv_mode_vdisplay(tv_mode)) continue; @@ -1822,7 +1826,8 @@ intel_tv_get_modes(struct drm_connector *connector) */ intel_tv_mode_to_mode(mode, tv_mode, tv_mode->clock); if (count == 0) { - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, + "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); } intel_tv_scale_mode_horiz(mode, input->w, 0, 0); @@ -1887,7 +1892,7 @@ static const struct drm_encoder_funcs intel_tv_enc_funcs = { static void intel_tv_add_properties(struct drm_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); struct drm_connector_state *conn_state = connector->state; const char *tv_format_names[ARRAY_SIZE(tv_modes)]; int i; @@ -1903,45 +1908,44 @@ static void intel_tv_add_properties(struct drm_connector *connector) /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { /* 1080p50/1080p60 not supported on gen3 */ - if (DISPLAY_VER(i915) == 3 && tv_modes[i].oversample == 1) + if (DISPLAY_VER(display) == 3 && tv_modes[i].oversample == 1) break; tv_format_names[i] = tv_modes[i].name; } - drm_mode_create_tv_properties_legacy(&i915->drm, i, tv_format_names); + drm_mode_create_tv_properties_legacy(display->drm, i, tv_format_names); drm_object_attach_property(&connector->base, - i915->drm.mode_config.legacy_tv_mode_property, + display->drm->mode_config.legacy_tv_mode_property, conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_left_margin_property, + display->drm->mode_config.tv_left_margin_property, conn_state->tv.margins.left); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_top_margin_property, + display->drm->mode_config.tv_top_margin_property, conn_state->tv.margins.top); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_right_margin_property, + display->drm->mode_config.tv_right_margin_property, conn_state->tv.margins.right); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_bottom_margin_property, + display->drm->mode_config.tv_bottom_margin_property, conn_state->tv.margins.bottom); } void -intel_tv_init(struct drm_i915_private *dev_priv) +intel_tv_init(struct intel_display *display) { - struct intel_display *display = &dev_priv->display; struct drm_connector *connector; struct intel_tv *intel_tv; struct intel_encoder *intel_encoder; struct intel_connector *intel_connector; u32 tv_dac_on, tv_dac_off, save_tv_dac; - if ((intel_de_read(dev_priv, TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) + if ((intel_de_read(display, TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; if (!intel_bios_is_tv_present(display)) { - drm_dbg_kms(&dev_priv->drm, "Integrated TV is not present.\n"); + drm_dbg_kms(display->drm, "Integrated TV is not present.\n"); return; } @@ -1949,15 +1953,15 @@ intel_tv_init(struct drm_i915_private *dev_priv) * Sanity check the TV output by checking to see if the * DAC register holds a value */ - save_tv_dac = intel_de_read(dev_priv, TV_DAC); + save_tv_dac = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac | TVDAC_STATE_CHG_EN); - tv_dac_on = intel_de_read(dev_priv, TV_DAC); + intel_de_write(display, TV_DAC, save_tv_dac | TVDAC_STATE_CHG_EN); + tv_dac_on = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); - tv_dac_off = intel_de_read(dev_priv, TV_DAC); + intel_de_write(display, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); + tv_dac_off = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac); + intel_de_write(display, TV_DAC, save_tv_dac); /* * If the register does not hold the state change enable @@ -1995,10 +1999,11 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT; intel_connector->base.polled = intel_connector->polled; - drm_connector_init(&dev_priv->drm, connector, &intel_tv_connector_funcs, + drm_connector_init(display->drm, connector, &intel_tv_connector_funcs, DRM_MODE_CONNECTOR_SVIDEO); - drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_tv_enc_funcs, + drm_encoder_init(display->drm, &intel_encoder->base, + &intel_tv_enc_funcs, DRM_MODE_ENCODER_TVDAC, "TV"); intel_encoder->compute_config = intel_tv_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_tv.h b/drivers/gpu/drm/i915/display/intel_tv.h index f08827b8bf2b..0f280f69e73c 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.h +++ b/drivers/gpu/drm/i915/display/intel_tv.h @@ -6,12 +6,12 @@ #ifndef __INTEL_TV_H__ #define __INTEL_TV_H__ -struct drm_i915_private; +struct intel_display; #ifdef I915 -void intel_tv_init(struct drm_i915_private *dev_priv); +void intel_tv_init(struct intel_display *display); #else -static inline void intel_tv_init(struct drm_i915_private *dev_priv) +static inline void intel_tv_init(struct intel_display *display) { } #endif From 065ca63e161a1ed7e619ef7dea0c7c6435ee2f8d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:54 +0300 Subject: [PATCH 1462/1523] drm/i915/sprite: convert to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_sprite.[ch] to struct intel_display. Some stragglers are left behind where needed. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/094db6a9f40404fcc14843d32b45465d31730d96.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_sprite.c | 209 +++++++++++--------- 1 file changed, 112 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index f8cceb3e5d8e..e657b09ede99 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -48,9 +48,9 @@ #include "intel_sprite.h" #include "intel_sprite_regs.h" -static char sprite_name(struct drm_i915_private *i915, enum pipe pipe, int sprite) +static char sprite_name(struct intel_display *display, enum pipe pipe, int sprite) { - return pipe * DISPLAY_RUNTIME_INFO(i915)->num_sprites[pipe] + sprite + 'A'; + return pipe * DISPLAY_RUNTIME_INFO(display)->num_sprites[pipe] + sprite + 'A'; } static void i9xx_plane_linear_gamma(u16 gamma[8]) @@ -67,7 +67,7 @@ static void chv_sprite_update_csc(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id plane_id = plane->id; /* @@ -100,35 +100,35 @@ chv_sprite_update_csc(const struct intel_plane_state *plane_state) if (!fb->format->is_yuv) return; - intel_de_write_fw(dev_priv, SPCSCYGOFF(plane_id), + intel_de_write_fw(display, SPCSCYGOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCCBOFF(plane_id), + intel_de_write_fw(display, SPCSCCBOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCCROFF(plane_id), + intel_de_write_fw(display, SPCSCCROFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCC01(plane_id), + intel_de_write_fw(display, SPCSCC01(plane_id), SPCSC_C1(csc[1]) | SPCSC_C0(csc[0])); - intel_de_write_fw(dev_priv, SPCSCC23(plane_id), + intel_de_write_fw(display, SPCSCC23(plane_id), SPCSC_C1(csc[3]) | SPCSC_C0(csc[2])); - intel_de_write_fw(dev_priv, SPCSCC45(plane_id), + intel_de_write_fw(display, SPCSCC45(plane_id), SPCSC_C1(csc[5]) | SPCSC_C0(csc[4])); - intel_de_write_fw(dev_priv, SPCSCC67(plane_id), + intel_de_write_fw(display, SPCSCC67(plane_id), SPCSC_C1(csc[7]) | SPCSC_C0(csc[6])); - intel_de_write_fw(dev_priv, SPCSCC8(plane_id), SPCSC_C0(csc[8])); + intel_de_write_fw(display, SPCSCC8(plane_id), SPCSC_C0(csc[8])); - intel_de_write_fw(dev_priv, SPCSCYGICLAMP(plane_id), + intel_de_write_fw(display, SPCSCYGICLAMP(plane_id), SPCSC_IMAX(1023) | SPCSC_IMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCBICLAMP(plane_id), + intel_de_write_fw(display, SPCSCCBICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512)); - intel_de_write_fw(dev_priv, SPCSCCRICLAMP(plane_id), + intel_de_write_fw(display, SPCSCCRICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512)); - intel_de_write_fw(dev_priv, SPCSCYGOCLAMP(plane_id), + intel_de_write_fw(display, SPCSCYGOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCBOCLAMP(plane_id), + intel_de_write_fw(display, SPCSCCBOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCROCLAMP(plane_id), + intel_de_write_fw(display, SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); } @@ -139,7 +139,7 @@ static void vlv_sprite_update_clrc(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -168,9 +168,9 @@ vlv_sprite_update_clrc(const struct intel_plane_state *plane_state) } /* FIXME these register are single buffered :( */ - intel_de_write_fw(dev_priv, SPCLRC0(pipe, plane_id), + intel_de_write_fw(display, SPCLRC0(pipe, plane_id), SP_CONTRAST(contrast) | SP_BRIGHTNESS(brightness)); - intel_de_write_fw(dev_priv, SPCLRC1(pipe, plane_id), + intel_de_write_fw(display, SPCLRC1(pipe, plane_id), SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } @@ -357,7 +357,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, static void vlv_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -373,7 +373,7 @@ static void vlv_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ /* The two end points are implicit (0.0 and 1.0) */ for (i = 1; i < 8 - 1; i++) - intel_de_write_fw(dev_priv, SPGAMC(pipe, plane_id, i - 1), + intel_de_write_fw(display, SPGAMC(pipe, plane_id, i - 1), gamma[i] << 16 | gamma[i] << 8 | gamma[i]); } @@ -382,7 +382,7 @@ vlv_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; int crtc_x = plane_state->uapi.dst.x1; @@ -390,11 +390,11 @@ vlv_sprite_update_noarm(struct intel_plane *plane, u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); - intel_de_write_fw(dev_priv, SPSTRIDE(pipe, plane_id), + intel_de_write_fw(display, SPSTRIDE(pipe, plane_id), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, SPPOS(pipe, plane_id), + intel_de_write_fw(display, SPPOS(pipe, plane_id), SP_POS_Y(crtc_y) | SP_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, SPSIZE(pipe, plane_id), + intel_de_write_fw(display, SPSIZE(pipe, plane_id), SP_HEIGHT(crtc_h - 1) | SP_WIDTH(crtc_w - 1)); } @@ -403,6 +403,7 @@ vlv_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -420,18 +421,18 @@ vlv_sprite_update_arm(struct intel_plane *plane, chv_sprite_update_csc(plane_state); if (key->flags) { - intel_de_write_fw(dev_priv, SPKEYMINVAL(pipe, plane_id), + intel_de_write_fw(display, SPKEYMINVAL(pipe, plane_id), key->min_value); - intel_de_write_fw(dev_priv, SPKEYMSK(pipe, plane_id), + intel_de_write_fw(display, SPKEYMSK(pipe, plane_id), key->channel_mask); - intel_de_write_fw(dev_priv, SPKEYMAXVAL(pipe, plane_id), + intel_de_write_fw(display, SPKEYMAXVAL(pipe, plane_id), key->max_value); } - intel_de_write_fw(dev_priv, SPCONSTALPHA(pipe, plane_id), 0); + intel_de_write_fw(display, SPCONSTALPHA(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, SPLINOFF(pipe, plane_id), linear_offset); - intel_de_write_fw(dev_priv, SPTILEOFF(pipe, plane_id), + intel_de_write_fw(display, SPLINOFF(pipe, plane_id), linear_offset); + intel_de_write_fw(display, SPTILEOFF(pipe, plane_id), SP_OFFSET_Y(y) | SP_OFFSET_X(x)); /* @@ -439,8 +440,8 @@ vlv_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, SPCNTR(pipe, plane_id), sprctl); - intel_de_write_fw(dev_priv, SPSURF(pipe, plane_id), + intel_de_write_fw(display, SPCNTR(pipe, plane_id), sprctl); + intel_de_write_fw(display, SPSURF(pipe, plane_id), intel_plane_ggtt_offset(plane_state) + sprsurf_offset); vlv_sprite_update_clrc(plane_state); @@ -451,18 +452,19 @@ static void vlv_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; - intel_de_write_fw(dev_priv, SPCNTR(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, SPSURF(pipe, plane_id), 0); + intel_de_write_fw(display, SPCNTR(pipe, plane_id), 0); + intel_de_write_fw(display, SPSURF(pipe, plane_id), 0); } static bool vlv_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; @@ -474,7 +476,7 @@ vlv_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; + ret = intel_de_read(display, SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; *pipe = plane->pipe; @@ -766,7 +768,7 @@ static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state, static void ivb_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; u16 gamma[18]; int i; @@ -778,17 +780,17 @@ static void ivb_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) - intel_de_write_fw(dev_priv, SPRGAMC(pipe, i), + intel_de_write_fw(display, SPRGAMC(pipe, i), gamma[i] << 20 | gamma[i] << 10 | gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 2), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 0), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 1), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 2), gamma[i]); i++; - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 2), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 0), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 1), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 2), gamma[i]); i++; } @@ -797,6 +799,7 @@ ivb_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; int crtc_x = plane_state->uapi.dst.x1; @@ -812,14 +815,14 @@ ivb_sprite_update_noarm(struct intel_plane *plane, SPRITE_SRC_WIDTH(src_w - 1) | SPRITE_SRC_HEIGHT(src_h - 1); - intel_de_write_fw(dev_priv, SPRSTRIDE(pipe), + intel_de_write_fw(display, SPRSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, SPRPOS(pipe), + intel_de_write_fw(display, SPRPOS(pipe), SPRITE_POS_Y(crtc_y) | SPRITE_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, SPRSIZE(pipe), + intel_de_write_fw(display, SPRSIZE(pipe), SPRITE_HEIGHT(crtc_h - 1) | SPRITE_WIDTH(crtc_w - 1)); if (IS_IVYBRIDGE(dev_priv)) - intel_de_write_fw(dev_priv, SPRSCALE(pipe), sprscale); + intel_de_write_fw(display, SPRSCALE(pipe), sprscale); } static void @@ -827,6 +830,7 @@ ivb_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -840,20 +844,20 @@ ivb_sprite_update_arm(struct intel_plane *plane, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (key->flags) { - intel_de_write_fw(dev_priv, SPRKEYVAL(pipe), key->min_value); - intel_de_write_fw(dev_priv, SPRKEYMSK(pipe), + intel_de_write_fw(display, SPRKEYVAL(pipe), key->min_value); + intel_de_write_fw(display, SPRKEYMSK(pipe), key->channel_mask); - intel_de_write_fw(dev_priv, SPRKEYMAX(pipe), key->max_value); + intel_de_write_fw(display, SPRKEYMAX(pipe), key->max_value); } /* HSW consolidates SPRTILEOFF and SPRLINOFF into a single SPROFFSET * register */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - intel_de_write_fw(dev_priv, SPROFFSET(pipe), + intel_de_write_fw(display, SPROFFSET(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } else { - intel_de_write_fw(dev_priv, SPRLINOFF(pipe), linear_offset); - intel_de_write_fw(dev_priv, SPRTILEOFF(pipe), + intel_de_write_fw(display, SPRLINOFF(pipe), linear_offset); + intel_de_write_fw(display, SPRTILEOFF(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } @@ -862,8 +866,8 @@ ivb_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, SPRCTL(pipe), sprctl); - intel_de_write_fw(dev_priv, SPRSURF(pipe), + intel_de_write_fw(display, SPRCTL(pipe), sprctl); + intel_de_write_fw(display, SPRSURF(pipe), intel_plane_ggtt_offset(plane_state) + sprsurf_offset); ivb_sprite_update_gamma(plane_state); @@ -873,20 +877,22 @@ static void ivb_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - intel_de_write_fw(dev_priv, SPRCTL(pipe), 0); + intel_de_write_fw(display, SPRCTL(pipe), 0); /* Disable the scaler */ if (IS_IVYBRIDGE(dev_priv)) - intel_de_write_fw(dev_priv, SPRSCALE(pipe), 0); - intel_de_write_fw(dev_priv, SPRSURF(pipe), 0); + intel_de_write_fw(display, SPRSCALE(pipe), 0); + intel_de_write_fw(display, SPRSURF(pipe), 0); } static bool ivb_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; @@ -897,7 +903,7 @@ ivb_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, SPRCTL(plane->pipe)) & SPRITE_ENABLE; + ret = intel_de_read(display, SPRCTL(plane->pipe)) & SPRITE_ENABLE; *pipe = plane->pipe; @@ -1073,7 +1079,7 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, static void g4x_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; u16 gamma[8]; @@ -1088,7 +1094,7 @@ static void g4x_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ /* The two end points are implicit (0.0 and 1.0) */ for (i = 1; i < 8 - 1; i++) - intel_de_write_fw(dev_priv, DVSGAMC_G4X(pipe, i - 1), + intel_de_write_fw(display, DVSGAMC_G4X(pipe, i - 1), gamma[i] << 16 | gamma[i] << 8 | gamma[i]); } @@ -1103,7 +1109,7 @@ static void ilk_sprite_linear_gamma(u16 gamma[17]) static void ilk_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; u16 gamma[17]; @@ -1117,12 +1123,12 @@ static void ilk_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) - intel_de_write_fw(dev_priv, DVSGAMC_ILK(pipe, i), + intel_de_write_fw(display, DVSGAMC_ILK(pipe, i), gamma[i] << 20 | gamma[i] << 10 | gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 2), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 0), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 1), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 2), gamma[i]); i++; } @@ -1131,7 +1137,7 @@ g4x_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; int crtc_x = plane_state->uapi.dst.x1; int crtc_y = plane_state->uapi.dst.y1; @@ -1146,13 +1152,13 @@ g4x_sprite_update_noarm(struct intel_plane *plane, DVS_SRC_WIDTH(src_w - 1) | DVS_SRC_HEIGHT(src_h - 1); - intel_de_write_fw(dev_priv, DVSSTRIDE(pipe), + intel_de_write_fw(display, DVSSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, DVSPOS(pipe), + intel_de_write_fw(display, DVSPOS(pipe), DVS_POS_Y(crtc_y) | DVS_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, DVSSIZE(pipe), + intel_de_write_fw(display, DVSSIZE(pipe), DVS_HEIGHT(crtc_h - 1) | DVS_WIDTH(crtc_w - 1)); - intel_de_write_fw(dev_priv, DVSSCALE(pipe), dvsscale); + intel_de_write_fw(display, DVSSCALE(pipe), dvsscale); } static void @@ -1160,6 +1166,7 @@ g4x_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -1173,14 +1180,14 @@ g4x_sprite_update_arm(struct intel_plane *plane, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (key->flags) { - intel_de_write_fw(dev_priv, DVSKEYVAL(pipe), key->min_value); - intel_de_write_fw(dev_priv, DVSKEYMSK(pipe), + intel_de_write_fw(display, DVSKEYVAL(pipe), key->min_value); + intel_de_write_fw(display, DVSKEYMSK(pipe), key->channel_mask); - intel_de_write_fw(dev_priv, DVSKEYMAX(pipe), key->max_value); + intel_de_write_fw(display, DVSKEYMAX(pipe), key->max_value); } - intel_de_write_fw(dev_priv, DVSLINOFF(pipe), linear_offset); - intel_de_write_fw(dev_priv, DVSTILEOFF(pipe), + intel_de_write_fw(display, DVSLINOFF(pipe), linear_offset); + intel_de_write_fw(display, DVSTILEOFF(pipe), DVS_OFFSET_Y(y) | DVS_OFFSET_X(x)); /* @@ -1188,8 +1195,8 @@ g4x_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, DVSCNTR(pipe), dvscntr); - intel_de_write_fw(dev_priv, DVSSURF(pipe), + intel_de_write_fw(display, DVSCNTR(pipe), dvscntr); + intel_de_write_fw(display, DVSSURF(pipe), intel_plane_ggtt_offset(plane_state) + dvssurf_offset); if (IS_G4X(dev_priv)) @@ -1202,19 +1209,20 @@ static void g4x_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; - intel_de_write_fw(dev_priv, DVSCNTR(pipe), 0); + intel_de_write_fw(display, DVSCNTR(pipe), 0); /* Disable the scaler */ - intel_de_write_fw(dev_priv, DVSSCALE(pipe), 0); - intel_de_write_fw(dev_priv, DVSSURF(pipe), 0); + intel_de_write_fw(display, DVSSCALE(pipe), 0); + intel_de_write_fw(display, DVSSURF(pipe), 0); } static bool g4x_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; @@ -1225,7 +1233,7 @@ g4x_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, DVSCNTR(plane->pipe)) & DVS_ENABLE; + ret = intel_de_read(display, DVSCNTR(plane->pipe)) & DVS_ENABLE; *pipe = plane->pipe; @@ -1255,7 +1263,7 @@ static int g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(crtc_state); const struct drm_framebuffer *fb = plane_state->hw.fb; const struct drm_rect *src = &plane_state->uapi.src; const struct drm_rect *dst = &plane_state->uapi.dst; @@ -1281,7 +1289,8 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { if (src_h & 1) { - drm_dbg_kms(&i915->drm, "Source height must be even with interlaced modes\n"); + drm_dbg_kms(display->drm, + "Source height must be even with interlaced modes\n"); return -EINVAL; } min_height = 6; @@ -1293,19 +1302,22 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, if (src_w < min_width || src_h < min_height || src_w > 2048 || src_h > 2048) { - drm_dbg_kms(&i915->drm, "Source dimensions (%dx%d) exceed hardware limits (%dx%d - %dx%d)\n", + drm_dbg_kms(display->drm, + "Source dimensions (%dx%d) exceed hardware limits (%dx%d - %dx%d)\n", src_w, src_h, min_width, min_height, 2048, 2048); return -EINVAL; } if (width_bytes > 4096) { - drm_dbg_kms(&i915->drm, "Fetch width (%d) exceeds hardware max with scaling (%u)\n", + drm_dbg_kms(display->drm, + "Fetch width (%d) exceeds hardware max with scaling (%u)\n", width_bytes, 4096); return -EINVAL; } if (stride > 4096) { - drm_dbg_kms(&i915->drm, "Stride (%u) exceeds hardware max with scaling (%u)\n", + drm_dbg_kms(display->drm, + "Stride (%u) exceeds hardware max with scaling (%u)\n", stride, 4096); return -EINVAL; } @@ -1317,6 +1329,7 @@ static int g4x_sprite_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); int min_scale = DRM_PLANE_NO_SCALING; @@ -1324,7 +1337,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, int ret; if (g4x_fb_scalable(plane_state->hw.fb)) { - if (DISPLAY_VER(dev_priv) < 7) { + if (DISPLAY_VER(display) < 7) { min_scale = 1; max_scale = 16 << 16; } else if (IS_IVYBRIDGE(dev_priv)) { @@ -1353,7 +1366,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (DISPLAY_VER(dev_priv) >= 7) + if (DISPLAY_VER(display) >= 7) plane_state->ctl = ivb_sprite_ctl(crtc_state, plane_state); else plane_state->ctl = g4x_sprite_ctl(crtc_state, plane_state); @@ -1364,6 +1377,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, int chv_plane_check_rotation(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); unsigned int rotation = plane_state->hw.rotation; @@ -1371,7 +1385,7 @@ int chv_plane_check_rotation(const struct intel_plane_state *plane_state) if (IS_CHERRYVIEW(dev_priv) && rotation & DRM_MODE_ROTATE_180 && rotation & DRM_MODE_REFLECT_X) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Cannot rotate and reflect at the same time\n"); return -EINVAL; } @@ -1573,6 +1587,7 @@ struct intel_plane * intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int sprite) { + struct intel_display *display = &dev_priv->display; struct intel_plane *plane; const struct drm_plane_funcs *plane_funcs; unsigned int supported_rotations; @@ -1604,7 +1619,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, } plane_funcs = &vlv_sprite_funcs; - } else if (DISPLAY_VER(dev_priv) >= 7) { + } else if (DISPLAY_VER(display) >= 7) { plane->update_noarm = ivb_sprite_update_noarm; plane->update_arm = ivb_sprite_update_arm; plane->disable_arm = ivb_sprite_disable_arm; @@ -1663,11 +1678,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, modifiers = intel_fb_plane_get_modifiers(dev_priv, INTEL_PLANE_CAP_TILING_X); - ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, + ret = drm_universal_plane_init(display->drm, &plane->base, 0, plane_funcs, formats, num_formats, modifiers, DRM_PLANE_TYPE_OVERLAY, - "sprite %c", sprite_name(dev_priv, pipe, sprite)); + "sprite %c", sprite_name(display, pipe, sprite)); kfree(modifiers); if (ret) From 1c8f80c2aadf76daff01ab133fb11136e7248746 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 19:04:55 +0300 Subject: [PATCH 1463/1523] drm/i915/display: convert params to struct intel_display Going forward, struct intel_display shall replace struct drm_i915_private as the main display device data pointer type. Convert intel_display_params.[ch] and intel_display_debugfs_params.[ch] to struct intel_display. Some stragglers are left behind where needed. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/3e5cc1744eaf4708b08303e3e7e194035d7941cc.1724342644.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 2 +- .../gpu/drm/i915/display/intel_display_debugfs_params.c | 8 ++++---- .../gpu/drm/i915/display/intel_display_debugfs_params.h | 4 ++-- drivers/gpu/drm/i915/display/intel_display_params.c | 6 +++--- drivers/gpu/drm/i915/display/intel_display_params.h | 4 ++-- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 3 ++- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 0cf0b4223513..74f527647aa9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1073,7 +1073,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) intel_opregion_debugfs_register(display); intel_psr_debugfs_register(i915); intel_wm_debugfs_register(i915); - intel_display_debugfs_params(i915); + intel_display_debugfs_params(display); } static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c index f35718748555..ec3ed29a83c9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -151,13 +151,13 @@ intel_display_debugfs_create_uint(const char *name, umode_t mode, } while (0) /* add a subdirectory with files for each intel display param */ -void intel_display_debugfs_params(struct drm_i915_private *i915) +void intel_display_debugfs_params(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; struct dentry *dir; char dirname[16]; - snprintf(dirname, sizeof(dirname), "%s_params", i915->drm.driver->name); + snprintf(dirname, sizeof(dirname), "%s_params", display->drm->driver->name); dir = debugfs_lookup(dirname, minor->debugfs_root); if (!dir) dir = debugfs_create_dir(dirname, minor->debugfs_root); @@ -171,7 +171,7 @@ void intel_display_debugfs_params(struct drm_i915_private *i915) */ #define REGISTER(T, x, unused, mode, ...) _intel_display_param_create_file( \ - dir, #x, mode, &i915->display.params.x); + dir, #x, mode, &display->params.x); INTEL_DISPLAY_PARAMS_FOR_EACH(REGISTER); #undef REGISTER } diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h index 1e9945a4044c..a1120915a5a8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h @@ -6,8 +6,8 @@ #ifndef __INTEL_DISPLAY_DEBUGFS_PARAMS__ #define __INTEL_DISPLAY_DEBUGFS_PARAMS__ -struct drm_i915_private; +struct intel_display; -void intel_display_debugfs_params(struct drm_i915_private *i915); +void intel_display_debugfs_params(struct intel_display *display); #endif /* __INTEL_DISPLAY_DEBUGFS_PARAMS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c index e82bd72d32fa..1a45d300b6f0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -173,14 +173,14 @@ static void _param_print_charp(struct drm_printer *p, const char *driver_name, /** * intel_display_params_dump - dump intel display modparams - * @i915: i915 device + * @display: display device * @p: the &drm_printer * * Pretty printer for i915 modparams. */ -void intel_display_params_dump(struct drm_i915_private *i915, struct drm_printer *p) +void intel_display_params_dump(struct intel_display *display, struct drm_printer *p) { -#define PRINT(T, x, ...) _param_print(p, i915->drm.driver->name, #x, i915->display.params.x); +#define PRINT(T, x, ...) _param_print(p, display->drm->driver->name, #x, display->params.x); INTEL_DISPLAY_PARAMS_FOR_EACH(PRINT); #undef PRINT } diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h index 48c29c55c939..da8dc943234b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -9,7 +9,7 @@ #include struct drm_printer; -struct drm_i915_private; +struct intel_display; /* * Invoke param, a function-like macro, for each intel display param, with @@ -56,7 +56,7 @@ struct intel_display_params { }; #undef MEMBER -void intel_display_params_dump(struct drm_i915_private *i915, +void intel_display_params_dump(struct intel_display *display, struct drm_printer *p); void intel_display_params_copy(struct intel_display_params *dest); void intel_display_params_free(struct intel_display_params *params); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc717cf544e4..f969f585d07b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -66,6 +66,7 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_display *display = &i915->display; struct drm_printer p = drm_seq_file_printer(m); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915)); @@ -77,7 +78,7 @@ static int i915_capabilities(struct seq_file *m, void *data) kernel_param_lock(THIS_MODULE); i915_params_dump(&i915->params, &p); - intel_display_params_dump(i915, &p); + intel_display_params_dump(display, &p); kernel_param_unlock(THIS_MODULE); return 0; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 96c6cafd5b9e..6469b9bcf2ec 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -660,9 +660,10 @@ static void err_print_params(struct drm_i915_error_state_buf *m, const struct i915_params *params) { struct drm_printer p = i915_error_printer(m); + struct intel_display *display = &m->i915->display; i915_params_dump(params, &p); - intel_display_params_dump(m->i915, &p); + intel_display_params_dump(display, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, From db6341a9168d2a24ded526277eeab29724d76e9d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 20 Aug 2024 13:56:32 +0800 Subject: [PATCH 1464/1523] drm/amdkfd: Check int source id for utcl2 poison event Traditional utcl2 fault_status polling does not work in SRIOV environment. The polling of fault status register from guest side will be dropped by hardware. Driver should switch to check utcl2 interrupt source id to identify utcl2 poison event. It is set to 1 when poisoned data interrupts are signaled. v2: drop the unused local variable (Tao) Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 18 +----------------- drivers/gpu/drm/amd/amdkfd/soc15_int.h | 1 + 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index a9c3580be8c9..fecdbbab9894 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -431,25 +431,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 10138676f27f..e5c0205f2618 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -29,6 +29,7 @@ #define SOC15_INTSRC_CP_BAD_OPCODE 183 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_VMC_UTCL2_POISON 1 #define SOC15_INTSRC_SDMA_TRAP 224 #define SOC15_INTSRC_SDMA_ECC 220 #define SOC21_INTSRC_SDMA_TRAP 49 From e28604d8337eac97fa956d6682b6312741ce85a1 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 19 Aug 2024 22:23:11 +0800 Subject: [PATCH 1465/1523] drm/amdkfd: Drop poison hanlding from gfx v10 Not supported. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 71 ------------------- 1 file changed, 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 8e0d0356e810..bb8cbfc39b90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -129,63 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE { KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) -static void event_interrupt_poison_consumption(struct kfd_node *dev, - uint16_t pasid, uint16_t client_id) -{ - enum amdgpu_ras_block block = 0; - int old_poison, ret = -EINVAL; - uint32_t reset = 0; - struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); - - if (!p) - return; - - /* all queues of a process will be unmapped in one time */ - old_poison = atomic_cmpxchg(&p->poison, 0, 1); - kfd_unref_process(p); - if (old_poison) - return; - - switch (client_id) { - case SOC15_IH_CLIENTID_SE0SH: - case SOC15_IH_CLIENTID_SE1SH: - case SOC15_IH_CLIENTID_SE2SH: - case SOC15_IH_CLIENTID_SE3SH: - case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); - block = AMDGPU_RAS_BLOCK__GFX; - if (ret) - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - case SOC15_IH_CLIENTID_SDMA0: - case SOC15_IH_CLIENTID_SDMA1: - case SOC15_IH_CLIENTID_SDMA2: - case SOC15_IH_CLIENTID_SDMA3: - case SOC15_IH_CLIENTID_SDMA4: - block = AMDGPU_RAS_BLOCK__SDMA; - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - default: - break; - } - - kfd_signal_poison_consumed_event(dev, pasid); - - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) - dev_warn(dev->adev->dev, - "RAS poison consumption, unmap queue flow succeeded: client id %d\n", - client_id); - else - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); -} - static bool event_interrupt_isr_v10(struct kfd_node *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -332,11 +275,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID), sq_intr_err_type); - if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && - sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; - } break; default: break; @@ -362,9 +300,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); - } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || @@ -388,12 +323,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) hub_inst = node_id / 4; - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { - event_interrupt_poison_consumption(dev, pasid, client_id); - return; - } - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | From 01bfabc2d1d8aaffe5268f8df0843a6d916dcbaa Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 20 Aug 2024 08:57:15 +0800 Subject: [PATCH 1466/1523] drm/amd/pm: update message interface for smu v14.0.2/3 update message interface for smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h | 18 ++++++++++++++---- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 1 - 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 5913f9c60fe0..e000ac7b4c0e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -127,7 +127,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), From 75f0efbc4b3b088cca20864d055b3854a51b5af0 Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Tue, 13 Aug 2024 13:41:11 +0530 Subject: [PATCH 1467/1523] drm/amdgpu: Take IOMMU remapping into account for p2p checks when trying to enable p2p the amdgpu_device_is_peer_accessible() checks the condition where address_mask overlaps the aper_base and hence returns 0, due to which the p2p disables for this platform IOMMU should remap the BAR addresses so the device can access them. Hence check if peer_adev is remapping DMA v5: (Felix, Alex) - fixing comment as per Alex feedback - refactor code as per Felix v4: (Alex) - fix the comment and description v3: - remove iommu_remap variable v2: (Alex) - Fix as per review comments - add new function amdgpu_device_check_iommu_remap to check if iommu remap Signed-off-by: Rahul Jain Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 43 ++++++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ad97f03f1358..da06705f0026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3957,6 +3957,27 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) adev->ram_is_direct_mapped = true; } +#if defined(CONFIG_HSA_AMD_P2P) +/** + * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. + * + * @adev: amdgpu_device pointer + * + * return if IOMMU remapping bar address + */ +static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (domain && (domain->type == IOMMU_DOMAIN_DMA || + domain->type == IOMMU_DOMAIN_DMA_FQ)) + return true; + + return false; +} +#endif + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_pcie_replay_count.attr, NULL @@ -6151,18 +6172,24 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { #ifdef CONFIG_HSA_AMD_P2P - uint64_t address_mask = peer_adev->dev->dma_mask ? - ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask)); + bool is_large_bar = adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size; + bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + + if (!p2p_addressable) { + uint64_t address_mask = peer_adev->dev->dma_mask ? + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + + p2p_addressable = !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask); + } + return is_large_bar && p2p_access && p2p_addressable; #else return false; #endif From b05d6476ae2dde8eb447f907ab689083499edeaa Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 19 Aug 2024 22:59:19 +0800 Subject: [PATCH 1468/1523] drm/amdgpu: Retire query_utcl2_poison_status callback Driver switches to interrupt source id to identify utcl2 poison event. polling interface is not needed. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 ---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 -- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 17 ----------------- 7 files changed, 74 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 64a989cbc301..4f08b153cb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -783,22 +783,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type) -{ - if (!hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } else { - if (adev->mmhub.funcs->query_utcl2_poison_status) - return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } -} - int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { return kgd2kfd_check_and_lock_kfd(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 825c7ffe4bc9..f9d119448442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -350,8 +350,6 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 103a837ccc71..c7b44aeb671b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,8 +38,6 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 95d676ee207f..1ca9d4ed8063 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,8 +63,6 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d200310d1731..0e3ddea7b8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,23 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } -static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) - return false; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, @@ -468,5 +451,4 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 72109abe7c86..ed8e130c7d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -622,22 +622,6 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } -static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 fed, status; - - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -646,7 +630,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 915203b91c5f..b01bb759d0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,22 +559,6 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } -static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst) -{ - u32 fed, status; - - status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -584,7 +568,6 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, - .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { From 40318a2406bd426c6f4591269669c04e8eda571d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 13:11:22 -0400 Subject: [PATCH 1469/1523] drm/amdgpu/gfx12: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3575 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f14e27f86e0e..54059cbcfc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3054,7 +3054,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From 988bfa0bc67d7220ff8d9e2ba3a425727aa98af3 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:36 +0800 Subject: [PATCH 1470/1523] drm/amd/display: Make core_dcn4_g6_temp_read_blackout_table static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:6853:56: warning: symbol 'core_dcn4_g6_temp_read_blackout_table' was not declared. Should it be static? This symbol is not used outside of dml2_core_dcn4_calcs.c, so marks it static. And not want to change it, so mark it const. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index e2c45e498664..805fd783131f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -6887,7 +6887,8 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { +static const struct dml2_core_internal_g6_temp_read_blackouts_table + core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, From 0e405395e0b162075001b9c027443dd10b723a03 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:37 +0800 Subject: [PATCH 1471/1523] drm/amd/display: Make core_dcn4_ip_caps_base static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c:12:28: warning: symbol 'core_dcn4_ip_caps_base' was not declared. Should it be static? This symbol is not used outside of dcn35_hubp.c, so marks it static. And do not want to change it, so mark it const. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 698307f3ca39..0aa4e4d343b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -9,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -struct dml2_core_ip_params core_dcn4_ip_caps_base = { +static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, From 570867ef90550b01f0ca0f919dba308c3f2fb605 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:38 +0800 Subject: [PATCH 1472/1523] drm/amd/display: Make dcn35_hubp_funcs static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/hubp/dcn35/dcn35_hubp.c:191:19: warning: symbol 'dcn35_hubp_funcs' was not declared. Should it be static? This symbol is not used outside of dcn35_hubp.c, so marks it static. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 771fcd0d3b99..d1f05b82b3dd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -188,7 +188,7 @@ void hubp35_program_surface_config( hubp35_program_pixel_format(hubp, format); } -struct hubp_funcs dcn35_hubp_funcs = { +static struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, From 2845f512232de9e436b9e3b5529e906e62414013 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 21 Aug 2024 14:40:39 +0800 Subject: [PATCH 1473/1523] drm/amd/display: Make dcn401_dsc_funcs static The sparse tool complains as follows: drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dcn401/dcn401_dsc.c:30:24: warning: symbol 'dcn401_dsc_funcs' was not declared. Should it be static? This symbol is not used outside of dcn401_dsc.c, so marks it static. Signed-off-by: Jinjie Ruan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 6acb6699f146..61678b0a5a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -27,7 +27,7 @@ static void dsc401_disconnect(struct display_stream_compressor *dsc); static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); -const struct dsc_funcs dcn401_dsc_funcs = { +static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, From 4416377ae1fdc41a90b665943152ccd7ff61d3c5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Aug 2024 14:42:41 +0800 Subject: [PATCH 1474/1523] drm/amdgpu: add list empty check to avoid null pointer issue Add list empty check to avoid null pointer issues in some corner cases. - list_for_each_entry_safe() Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 929095a2e088..57bda66e85ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } From 73dd0ad9e5dad53766ea3e631303430116f834b3 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 21 Aug 2024 12:27:24 +0800 Subject: [PATCH 1475/1523] drm/amd/display: avoid using null object of framebuffer Instead of using state->fb->obj[0] directly, get object from framebuffer by calling drm_gem_fb_get_obj() and return error code when object is null to avoid using null object of framebuffer. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 1ff469ef51af..a573a6639898 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); From 875ff9a7ee8824200885384effa7743892a34ed6 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 22 Aug 2024 11:44:12 +0800 Subject: [PATCH 1476/1523] drm/amdgpu: support for gc_info table v1.3 Add gc_info table v1.3 for IP discovery. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 11 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +++ drivers/gpu/drm/amd/include/discovery.h | 42 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7b561e8e3caf..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e28c1ebfa98f..5644e10a86a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -245,6 +245,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; From 010cc730ace807c6d267481b5fb6ff99acc35c46 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 16 Aug 2024 14:34:17 +0530 Subject: [PATCH 1477/1523] drm/amd/pm: Add support for new P2S table revision Add p2s table support for a new revision of SMUv13.0.6. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); From 6ef29715ac06fad7b3e43086cb4df97952c3a4de Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 23 Aug 2024 02:04:09 -0500 Subject: [PATCH 1478/1523] drm/amdkfd: Change kfd/svm page fault drain handling When app unmap vm ranges(munmap) kfd/svm starts drain pending page fault and not handle any incoming pages fault of this process until a deferred work item got executed by default system wq. The time period of "not handle page fault" can be long and is unpredicable. That is advese to kfd performance on page faults recovery. This patch uses time stamp of incoming page fault to decide to drop or recover page fault. When app unmap vm ranges kfd records each gpu device's ih ring current time stamp. These time stamps are used at kfd page fault recovery routine. Any page fault happened on unmapped ranges after unmap events is application bug that accesses vm range after unmap. It is not driver work to cover that. By using time stamp of page fault do not need drain page faults at deferred work. So, the time period that kfd does not handle page faults is reduced and can be controlled. Signed-off-by: Xiaogang.Chen Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 95 +++++++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 7 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1468222ea0cd..ad2e469548c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2776,7 +2776,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2802,7 +2802,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 046949c4b695..d12d66dca8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -558,7 +558,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f0ceab3ce5bf..9784a2892185 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc..c76ac0dfe572 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault); + addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) + addr, entry->timestamp, write_fault)) return 1; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7bba6bed2f48..9ae9abc6eb43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -866,6 +866,8 @@ struct svm_range_list { struct delayed_work restore_work; DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); struct task_struct *faulting_task; + /* check point ts decides if page fault recovery need be dropped */ + uint64_t checkpoint_ts[MAX_GPU_INSTANCE]; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2339bbdf452f..ce2a5d9f90d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2262,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(&svms->drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2291,8 +2285,6 @@ restart: pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2314,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. - */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2445,6 +2428,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct kfd_process *p; unsigned long s, l; bool unmap_parent; + uint32_t i; if (atomic_read(&prange->queue_refcount)) { int r; @@ -2464,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". + /* calculate time stamps that are used to decide which page faults need be + * dropped or handled before unmap pages from gpu vm */ - atomic_inc(&svms->drain_pagefaults); + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_ih_ring *ih; + uint32_t checkpoint_wptr; + + pdd = p->pdds[i]; + if (!pdd) + continue; + + adev = pdd->dev->adev; + + /* Check and drain ih1 ring if cam not available */ + ih = &adev->irq.ih1; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) { + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + continue; + } + + /* check if dev->irq.ih_soft is not empty */ + ih = &adev->irq.ih_soft; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + } unmap_parent = start <= prange->start && last >= prange->last; @@ -2909,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { unsigned long start, last, size; struct mm_struct *mm = NULL; @@ -2919,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ktime_t timestamp = ktime_get_boottime(); struct kfd_node *node; int32_t best_loc; - int32_t gpuidx = MAX_GPU_INSTANCE; + int32_t gpuid, gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; struct vm_area_struct *vma; bool migration = false; @@ -2940,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); if (atomic_read(&svms->drain_pagefaults)) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr); r = 0; goto out; } + node = kfd_node_by_irq_ids(adev, node_id, vmid); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); + r = -EFAULT; + goto out; + } + + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id); + r = -EFAULT; + goto out; + } + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; + goto out; + } else + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -2961,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, vmid); - if (!node) { - pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, - vmid); - r = -EFAULT; - goto out; - } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -3182,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p) /* * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. + * stop kfd page fault handing, then wait pending page faults got drained */ - atomic_inc(&p->svms.drain_pagefaults); + atomic_set(&p->svms.drain_pagefaults, 1); svm_range_drain_retry_fault(&p->svms); list_for_each_entry_safe(prange, next, &p->svms.list, list) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 747325a2ea89..bddd24f04669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -174,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t vmid, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, @@ -225,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p) static inline int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t client_id, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { return -EFAULT; } From 3376f922bfe070eff762164b3fc66981e3079417 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 21 Aug 2024 13:10:58 +0800 Subject: [PATCH 1479/1523] drm/amd/pm: Drop unsupported features on smu v14_0_2 Drop unsupported features on smu v14_0_2. Signed-off-by: Candice Li Reviewed-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e000ac7b4c0e..a31fae5feedf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2114,50 +2114,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -2896,12 +2852,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) From 25ebe10e3f4c897a2d348a9d4b674ee9ea28225c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 23 Aug 2024 09:22:07 -0700 Subject: [PATCH 1480/1523] Revert "drm/xe: Invalidate media_gt TLBs in PT code" This reverts commit 40520283e0fd11237ed9dfc0991503b3403d5fa4. We can't install dma-fence-chain in timeline sync objs. Signed-off-by: Matthew Brost Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240823162207.2168887-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 99 ++++++++------------------------------ 1 file changed, 19 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 6c6714af3d5d..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,8 +3,6 @@ * Copyright © 2022 Intel Corporation */ -#include - #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1851,20 +1849,13 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma *vma, struct dma_fence *fence) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { @@ -1884,20 +1875,13 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma *vma, struct dma_fence *fence) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } vma->tile_present &= ~BIT(tile->id); if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); @@ -1914,8 +1898,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma_op *op, struct dma_fence *fence) { xe_vm_assert_held(vm); @@ -1924,28 +1907,26 @@ static void op_commit(struct xe_vm *vm, if (!op->map.immediate && xe_vm_in_fault_mode(vm)) break; - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); break; case DRM_GPUVA_OP_REMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); + gpuva_to_vma(op->base.remap.unmap->va), fence); if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); + fence); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); + fence); break; case DRM_GPUVA_OP_UNMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); + gpuva_to_vma(op->base.unmap.va), fence); break; case DRM_GPUVA_OP_PREFETCH: bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); + gpuva_to_vma(op->base.prefetch.va), fence); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -1982,8 +1963,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence_chain *chain_fence = NULL; + struct invalidation_fence *ifence = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -2016,18 +1996,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) err = -ENOMEM; goto kill_vm_tile1; } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - chain_fence = dma_fence_chain_alloc(); - if (!chain_fence) { - err = -ENOMEM; - goto free_ifence; - } - } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -2059,46 +2027,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) /* tlb invalidation must be done before signaling rebind */ if (ifence) { - if (mfence) - dma_fence_get(fence); invalidation_fence_init(tile->primary_gt, ifence, fence, pt_update_ops->start, pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - dma_fence_chain_init(chain_fence, &ifence->base.base, - &mfence->base.base, 0); - fence = &chain_fence->base; - } else { - fence = &ifence->base.base; - } + fence = &ifence->base.base; } - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); + dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, fence); if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); @@ -2108,8 +2049,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) free_rfence: kfree(rfence); free_ifence: - dma_fence_chain_free(chain_fence); - kfree(mfence); kfree(ifence); kill_vm_tile1: if (err != -EAGAIN && tile->id) From 5b993d00d7f0c970a5e5d34c1031069fb13b6986 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:53 -0700 Subject: [PATCH 1481/1523] drm/xe: Move ggtt_fini to devm managed ggtt->scratch is destroyed via devm, ggtt_fini sets ggtt->scratch to NULL, ggtt->scratch in GGTT clears, so ensure ggtt->scratch is set NULL before the BO is destroyed. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index b4a0cd2b62ed..a3ce91decdce 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -166,7 +166,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) drm_mm_takedown(&ggtt->mm); } -static void ggtt_fini(struct drm_device *drm, void *arg) +static void ggtt_fini(void *arg) { struct xe_ggtt *ggtt = arg; @@ -374,7 +374,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_ggtt_initial_clear(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; From b5de6a5ced074910b8fe57d3b0ab7f8843f85a3a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:54 -0700 Subject: [PATCH 1482/1523] drm/xe: Set firmware state to loadable before registering guc_fini_hw The guc_fini_hw registered calls __xe_uc_fw_status which is only expected to be called after initializing fw state. Move this before registering guc_fini_hw. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index de0fe9e65746..52df28032a6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); if (ret) goto out; @@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc) xe_guc_comm_init_early(guc); - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); - return 0; out: From a323782567812ee925e9b7926445532c7afe331b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:55 -0700 Subject: [PATCH 1483/1523] drm/xe: Drop warn on xe_guc_pc_gucrc_disable in guc pc fini Not a big deal if CT is down as driver is unloading, no need to warn. Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..def503abeed5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg) return; XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ From 6eb2aad402cc94ab4dd3780d8fd94c5c9bb7ed4a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:56 -0700 Subject: [PATCH 1484/1523] drm/xe: Move hw_engine_fini to devm managed Kernel BOs are destroyed with GGTT mappings, this is hardware interaction so use devm. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index aa9b2b16a6e0..18980238a2ea 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -267,7 +267,7 @@ static const struct engine_info engine_infos[] = { }, }; -static void hw_engine_fini(struct drm_device *drm, void *arg) +static void hw_engine_fini(void *arg) { struct xe_hw_engine *hwe = arg; @@ -585,7 +585,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_put(hwe->kernel_lrc); From 501d94389310bb282915e730386d1150b13ae321 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:58 -0700 Subject: [PATCH 1485/1523] drm/xe: Update xe_sa to use xe_managed_bo_create_pin_map Preferred way to create kernel BOs is xe_managed_bo_create_pin_map, use it. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sa.c | 13 ++++++------- drivers/gpu/drm/xe/xe_sa_types.h | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f3060979e63f..fe2cb2a96f78 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) drm_suballoc_manager_fini(&sa_manager->base); - if (bo->vmap.is_iomem) + if (sa_manager->is_iomem) kvfree(sa_manager->cpu_ptr); - xe_bo_unpin_map_no_vm(bo); sa_manager->bo = NULL; } @@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; + sa_manager->is_iomem = bo->vmap.is_iomem; drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); @@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); if (!sa_manager->cpu_ptr) { - xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2ef896aeca1d..2b070ff1292e 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -14,6 +14,7 @@ struct xe_sa_manager { struct xe_bo *bo; u64 gpu_addr; void *cpu_ptr; + bool is_iomem; }; #endif From a64e7e5b05e014dad9ae5858c9644d61400ec6ef Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 14:21:46 +0300 Subject: [PATCH 1486/1523] drm/xe: Suspend/resume user access only during system s/r Enable/Disable user access only during system suspend/resume. This should not happen during runtime s/r v2: rebased Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-2-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 30dfdac9f6fa..ad7fc5137b42 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -323,7 +323,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - intel_display_driver_disable_user_access(xe); + if (!runtime) + intel_display_driver_disable_user_access(xe); } if (!runtime) @@ -335,7 +336,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); - if (has_display(xe)) + if (!runtime && has_display(xe)) intel_display_driver_suspend_access(xe); intel_encoder_suspend_all(&xe->display); @@ -381,7 +382,7 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); - if (has_display(xe)) + if (!runtime && has_display(xe)) intel_display_driver_resume_access(xe); /* MST sideband requires HPD interrupts enabled */ @@ -391,7 +392,8 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); - intel_display_driver_enable_user_access(xe); + if (!runtime) + intel_display_driver_enable_user_access(xe); } intel_hpd_poll_disable(xe); From 122824165471ea492d8b07d15384345940aababb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 14:21:47 +0300 Subject: [PATCH 1487/1523] drm/xe: Handle polling only for system s/r in xe_display_pm_suspend/resume() This is a preparation for the follow-up patch where polling will be handled properly for all cases during runtime suspend/resume. v2: rebased Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-3-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ad7fc5137b42..00dd38f6177b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -321,14 +321,11 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) */ intel_power_domains_disable(xe); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - if (has_display(xe)) { + if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - if (!runtime) - intel_display_driver_disable_user_access(xe); - } - - if (!runtime) + intel_display_driver_disable_user_access(xe); intel_display_driver_suspend(xe); + } xe_display_flush_cleanup_work(xe); @@ -387,15 +384,12 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); - if (!runtime) + if (!runtime && has_display(xe)) { intel_display_driver_resume(xe); - - if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); - if (!runtime) - intel_display_driver_enable_user_access(xe); + intel_display_driver_enable_user_access(xe); + intel_hpd_poll_disable(xe); } - intel_hpd_poll_disable(xe); intel_opregion_resume(display); From 66a0f6b9f5fc205272035b6ffa4830be51e3f787 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Fri, 23 Aug 2024 14:21:48 +0300 Subject: [PATCH 1488/1523] drm/xe/display: handle HPD polling in display runtime suspend/resume In XE, display runtime suspend / resume routines are called only if d3cold is allowed. This makes the driver unable to detect any HPDs once the device goes into runtime suspend state in platforms like LNL. Update the display runtime suspend / resume routines to include HPD polling regardless of d3cold status. While xe_display_pm_suspend/resume() performs steps during runtime suspend/resume that shouldn't happen, like suspending MST and they are missing other steps like enabling DC9, this patchset is meant to keep the current behavior wrt. these, leaving the corresponding updates for a follow-up v2: have a separate function for display runtime s/r (Rodrigo) v3: better streamlining of system s/r and runtime s/r calls (Imre) v4: rebased Reviewed-by: Arun R Murthy Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-4-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/display/xe_display.h | 4 ++++ drivers/gpu/drm/xe/xe_pm.c | 8 +++++--- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 00dd38f6177b..710b1e2170c1 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -308,6 +308,18 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } } +/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) + xe_display_pm_suspend(xe, true); + + intel_hpd_poll_enable(xe); +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -354,6 +366,17 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + xe_display_pm_resume(xe, true); +} + void xe_display_pm_resume_early(struct xe_device *xe) { if (!xe->info.probe_display) diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 000fb5799df5..53d727fd792b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -38,6 +38,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -67,6 +69,8 @@ static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ #endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index fcfb49af8c89..c247e1cb8aba 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -366,9 +366,9 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_bo_runtime_pm_release_mmap_offset(bo); mutex_unlock(&xe->mem_access.vram_userfault.lock); - if (xe->d3cold.allowed) { - xe_display_pm_suspend(xe, true); + xe_display_pm_runtime_suspend(xe); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) goto out; @@ -431,12 +431,14 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_runtime_resume(xe); + if (xe->d3cold.allowed) { - xe_display_pm_resume(xe, true); err = xe_bo_restore_user(xe); if (err) goto out; } + out: lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); From a4dbe45c4c14edc316ae94b9af86a28f8c5d8123 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 23 Aug 2024 09:50:55 +0200 Subject: [PATCH 1489/1523] drm/i915/dsi: Make Lenovo Yoga Tab 3 X90F DMI match less strict There are 2G and 4G RAM versions of the Lenovo Yoga Tab 3 X90F and it turns out that the 2G version has a DMI product name of "CHERRYVIEW D1 PLATFORM" where as the 4G version has "CHERRYVIEW C0 PLATFORM". The sys-vendor + product-version check are unique enough that the product-name check is not necessary. Drop the product-name check so that the existing DMI match for the 4G RAM version also matches the 2G RAM version. Fixes: f6f4a0862bde ("drm/i915/vlv_dsi: Add DMI quirk for backlight control issues on Lenovo Yoga Tab 3 (v2)") Cc: stable@vger.kernel.org Acked-by: Jani Nikula Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20240823075055.17198-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/display/vlv_dsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 248bab691181..d21f3fb39706 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,7 +1870,6 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { /* Lenovo Yoga Tab 3 Pro YT3-X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)vlv_dsi_lenovo_yoga_tab3_backlight_fixup, From ff9c674d1127e768050fe418470e74586985c87b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 23 Aug 2024 20:47:13 -0700 Subject: [PATCH 1490/1523] drm/xe: Fix total initialization in xe_ggtt_print_holes() Clang warns (or errors with CONFIG_DRM_WERROR or CONFIG_WERROR): drivers/gpu/drm/xe/xe_ggtt.c:810:3: error: variable 'total' is uninitialized when used here [-Werror,-Wuninitialized] 810 | total += hole_size; | ^~~~~ drivers/gpu/drm/xe/xe_ggtt.c:798:11: note: initialize the variable 'total' to silence this warning 798 | u64 total; | ^ | = 0 1 error generated. Move the zero initialization of total from xe_gt_sriov_pf_config_print_available_ggtt() to xe_ggtt_print_holes() to resolve the warning. Fixes: 136367290ea5 ("drm/xe: Introduce xe_ggtt_print_holes") Signed-off-by: Nathan Chancellor Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240823-drm-xe-fix-total-in-xe_ggtt_print_holes-v1-1-12b02d079327@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index a3ce91decdce..86fc6afa43bd 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -795,7 +795,7 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer const struct drm_mm_node *entry; u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); u64 hole_start, hole_end, hole_size; - u64 total; + u64 total = 0; char buf[10]; mutex_lock(&ggtt->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c8835d9eead6..41ed07b153b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2110,7 +2110,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); - u64 spare, avail, total = 0; + u64 spare, avail, total; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); From d3204616a67e53fdcad14c7026869330fb382fd4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 17:38:41 -0400 Subject: [PATCH 1491/1523] bcachefs: Fix failure to flush moves before sleeping in copygc This fixes an apparent deadlock - rebalance would get stuck trying to take nocow locks because they weren't being released by copygc. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index deef4f024d20..d86565bf07c8 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -383,7 +383,7 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; - bch2_trans_unlock_long(ctxt.trans); + move_buckets_wait(&ctxt, buckets, true); bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), MAX_SCHEDULE_TIMEOUT); } From 49aa7830396bce33b00fa7ee734c35de36521138 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 15:35:22 -0400 Subject: [PATCH 1492/1523] bcachefs: Fix rebalance_work accounting rebalance_work was keying off of the presence of rebelance_opts in the extent - but that was incorrect, we keep those around after rebalance for indirect extents since the inode's options are not directly available Fixes: 20ac515a9cc7 ("bcachefs: bch_acct_rebalance_work") Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/buckets.c | 74 +++++++++++++++++++++++------------ fs/bcachefs/extents.c | 39 ++++++++++++++++++ fs/bcachefs/extents.h | 1 + fs/bcachefs/sb-downgrade.c | 8 +++- 5 files changed, 98 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c75f2e0f32bb..14ce726bf5a3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -677,7 +677,8 @@ struct bch_sb_field_ext { x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ - x(disk_accounting_inum, BCH_VERSION(1, 11)) + x(disk_accounting_inum, BCH_VERSION(1, 11)) \ + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index be2bbd248631..a2274429e7f4 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -699,7 +699,8 @@ err: static int __trigger_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, - enum btree_iter_update_trigger_flags flags) + enum btree_iter_update_trigger_flags flags, + s64 *replicas_sectors) { bool gc = flags & BTREE_TRIGGER_gc; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -708,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 replicas_sectors = 0; int ret = 0; struct disk_accounting_pos acc_replicas_key = { @@ -739,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans, if (ret) return ret; } else if (!p.has_ec) { - replicas_sectors += disk_sectors; + *replicas_sectors += disk_sectors; acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; } else { ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); @@ -777,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc_replicas_key.replicas.nr_devs) { - ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -787,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_snapshot, .snapshot.id = k.k->p.snapshot, }; - ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -807,7 +807,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_btree, .btree.id = btree_id, }; - ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); if (ret) return ret; } else { @@ -819,22 +819,13 @@ static int __trigger_extent(struct btree_trans *trans, s64 v[3] = { insert ? 1 : -1, insert ? k.k->size : -((s64) k.k->size), - replicas_sectors, + *replicas_sectors, }; ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); if (ret) return ret; } - if (bch2_bkey_rebalance_opts(k)) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_rebalance_work, - }; - ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc); - if (ret) - return ret; - } - return 0; } @@ -843,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; @@ -858,22 +850,54 @@ int bch2_trigger_extent(struct btree_trans *trans, new_ptrs_bytes)) return 0; - if (flags & BTREE_TRIGGER_transactional) { - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - - (int) bch2_bkey_needs_rebalance(c, old); + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { + s64 old_replicas_sectors = 0, new_replicas_sectors = 0; - if (mod) { + if (old.k->type) { + int ret = __trigger_extent(trans, btree, level, old, + flags & ~BTREE_TRIGGER_insert, + &old_replicas_sectors); + if (ret) + return ret; + } + + if (new.k->type) { + int ret = __trigger_extent(trans, btree, level, new.s_c, + flags & ~BTREE_TRIGGER_overwrite, + &new_replicas_sectors); + if (ret) + return ret; + } + + int need_rebalance_delta = 0; + s64 need_rebalance_sectors_delta = 0; + + s64 s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta -= s != 0; + need_rebalance_sectors_delta -= s; + + s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta += s != 0; + need_rebalance_sectors_delta += s; + + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, mod > 0); + new.k->p, need_rebalance_delta > 0); + if (ret) + return ret; + } + + if (need_rebalance_sectors_delta) { + struct disk_accounting_pos acc = { + .type = BCH_DISK_ACCOUNTING_rebalance_work, + }; + int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, + flags & BTREE_TRIGGER_gc); if (ret) return ret; } } - if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) - return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags); - return 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 9406f82fc255..e317df3644a1 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1379,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) return r != NULL; } +static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + u64 sectors = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { + sectors = 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + sectors += p.crc.compressed_size; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) + sectors += p.crc.compressed_size; + } + + return sectors; +} + +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; +} + int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, struct bch_io_opts *opts) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 1a6ddee48041..709dd83183be 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, unsigned, unsigned); bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, struct bch_io_opts *); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 650a1f77ca40..c7e4cdd3f6a5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -74,6 +74,9 @@ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ BCH_FSCK_ERR_accounting_key_junk_at_end) \ x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(rebalance_work_acct_fix, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) @@ -108,7 +111,10 @@ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ BCH_FSCK_ERR_accounting_replicas_not_marked, \ - BCH_FSCK_ERR_bkey_version_in_future) + BCH_FSCK_ERR_bkey_version_in_future) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) struct upgrade_downgrade_entry { u64 recovery_passes; From 5be63fc19fcaa4c236b307420483578a56986a37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Aug 2024 19:07:11 +1200 Subject: [PATCH 1493/1523] Linux 6.11-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2c1db7a6f793..7b60eb103c5d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From e24b0ef20a20cd26a770c66bf87d911838ea00d4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 23 Aug 2024 15:33:18 +0300 Subject: [PATCH 1494/1523] drm/i915: remove unnecessary display includes There are a number of leftover #include "display/..." directives that are completely unnecessary. Remove them to make it easier to spot the relevant ones. In one case, switch to a more specific include. Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240823123318.3189503-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 -- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 - drivers/gpu/drm/i915/i915_driver.c | 4 +--- drivers/gpu/drm/i915/i915_gem.c | 2 -- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 - drivers/gpu/drm/i915/i915_irq.c | 1 - drivers/gpu/drm/i915/i915_pci.c | 1 - 7 files changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d54162ce0f99..8593337ddf82 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -12,8 +12,6 @@ #include #include -#include "display/intel_frontbuffer.h" - #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 206a5e0fedf1..d60a6ca0cae5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -12,7 +12,6 @@ #include #include -#include "display/intel_display.h" #include "gem/i915_gem_lmem.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index e32766286369..ccdd2983cfb5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -49,7 +49,7 @@ #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_display_driver.h" -#include "display/intel_display_types.h" +#include "display/intel_display.h" #include "display/intel_dmc.h" #include "display/intel_dp.h" #include "display/intel_dpt.h" @@ -58,10 +58,8 @@ #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pch_refclk.h" -#include "display/intel_pipe_crc.h" #include "display/intel_pps.h" #include "display/intel_sprite.h" -#include "display/intel_vga.h" #include "display/skl_watermark.h" #include "gem/i915_gem_context.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1391c01d7663..070ab6546987 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,8 +39,6 @@ #include #include -#include "display/intel_display.h" - #include "gem/i915_gem_clflush.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7bd1861ddbdf..a9662cc6ed1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -15,7 +15,6 @@ #include #include -#include "display/intel_frontbuffer.h" #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8059ac7e15fe..2321de48d169 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -34,7 +34,6 @@ #include #include "display/intel_display_irq.h" -#include "display/intel_display_types.h" #include "display/intel_hotplug.h" #include "display/intel_hotplug_irq.h" #include "display/intel_lpe_audio.h" diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 94c89ca2ef30..d37bb3a704d0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -26,7 +26,6 @@ #include #include -#include "display/intel_display.h" #include "display/intel_display_driver.h" #include "gt/intel_gt_regs.h" #include "gt/intel_sa_media.h" From 65112db0c21022cb1caed5a03c6392eaaf984c14 Mon Sep 17 00:00:00 2001 From: Apoorva Singh Date: Fri, 16 Aug 2024 13:33:55 +0530 Subject: [PATCH 1495/1523] drm/xe: Remove NULL check of lrc->bo in xe_lrc_snapshot_capture() - lrc->bo NULL check is not needed in xe_lrc_snapshot_capture() as its already been taken care of in xe_lrc_init(). Signed-off-by: Apoorva Singh Acked-by: Rodrigo Vivi Reviewed-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240816080355.897256-1-apoorva.singh@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 974a9cd8c379..aec7db39c061 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1649,7 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo && lrc->bo->vm) + if (lrc->bo->vm) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); From 4836c6cc01a16f1ac2b436550299474ad7183c46 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 22 Aug 2024 17:39:46 +0300 Subject: [PATCH 1496/1523] drm/i915: remove unused leftover basedie step code The basedie step initialization as well as its last users were removed in commit 326e30e4624c ("drm/i915: Drop dead code for pvc"). Follow through with removing the unused macros and struct member and debug logging. Suggested-by: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240822143946.2526425-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 5 ----- drivers/gpu/drm/i915/intel_device_info.c | 1 - drivers/gpu/drm/i915/intel_step.h | 1 - 3 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3b1b16e71cf9..eb4c33e83c7c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -410,7 +410,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) -#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step) #define IS_GRAPHICS_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \ @@ -420,10 +419,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) -#define IS_BASEDIE_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \ - INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < (until)) - static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, enum intel_platform p) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d92b3050ac78..b485e959f064 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -108,7 +108,6 @@ void intel_device_info_print(const struct intel_device_info *info, drm_printf(p, "graphics stepping: %s\n", intel_step_name(runtime->step.graphics_step)); drm_printf(p, "media stepping: %s\n", intel_step_name(runtime->step.media_step)); - drm_printf(p, "base die stepping: %s\n", intel_step_name(runtime->step.basedie_step)); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "memory-regions: 0x%x\n", info->memory_regions); diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 83bd1190edf5..22f1d6905160 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -17,7 +17,6 @@ struct intel_step_info { */ u8 graphics_step; /* Represents the compute tile on Xe_HPC */ u8 media_step; - u8 basedie_step; }; #define STEP_ENUM_VAL(name) STEP_##name, From 594cf78dc36f31c0c7e0de4567e644f406d46bae Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 19:29:18 +0300 Subject: [PATCH 1497/1523] drm/i915/dp_mst: Fix MST state after a sink reset In some cases the sink can reset itself after it was configured into MST mode, without the driver noticing the disconnected state. For instance the reset may happen in the middle of a modeset, or the (long) HPD pulse generated may be not long enough for the encoder detect handler to observe the HPD's deasserted state. In this case the sink's DPCD register programmed to enable MST will be reset, while the driver still assumes MST is still enabled. Detect this condition, which will tear down and recreate/re-enable the MST topology. v2: - Add a code comment about adjusting the expected DP_MSTM_CTRL register value for SST + SideBand. (Suraj, Jani) - Print a debug message about detecting the link reset. (Jani) - Verify the DPCD MST state only if it wasn't already determined that the sink is disconnected. Cc: stable@vger.kernel.org Cc: Jani Nikula Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11195 Reviewed-by: Suraj Kandpal (v1) Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240823162918.1211875-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 12 +++++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 40 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.h | 1 + 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 977f149551f6..bceccaa40f32 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5999,6 +5999,18 @@ intel_dp_detect(struct drm_connector *connector, else status = connector_status_disconnected; + if (status != connector_status_disconnected && + !intel_dp_mst_verify_dpcd_state(intel_dp)) + /* + * This requires retrying detection for instance to re-enable + * the MST mode that got reset via a long HPD pulse. The retry + * will happen either via the hotplug handler's retry logic, + * ensured by setting the connector here to SST/disconnected, + * or via a userspace connector probing in response to the + * hotplug uevent sent when removing the MST connectors. + */ + status = connector_status_disconnected; + if (status == connector_status_disconnected) { memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance)); memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 45d2230d1801..15541932b809 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -2062,3 +2062,43 @@ void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp) intel_mst_set_probed_link_params(intel_dp, link_rate, lane_count); } + +/* + * intel_dp_mst_verify_dpcd_state - verify the MST SW enabled state wrt. the DPCD + * @intel_dp: DP port object + * + * Verify if @intel_dp's MST enabled SW state matches the corresponding DPCD + * state. A long HPD pulse - not long enough to be detected as a disconnected + * state - could've reset the DPCD state, which requires tearing + * down/recreating the MST topology. + * + * Returns %true if the SW MST enabled and DPCD states match, %false + * otherwise. + */ +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &dig_port->base; + int ret; + u8 val; + + if (!intel_dp->is_mst) + return true; + + ret = drm_dp_dpcd_readb(intel_dp->mst_mgr.aux, DP_MSTM_CTRL, &val); + + /* Adjust the expected register value for SST + SideBand. */ + if (ret < 0 || val != (DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC)) { + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s][ENCODER:%d:%s] MST mode got reset, removing topology (ret=%d, ctrl=0x%02x)\n", + connector->base.base.id, connector->base.name, + encoder->base.base.id, encoder->base.name, + ret, val); + + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index fba76454fa67..8343804ce3f8 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -28,5 +28,6 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp); +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp); #endif /* __INTEL_DP_MST_H__ */ From 67733d7a71503fd3e32eeada371f8aa2516c5c95 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: [PATCH 1498/1523] drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 31 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++++++-- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_device_info.c | 7 +++++ drivers/gpu/drm/i915/intel_device_info.h | 3 +++ include/drm/intel/i915_pciids.h | 13 ++++++---- 6 files changed, 59 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 3b69bc6616bd..551b0d7974ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + if (IS_ARROWLAKE(gt->i915)) { + bool too_old = false; + + /* + * ARL requires a newer firmware than MTL did (102.0.10.1878) but the + * firmware is actually common. So, need to do an explicit version check + * here rather than using a separate table entry. And if the older + * MTL-only version is found, then just don't use GSC rather than aborting + * the driver load. + */ + if (gsc->release.major < 102) { + too_old = true; + } else if (gsc->release.major == 102) { + if (gsc->release.minor == 0) { + if (gsc->release.patch < 10) { + too_old = true; + } else if (gsc->release.patch == 10) { + if (gsc->release.build < 1878) + too_old = true; + } + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d80278eb45d7..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb4c33e83c7c..d772cbe15fec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -536,6 +536,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_LUNARLAKE(i915) (0 && i915) #define IS_BATTLEMAGE(i915) (0 && i915) +#define IS_ARROWLAKE(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b485e959f064..3c47c625993e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -200,6 +200,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_arl_ids[] = { + INTEL_ARL_IDS(ID), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -257,6 +261,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_arl_ids, + ARRAY_SIZE(subplatform_arl_ids))) { + mask = BIT(INTEL_SUBPLATFORM_ARL); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index fb8a08623eb0..643ff1bf74ee 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -127,6 +127,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 +/* MTL */ +#define INTEL_SUBPLATFORM_ARL 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ -#define INTEL_MTL_IDS(MACRO__, ...) \ - MACRO__(0x7D40, ## __VA_ARGS__), \ +#define INTEL_ARL_IDS(MACRO__, ...) \ MACRO__(0x7D41, ## __VA_ARGS__), \ - MACRO__(0x7D45, ## __VA_ARGS__), \ MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + +#define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x7D40, ## __VA_ARGS__), \ + MACRO__(0x7D45, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ From 9c57bc08652a7c8e0e355c52c023fddc090fe9bb Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Aug 2024 15:46:15 -0700 Subject: [PATCH 1499/1523] drm/xe/lnl: Drop force_probe requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lunar Lake has been usable for a while in a desktop setup. Bugs are sporadically showing up in CI, but being promptly fixed. Nothing very concerning. All the uapi changes related to fundamental platform usage have been finalized. Remove the force_probe requirement and enable the platform by default. Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Jani Nikula Reviewed-by: Thomas Hellström Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240822224615.793540-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0f66cf067e2a..703822d39800 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -338,7 +338,6 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .has_display = true, - .require_force_probe = true, }; static const struct xe_device_desc bmg_desc = { From 11b7309dbe9fa98d9b8d18cd51db4f385c37ae30 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:28 +0530 Subject: [PATCH 1500/1523] drm/xe: Remove extra dma_fence_put on xe_sync_entry_add_deps failure drm_sched_job_add_dependency() drops references even in case of error, no need for caller to call dma_fence_put. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Ashutosh Dixit Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-1-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ca826aeb41ea..a0675f57a398 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -206,16 +206,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { - int err; - - if (sync->fence) { - err = drm_sched_job_add_dependency(&job->drm, - dma_fence_get(sync->fence)); - if (err) { - dma_fence_put(sync->fence); - return err; - } - } + if (sync->fence) + return drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); return 0; } From 19f01d4bbe9daf71901b200ab5c52591946b022a Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:29 +0530 Subject: [PATCH 1501/1523] drm/xe: Remove unrequired NULL checks in xe_sync_entry_cleanup dma_fence_put() and dma_fence_chain_free() can handle NULL input, there is no need for NULL check by caller. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-2-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index a0675f57a398..436faff09bac 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -249,10 +249,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) { if (sync->syncobj) drm_syncobj_put(sync->syncobj); - if (sync->fence) - dma_fence_put(sync->fence); - if (sync->chain_fence) - dma_fence_chain_free(sync->chain_fence); + dma_fence_put(sync->fence); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } From 8a04e342684a75ad1455fc3f23fab8e67c1aa9fc Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:30 +0530 Subject: [PATCH 1502/1523] drm/xe: Remove unrequired NULL check in xe_sched_job_free_fences dma_fence_chain_free() can handle NULL input, there is no need for NULL check by caller. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-3-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sched_job.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 9628f9deb3c0..55d47450b2c6 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job) if (ptrs->lrc_fence) xe_lrc_free_seqno_fence(ptrs->lrc_fence); - if (ptrs->chain_fence) - dma_fence_chain_free(ptrs->chain_fence); + dma_fence_chain_free(ptrs->chain_fence); } } From 014125c64d09e58e90dde49fbb57d802a13e2559 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 27 Aug 2024 14:09:05 +0200 Subject: [PATCH 1503/1523] drm/xe: Support 'nomodeset' kernel command-line option Setting 'nomodeset' on the kernel command line disables all graphics drivers with modesetting capabilities, leaving only firmware drivers, such as simpledrm or efifb. Most DRM drivers automatically support 'nomodeset' via DRM's module helper macros. In xe, which uses regular module_init(), manually call drm_firmware_drivers_only() to test for 'nomodeset'. Do not register the driver if set. v2: - use xe's init table (Lucas) - do NULL test for init/exit functions Signed-off-by: Thomas Zimmermann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240827121003.97429-1-tzimmermann@suse.de Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_module.c | 39 +++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e7e4b1ebab50..cbc444ec1854 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -8,6 +8,8 @@ #include #include +#include + #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" @@ -61,12 +63,23 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); +static int xe_check_nomodeset(void) +{ + if (drm_firmware_drivers_only()) + return -ENODEV; + + return 0; +} + struct init_funcs { int (*init)(void); void (*exit)(void); }; static const struct init_funcs init_funcs[] = { + { + .init = xe_check_nomodeset, + }, { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, @@ -85,15 +98,35 @@ static const struct init_funcs init_funcs[] = { }, }; +static int __init xe_call_init_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return 0; + if (!init_funcs[i].init) + return 0; + + return init_funcs[i].init(); +} + +static void xe_call_exit_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return; + if (!init_funcs[i].exit) + return; + + init_funcs[i].exit(); +} + static int __init xe_init(void) { int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); + err = xe_call_init_func(i); if (err) { while (i--) - init_funcs[i].exit(); + xe_call_exit_func(i); return err; } } @@ -106,7 +139,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - init_funcs[i].exit(); + xe_call_exit_func(i); } module_init(xe_init); From 97c6efb3649724e6163774f0e7405c5ca8b1097a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 26 Aug 2024 19:01:15 +0200 Subject: [PATCH 1504/1523] drm/i915/display: Plane capability for 64k phys alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some plane formats have been designed to require 64k physical alignment. By returning whether this is the case for certain formats, we do not need to hardcode this check inside Xe. Signed-off-by: Maarten Lankhorst Reviewed-by: Zbigniew Kempczyński Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240826170117.327709-2-maarten.lankhorst@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fb.c | 20 +++++++++++++++++++- drivers/gpu/drm/i915/display/intel_fb.h | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index d2716915d046..5be7bb43e2e0 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -169,7 +169,7 @@ static const struct intel_modifier_desc intel_modifiers[] = { }, { .modifier = I915_FORMAT_MOD_4_TILED_BMG_CCS, .display_ver = { 14, -1 }, - .plane_caps = INTEL_PLANE_CAP_TILING_4, + .plane_caps = INTEL_PLANE_CAP_TILING_4 | INTEL_PLANE_CAP_NEED64K_PHYS, }, { .modifier = I915_FORMAT_MOD_4_TILED_MTL_MC_CCS, .display_ver = { 14, 14 }, @@ -420,6 +420,24 @@ bool intel_fb_is_mc_ccs_modifier(u64 modifier) INTEL_PLANE_CAP_CCS_MC); } +/** + * intel_fb_needs_64k_phys: Check if modifier requires 64k physical placement. + * @modifier: Modifier to check + * + * Returns: + * Returns %true if @modifier requires 64k aligned physical pages. + */ +bool intel_fb_needs_64k_phys(u64 modifier) +{ + const struct intel_modifier_desc *md = lookup_modifier_or_null(modifier); + + if (!md) + return false; + + return plane_caps_contain_any(md->plane_caps, + INTEL_PLANE_CAP_NEED64K_PHYS); +} + static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md, u8 display_ver_from, u8 display_ver_until) { diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 6dee0c8b7f22..10de437e8ef8 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -28,11 +28,13 @@ struct intel_plane_state; #define INTEL_PLANE_CAP_TILING_Y BIT(4) #define INTEL_PLANE_CAP_TILING_Yf BIT(5) #define INTEL_PLANE_CAP_TILING_4 BIT(6) +#define INTEL_PLANE_CAP_NEED64K_PHYS BIT(7) bool intel_fb_is_tiled_modifier(u64 modifier); bool intel_fb_is_ccs_modifier(u64 modifier); bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier); bool intel_fb_is_mc_ccs_modifier(u64 modifier); +bool intel_fb_needs_64k_phys(u64 modifier); bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane); int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb); From c66f4711f79c937832b6c0e0b1805061667d99ea Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 26 Aug 2024 19:01:16 +0200 Subject: [PATCH 1505/1523] drm/xe: Align all VRAM scanout buffers to 64k physical pages when needed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For CCS formats on affected platforms, CCS can be used freely, but display engine requires a multiple of 64k physical pages. No other changes are needed. At the BO creation time we don't know if the BO will be used for CCS or not. If the scanout flag is set, and the BO is a multiple of 64k, we take the safe route and force the physical alignment of 64k pages. If the BO is not a multiple of 64k, or the scanout flag was not set at BO creation, we reject it for usage as CCS in display. The physical pages are likely not aligned correctly, and this will cause corruption when used as FB. The scanout flag and size being a multiple of 64k are used together to enforce 64k physical placement. VM_BIND is completely unaffected, mappings to a VM can still be aligned to 4k, just like for normal buffers. Signed-off-by: Zbigniew Kempczyński Signed-off-by: Maarten Lankhorst Cc: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Juha-Pekka Heikkilä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240826170117.327709-3-maarten.lankhorst@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/intel_fb_bo.c | 9 +++++++++ drivers/gpu/drm/xe/xe_bo.c | 7 +++++++ drivers/gpu/drm/xe/xe_vm.c | 11 ++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index f835492f73fb..63ce97cc4cfe 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -7,6 +7,7 @@ #include #include "intel_display_types.h" +#include "intel_fb.h" #include "intel_fb_bo.h" #include "xe_bo.h" @@ -28,6 +29,14 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, struct xe_device *xe = to_xe_device(bo->ttm.base.dev); int ret; + /* + * Some modifiers require physical alignment of 64KiB VRAM pages; + * require that the BO in those cases is created correctly. + */ + if (XE_IOCTL_DBG(xe, intel_fb_needs_64k_phys(mode_cmd->modifier[0]) && + !(bo->flags & XE_BO_FLAG_NEEDS_64K))) + return -EINVAL; + xe_bo_get(bo); ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 45652d7e6fa6..4b6834653810 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1990,6 +1990,13 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); + /* CCS formats need physical placement at a 64K alignment in VRAM. */ + if ((bo_flags & XE_BO_FLAG_VRAM_MASK) && + (bo_flags & XE_BO_FLAG_SCANOUT) && + !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) && + IS_ALIGNED(args->size, SZ_64K)) + bo_flags |= XE_BO_FLAG_NEEDS_64K; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK))) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dab2a3b2e17f..1ff94a9ae7c9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2905,7 +2905,16 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, return -EINVAL; } - if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + /* + * Some platforms require 64k VM_BIND alignment, + * specifically those with XE_VRAM_FLAGS_NEED64K. + * + * Other platforms may have BO's set to 64k physical placement, + * but can be mapped at 4k offsets anyway. This check is only + * there for the former case. + */ + if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && + (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || From 7546a8201ba288530ed1bbf2a8bdcce5e034a234 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:34 +0200 Subject: [PATCH 1506/1523] Revert "drm/xe/lnl: Offload system clear page activity to GPU" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimization relied on having to clear CCS on allocations. If there is no need to clear CCS on allocations then this would mostly help in reducing CPU utilization. Revert this patch at this moment because of: 1 Currently Xe can't do clear on free and using a invalid ttm flag, TTM_TT_FLAG_CLEARED_ON_FREE which could poison global ttm pool on multi-device setup. 2 Also for LNL CPU:WB doesn't require clearing CCS as such BO will not be allowed to bind with compression PTE. Subsequent patch will disable clearing CCS for CPU:WB BOs for LNL. This reverts commit 23683061805be368c8d1c7e7ff52abc470cac275. Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Thomas Hellström Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 26 ++------------------------ drivers/gpu/drm/xe/xe_device_types.h | 2 -- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 12 ------------ 3 files changed, 2 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index cbe7bf098970..e3d68710a91a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -396,14 +396,6 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, caching = ttm_uncached; } - /* - * If the device can support gpu clear system pages then set proper ttm - * flag. Zeroed pages are only required for ttm_bo_type_device so - * unwanted data is not leaked to userspace. - */ - if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) - page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE; - err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); @@ -425,10 +417,6 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; - /* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */ - if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE) - tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC; - err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); if (err) return err; @@ -671,16 +659,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool needs_clear; bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; - bool clear_system_pages; int ret = 0; - /* - * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when - * moving to system as the bo doesn't have dma_mapping. - */ - if (!old_mem && ttm && !ttm_tt_is_populated(ttm)) - ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE; - /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) @@ -703,10 +683,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data); - clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || - (!ttm && ttm_bo->type == ttm_bo_type_device) || - clear_system_pages; + (!ttm && ttm_bo->type == ttm_bo_type_device); if (new_mem->mem_type == XE_PL_TT) { ret = xe_tt_map_sg(ttm); @@ -818,7 +796,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) { u32 flags = 0; - if (mem_type_is_vram(new_mem->mem_type) || clear_system_pages) + if (mem_type_is_vram(new_mem->mem_type)) flags |= XE_MIGRATE_CLEAR_FLAG_FULL; else if (handle_system_ccs) flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 121a403a1478..856db4020048 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -339,8 +339,6 @@ struct xe_device { struct xe_mem_region vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; - /** @mem.gpu_page_clear_sys: clear system pages offloaded to GPU */ - bool gpu_page_clear_sys; } mem; /** @sriov: device level virtualization data */ diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index e0ac20f20758..9844a8edbfe1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -117,17 +117,5 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); - - /* - * On iGFX device with flat CCS, we clear CCS metadata, let's extend that - * and use GPU to clear pages as well. - * - * Disable this when init_on_free and/or init_on_alloc is on to avoid double - * zeroing pages with CPU and GPU. - */ - if (xe_device_has_flat_ccs(xe) && !IS_DGFX(xe) && - !want_init_on_alloc(GFP_USER) && !want_init_on_free()) - xe->mem.gpu_page_clear_sys = true; - return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); } From 789e51597d33ec0053b029127d797d86c0d857eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:35 +0200 Subject: [PATCH 1507/1523] Revert "drm/ttm: Add a flag to allow drivers to skip clear-on-free" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove TTM_TT_FLAG_CLEARED_ON_FREE now that XE stopped using this flag. This reverts commit decbfaf06db05fa1f9b33149ebb3c145b44e878f. Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Nirmoy Das Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++----------- include/drm/ttm/ttm_tt.h | 6 +----- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 935ab3cfd046..8504dbe19c1a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,18 +222,15 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, - bool cleared) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) { unsigned int i, num_pages = 1 << pt->order; - if (!cleared) { - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); - } + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); } spin_lock(&pt->lock); @@ -397,7 +394,6 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -411,7 +407,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages, cleared); + ttm_pool_type_give(pt, *pages); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index cfaf49de2419..2b9d856ff388 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,9 +85,6 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * - * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles - * clearing backing store - * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -97,9 +94,8 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) -#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; From 379cad69bdfe522e840ed5f5c01ac8769006d53e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 26 Aug 2024 16:34:50 +0200 Subject: [PATCH 1508/1523] drm/xe: Use separate rpm lockdep map for non-d3cold-capable devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non-d3cold-capable devices we'd like to be able to wake up the device from reclaim. In particular, for Lunar Lake we'd like to be able to blit CCS metadata to system at shrink time; at least from kswapd where it's reasonable OK to wait for rpm resume and a preceding rpm suspend. Therefore use a separate lockdep map for such devices and prime it reclaim-tainted. v2: - Rename lockmap acquire- and release functions. (Rodrigo Vivi). - Reinstate the old xe_pm_runtime_lockdep_prime() function and rename it to xe_rpm_might_enter_cb(). (Matthew Auld). - Introduce a separate xe_pm_runtime_lockdep_prime function called from module init for known required locking orders. v3: - Actually hook up the prime function at module init. v4: - Rebase. v5: - Don't use reclaim-safe RPM with sriov. Cc: "Vivi, Rodrigo" Cc: "Auld, Matthew" Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240826143450.92511-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_module.c | 9 ++++ drivers/gpu/drm/xe/xe_pm.c | 84 ++++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 80 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index cbc444ec1854..bfc3deebdaa2 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -13,6 +13,7 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" @@ -76,6 +77,10 @@ struct init_funcs { void (*exit)(void); }; +static void xe_dummy_exit(void) +{ +} + static const struct init_funcs init_funcs[] = { { .init = xe_check_nomodeset, @@ -96,6 +101,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_observation_sysctl_register, .exit = xe_observation_sysctl_unregister, }, + { + .init = xe_pm_module_init, + .exit = xe_dummy_exit, + }, }; static int __init xe_call_init_func(unsigned int i) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c247e1cb8aba..2600c936527e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -70,11 +70,34 @@ */ #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_pm_runtime_lockdep_map = { - .name = "xe_pm_runtime_lockdep_map" +static struct lockdep_map xe_pm_runtime_d3cold_map = { + .name = "xe_rpm_d3cold_map" +}; + +static struct lockdep_map xe_pm_runtime_nod3cold_map = { + .name = "xe_rpm_nod3cold_map" }; #endif +static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) +{ + return !xe->d3cold.capable && !xe->info.has_sriov; +} + +static void xe_rpm_lockmap_acquire(const struct xe_device *xe) +{ + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +static void xe_rpm_lockmap_release(const struct xe_device *xe) +{ + lock_map_release(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -354,7 +377,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -387,7 +410,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) out: if (err) xe_display_pm_resume(xe, true); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -408,7 +431,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); @@ -440,7 +463,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) } out: - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -454,15 +477,37 @@ out: * stuff that can happen inside the runtime_resume callback by acquiring * a dummy lock (it doesn't protect anything and gets compiled out on * non-debug builds). Lockdep then only needs to see the - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. * For example if the (callers_locks) are ever grabbed in the * runtime_resume callback, lockdep should give us a nice splat. */ -static void pm_runtime_lockdep_prime(void) +static void xe_rpm_might_enter_cb(const struct xe_device *xe) { - lock_map_acquire(&xe_pm_runtime_lockdep_map); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); + xe_rpm_lockmap_release(xe); +} + +/* + * Prime the lockdep maps for known locking orders that need to + * be supported but that may not always occur on all systems. + */ +static void xe_pm_runtime_lockdep_prime(void) +{ + struct dma_resv lockdep_resv; + + dma_resv_init(&lockdep_resv); + lock_map_acquire(&xe_pm_runtime_d3cold_map); + /* D3Cold takes the dma_resv locks to evict bos */ + dma_resv_lock(&lockdep_resv, NULL); + dma_resv_unlock(&lockdep_resv); + lock_map_release(&xe_pm_runtime_d3cold_map); + + /* Shrinkers might like to wake up the device under reclaim. */ + fs_reclaim_acquire(GFP_KERNEL); + lock_map_acquire(&xe_pm_runtime_nod3cold_map); + lock_map_release(&xe_pm_runtime_nod3cold_map); + fs_reclaim_release(GFP_KERNEL); } /** @@ -477,7 +522,7 @@ void xe_pm_runtime_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); pm_runtime_resume(xe->drm.dev); } @@ -509,7 +554,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe) if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_get_sync(xe->drm.dev); } @@ -577,7 +622,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } @@ -669,3 +714,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) drm_dbg(&xe->drm, "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } + +/** + * xe_pm_module_init() - Perform xe_pm specific module initialization. + * + * Return: 0 on success. Currently doesn't fail. + */ +int __init xe_pm_module_init(void) +{ + xe_pm_runtime_lockdep_prime(); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 104a21ae6dfd..9aef673b1c8a 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_module_init(void); #endif From c72084163cd22ebf59d936669ec25b1fc2b7494c Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 28 Aug 2024 14:52:29 +0530 Subject: [PATCH 1509/1523] drm/xe: Fix NPD in ggtt_node_remove() Make sure that ggtt_node_remove() is invoked only if both node and ggtt are not null. Move the null checks to the caller function xe_ggtt_node_remove(). v2: Move null check below declarations (Tejas) Fixes: 919bb54e989c ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Tejas Upadhyay Reviewed-by: Tejas Upadhyay Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240828092229.3606503-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 86fc6afa43bd..f3fca5565d32 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -286,9 +286,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) bool bound; int idx; - if (!node || !node->ggtt) - return; - bound = drm_dev_enter(&xe->drm, &idx); mutex_lock(&ggtt->lock); @@ -328,8 +325,14 @@ static void ggtt_node_remove_work_func(struct work_struct *work) */ void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) { - struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); + struct xe_ggtt *ggtt; + struct xe_device *xe; + + if (!node || !node->ggtt) + return; + + ggtt = node->ggtt; + xe = tile_to_xe(ggtt->tile); node->invalidate_on_remove = invalidate; From 3adcf970dc7ec0469ec3116a5a8be9161d17a335 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Wed, 28 Aug 2024 13:51:52 +0530 Subject: [PATCH 1510/1523] drm/xe/bmg: Drop force_probe requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Battlemage platform is sufficiently tested and found stable. CI is also pretty stable. Remove the force_probe requirement to enable the platform support by default. Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Jani Nikula Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240828082152.3194814-1-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 703822d39800..7eb00a87b786 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -344,7 +344,6 @@ static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, - .require_force_probe = true, .has_heci_cscfi = 1, }; From 33eca84db6e31091cef63584158ab64704f78462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 10 Jul 2024 15:41:37 +0300 Subject: [PATCH 1511/1523] drm/i915: Fix readout degamma_lut mismatch on ilk/snb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ilk/snb the pipe may be configured to place the LUT before or after the CSC depending on various factors, but as there is only one LUT (no split mode like on IVB+) we only advertise a gamma_lut and no degamma_lut in the uapi to avoid confusing userspace. This can cause a problem during readout if the VBIOS/GOP enabled the LUT in the pre CSC configuration. The current code blindly assigns the results of the readout to the degamma_lut, which will cause a failure during the next atomic_check() as we aren't expecting anything to be in degamma_lut since it's not visible to userspace. Fix the problem by assigning whatever LUT we read out from the hardware into gamma_lut. Cc: stable@vger.kernel.org Fixes: d2559299d339 ("drm/i915: Make ilk_read_luts() capable of degamma readout") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11608 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240710124137.16773-1-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- .../drm/i915/display/intel_modeset_setup.c | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 6f85f5352455..72694dde3c22 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -326,6 +326,8 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + if (intel_crtc_is_joiner_secondary(crtc_state)) return; @@ -337,11 +339,30 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter; - /* assume 1:1 mapping */ - drm_property_replace_blob(&crtc_state->hw.degamma_lut, - crtc_state->pre_csc_lut); - drm_property_replace_blob(&crtc_state->hw.gamma_lut, - crtc_state->post_csc_lut); + if (DISPLAY_INFO(i915)->color.degamma_lut_size) { + /* assume 1:1 mapping */ + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + crtc_state->pre_csc_lut); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut); + } else { + /* + * ilk/snb hw may be configured for either pre_csc_lut + * or post_csc_lut, but we don't advertise degamma_lut as + * being available in the uapi since there is only one + * hardware LUT. Always assign the result of the readout + * to gamma_lut as that is the only valid source of LUTs + * in the uapi. + */ + drm_WARN_ON(&i915->drm, crtc_state->post_csc_lut && + crtc_state->pre_csc_lut); + + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + NULL); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut ?: + crtc_state->pre_csc_lut); + } drm_property_replace_blob(&crtc_state->uapi.degamma_lut, crtc_state->hw.degamma_lut); From 81a1c37c8b52eff636e77a794d0f0620c3a40af0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Jun 2024 16:58:52 +0300 Subject: [PATCH 1512/1523] drm/i915/dsb: Hook up DSB error interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable all DSB error/fault interrupts so that we can see if anything goes terribly wrong. v2: Pass intel_display to DISPLAY_VER() (Jani) Drop extra '/' from drm_err() for consistency v3: Reorder the irq handler a bit Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240625135852.13431-1-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- .../gpu/drm/i915/display/intel_display_irq.c | 17 ++++++ drivers/gpu/drm/i915/display/intel_dsb.c | 56 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 6 ++ drivers/gpu/drm/i915/i915_reg.h | 4 ++ 4 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index afcd2af82942..d85c33eabc47 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -14,6 +14,7 @@ #include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_dp_aux.h" +#include "intel_dsb.h" #include "intel_fdi_regs.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" @@ -1164,6 +1165,17 @@ void gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (iir & gen8_de_pipe_flip_done_mask(dev_priv)) flip_done_handler(dev_priv, pipe); + if (HAS_DSB(dev_priv)) { + if (iir & GEN12_DSB_INT(INTEL_DSB_0)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_0); + + if (iir & GEN12_DSB_INT(INTEL_DSB_1)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_1); + + if (iir & GEN12_DSB_INT(INTEL_DSB_2)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_2); + } + if (iir & GEN8_PIPE_CDCLK_CRC_DONE) hsw_pipe_crc_irq_handler(dev_priv, pipe); @@ -1736,6 +1748,11 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) de_port_masked |= DSI0_TE | DSI1_TE; } + if (HAS_DSB(dev_priv)) + de_pipe_masked |= GEN12_DSB_INT(INTEL_DSB_0) | + GEN12_DSB_INT(INTEL_DSB_1) | + GEN12_DSB_INT(INTEL_DSB_2); + de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | gen8_de_pipe_underrun_mask(dev_priv) | diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 2ab3765f6c06..3453989728aa 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -339,6 +339,40 @@ static u32 dsb_chicken(struct intel_crtc *crtc) return DSB_SKIP_WAITS_EN; } +static u32 dsb_error_int_status(struct intel_display *display) +{ + u32 errors; + + errors = DSB_GTT_FAULT_INT_STATUS | + DSB_RSPTIMEOUT_INT_STATUS | + DSB_POLL_ERR_INT_STATUS; + + /* + * All the non-existing status bits operate as + * normal r/w bits, so any attempt to clear them + * will just end up setting them. Never do that so + * we won't mistake them for actual error interrupts. + */ + if (DISPLAY_VER(display) >= 14) + errors |= DSB_ATS_FAULT_INT_STATUS; + + return errors; +} + +static u32 dsb_error_int_en(struct intel_display *display) +{ + u32 errors; + + errors = DSB_GTT_FAULT_INT_EN | + DSB_RSPTIMEOUT_INT_EN | + DSB_POLL_ERR_INT_EN; + + if (DISPLAY_VER(display) >= 14) + errors |= DSB_ATS_FAULT_INT_EN; + + return errors; +} + static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, int dewake_scanline) { @@ -363,6 +397,10 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id), dsb_chicken(crtc)); + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS | + dsb_error_int_en(display)); + intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); @@ -430,6 +468,9 @@ void intel_dsb_wait(struct intel_dsb *dsb) dsb->free_pos = 0; dsb->ins_start_offset = 0; intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0); + + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS); } /** @@ -513,3 +554,18 @@ void intel_dsb_cleanup(struct intel_dsb *dsb) intel_dsb_buffer_cleanup(&dsb->dsb_buf); kfree(dsb); } + +void intel_dsb_irq_handler(struct intel_display *display, + enum pipe pipe, enum intel_dsb_id dsb_id) +{ + struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(display->drm), pipe); + u32 tmp, errors; + + tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id)); + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp); + + errors = tmp & dsb_error_int_status(display); + if (errors) + drm_err(display->drm, "[CRTC:%d:%s] DSB %d error interrupt: 0x%x\n", + crtc->base.base.id, crtc->base.name, dsb_id, errors); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index bb42749f2ea4..84fc2f8434d1 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -13,8 +13,11 @@ struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dsb; +enum pipe; + enum intel_dsb_id { INTEL_DSB_0, INTEL_DSB_1, @@ -41,4 +44,7 @@ void intel_dsb_commit(struct intel_dsb *dsb, bool wait_for_vblank); void intel_dsb_wait(struct intel_dsb *dsb); +void intel_dsb_irq_handler(struct intel_display *display, + enum pipe pipe, enum intel_dsb_id dsb_id); + #endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 569b461022c5..41f4350a7c6c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2516,6 +2516,10 @@ #define GEN11_PIPE_PLANE7_FLIP_DONE REG_BIT(18) /* icl/tgl */ #define GEN11_PIPE_PLANE6_FLIP_DONE REG_BIT(17) /* icl/tgl */ #define GEN11_PIPE_PLANE5_FLIP_DONE REG_BIT(16) /* icl+ */ +#define GEN12_DSB_2_INT REG_BIT(15) /* tgl+ */ +#define GEN12_DSB_1_INT REG_BIT(14) /* tgl+ */ +#define GEN12_DSB_0_INT REG_BIT(13) /* tgl+ */ +#define GEN12_DSB_INT(dsb_id) REG_BIT(13 + (dsb_id)) #define GEN9_PIPE_CURSOR_FAULT REG_BIT(11) /* skl+ */ #define GEN9_PIPE_PLANE4_FAULT REG_BIT(10) /* skl+ */ #define GEN8_PIPE_CURSOR_FAULT REG_BIT(10) /* bdw */ From 21bb04152a18ac2314ef4186b6dcd46f1b847354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:22 +0300 Subject: [PATCH 1513/1523] drm/i915/dsb: Convert dewake_scanline to a hw scanline number earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we switch from out software idea of a scanline to the hw's idea of a scanline during the commit phase in _intel_dsb_commit(). While that is slightly easier due to fastsets fiddling with the timings, we'll also need to generate proper hw scanline numbers already when emitting DSB scanline wait instructions. So this approach won't do in the future. Switch to hw scanline numbers earlier. Also intel_dsb_dewake_scanline() itself already makes some assumptions about VRR that don't take into account VRR toggling during fastsets, so technically delaying the sw->hw conversion doesn't even help us. The other reason for delaying the conversion was that we are using intel_get_crtc_scanline() during intel_dsb_commit() which gives us the current sw scanline. But this is pretty low level stuff anyway so just using raw PIPEDSL reads seems fine here, and that of course gives us the hw scanline directly, reducing the need to do so many conversions. v2: Return the non-hw scanline from intel_dsb_dewake_scanline() Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 21 ++++++++++++--------- drivers/gpu/drm/i915/display/intel_vblank.c | 9 ++++----- drivers/gpu/drm/i915/display/intel_vblank.h | 3 ++- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 3453989728aa..54969660333a 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "i915_reg.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" @@ -42,7 +43,7 @@ struct intel_dsb { */ unsigned int ins_start_offset; - int dewake_scanline; + int hw_dewake_scanline; }; /** @@ -374,7 +375,7 @@ static u32 dsb_error_int_en(struct intel_display *display) } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, - int dewake_scanline) + int hw_dewake_scanline) { struct intel_crtc *crtc = dsb->crtc; struct intel_display *display = to_intel_display(crtc->base.dev); @@ -404,10 +405,8 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); - if (dewake_scanline >= 0) { - int diff, hw_dewake_scanline; - - hw_dewake_scanline = intel_crtc_scanline_to_hw(crtc, dewake_scanline); + if (hw_dewake_scanline >= 0) { + int diff, position; intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), DSB_ENABLE_DEWAKE | @@ -417,7 +416,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, * Force DEwake immediately if we're already past * or close to racing past the target scanline. */ - diff = dewake_scanline - intel_get_crtc_scanline(crtc); + position = intel_de_read_fw(display, PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; + + diff = hw_dewake_scanline - position; intel_de_write_fw(display, DSB_PMCTRL_2(pipe, dsb->id), (diff >= 0 && diff < 5 ? DSB_FORCE_DEWAKE : 0) | DSB_BLOCK_DEWAKE_EXTENSION); @@ -439,7 +440,7 @@ void intel_dsb_commit(struct intel_dsb *dsb, { _intel_dsb_commit(dsb, wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0, - wait_for_vblank ? dsb->dewake_scanline : -1); + wait_for_vblank ? dsb->hw_dewake_scanline : -1); } void intel_dsb_wait(struct intel_dsb *dsb) @@ -527,7 +528,9 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->size = size / 4; /* in dwords */ dsb->free_pos = 0; dsb->ins_start_offset = 0; - dsb->dewake_scanline = intel_dsb_dewake_scanline(crtc_state); + + dsb->hw_dewake_scanline = + intel_crtc_scanline_to_hw(crtc_state, intel_dsb_dewake_scanline(crtc_state)); return dsb; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 838b55ecb1d8..f337cde82198 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -284,13 +284,12 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + vtotal + crtc->scanline_offset) % vtotal; } -int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) +int intel_crtc_scanline_to_hw(const struct intel_crtc_state *crtc_state, + int scanline) { - const struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); - const struct drm_display_mode *mode = &vblank->hwmode; - int vtotal = intel_mode_vtotal(mode); + int vtotal = intel_mode_vtotal(&crtc_state->hw.adjusted_mode); - return (scanline + vtotal - crtc->scanline_offset) % vtotal; + return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal; } /* diff --git a/drivers/gpu/drm/i915/display/intel_vblank.h b/drivers/gpu/drm/i915/display/intel_vblank.h index 7e526f6861e4..45a4a961aaab 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.h +++ b/drivers/gpu/drm/i915/display/intel_vblank.h @@ -40,6 +40,7 @@ void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc); void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc); void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, bool vrr_enable); -int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline); +int intel_crtc_scanline_to_hw(const struct intel_crtc_state *crtc_state, + int scanline); #endif /* __INTEL_VBLANK_H__ */ From 70a65a5de23337c0c0251c482520224ac80bdeb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:23 +0300 Subject: [PATCH 1514/1523] drm/i915/dsb: Shuffle code around MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate intel_dsb_dewake_scanline() and dsb_chicken() upwards in the file. I need to reuse these while emitting DSB commands, and I'd like to keep the DSB command emission stuff more or less grouped together in the file. Also drop the intel_ prefix from intel_dsb_dewake_scanline() since it's all internal stuff and thus doesn't need so much namespacing. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-6-ville.syrjala@linux.intel.com Reviewed-by: Animesh manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 56 ++++++++++++------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 54969660333a..944fb0990045 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -83,6 +83,33 @@ struct intel_dsb { #define DSB_OPCODE_POLL 0xA /* see DSB_REG_VALUE_MASK */ +static int dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + unsigned int latency = skl_watermark_max_latency(i915, 0); + int vblank_start; + + if (crtc_state->vrr.enable) + vblank_start = intel_vrr_vmin_vblank_start(crtc_state); + else + vblank_start = intel_mode_vblank_start(adjusted_mode); + + return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); +} + +static u32 dsb_chicken(struct intel_crtc *crtc) +{ + if (crtc->mode_flags & I915_MODE_FLAG_VRR) + return DSB_SKIP_WAITS_EN | + DSB_CTRL_WAIT_SAFE_WINDOW | + DSB_CTRL_NO_WAIT_VBLANK | + DSB_INST_WAIT_SAFE_WINDOW | + DSB_INST_NO_WAIT_VBLANK; + else + return DSB_SKIP_WAITS_EN; +} + static bool assert_dsb_has_room(struct intel_dsb *dsb) { struct intel_crtc *crtc = dsb->crtc; @@ -313,33 +340,6 @@ void intel_dsb_finish(struct intel_dsb *dsb) intel_dsb_buffer_flush_map(&dsb->dsb_buf); } -static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - unsigned int latency = skl_watermark_max_latency(i915, 0); - int vblank_start; - - if (crtc_state->vrr.enable) - vblank_start = intel_vrr_vmin_vblank_start(crtc_state); - else - vblank_start = intel_mode_vblank_start(adjusted_mode); - - return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); -} - -static u32 dsb_chicken(struct intel_crtc *crtc) -{ - if (crtc->mode_flags & I915_MODE_FLAG_VRR) - return DSB_SKIP_WAITS_EN | - DSB_CTRL_WAIT_SAFE_WINDOW | - DSB_CTRL_NO_WAIT_VBLANK | - DSB_INST_WAIT_SAFE_WINDOW | - DSB_INST_NO_WAIT_VBLANK; - else - return DSB_SKIP_WAITS_EN; -} - static u32 dsb_error_int_status(struct intel_display *display) { u32 errors; @@ -530,7 +530,7 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->ins_start_offset = 0; dsb->hw_dewake_scanline = - intel_crtc_scanline_to_hw(crtc_state, intel_dsb_dewake_scanline(crtc_state)); + intel_crtc_scanline_to_hw(crtc_state, dsb_dewake_scanline(crtc_state)); return dsb; From eb4556f25fb4cb3a005a93dcc6dcc4b0c024f5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:24 +0300 Subject: [PATCH 1515/1523] drm/i915/dsb: Fix dewake scanline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we calculate the DEwake scanline based on the delayed vblank start, while in reality it should be computed based on the undelayed vblank start (as that is where the DSB actually starts). Currently it doesn't really matter as we don't have any vblank delay configured, but that may change in the future so let's be accurate in what we do. We can also remove the max() as intel_crtc_scanline_to_hw() can deal with negative numbers, which there really shouldn't be anyway. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-7-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 944fb0990045..6ca052558e3b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -86,16 +86,10 @@ struct intel_dsb { static int dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; unsigned int latency = skl_watermark_max_latency(i915, 0); - int vblank_start; - if (crtc_state->vrr.enable) - vblank_start = intel_vrr_vmin_vblank_start(crtc_state); - else - vblank_start = intel_mode_vblank_start(adjusted_mode); - - return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); + return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) - + intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency); } static u32 dsb_chicken(struct intel_crtc *crtc) From a69dcaf9310ab09ed97711e0fee08e59218ed8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:25 +0300 Subject: [PATCH 1516/1523] drm/i915/dsb: Account for VRR properly in DSB scanline stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When determining various scanlines for DSB use we should take into account whether VRR is active at the time when the DSB uses said scanline information. For now all DSB scanline usage occurs prior to the actual commit, so we only need to care about the state of VRR at that time. I've decided to move intel_crtc_scanline_to_hw() in its entirety to the DSB code as it will also need to know the actual state of VRR in order to do its job 100% correctly. TODO: figure out how much of this could be moved to some more generic place and perhaps be shared with the CPU vblank evasion code/etc... Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-8-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_display.c | 4 +- drivers/gpu/drm/i915/display/intel_display.h | 3 + drivers/gpu/drm/i915/display/intel_dsb.c | 65 ++++++++++++++++++-- drivers/gpu/drm/i915/display/intel_vblank.c | 10 +-- drivers/gpu/drm/i915/display/intel_vblank.h | 3 +- 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9049b9a1209d..9ad0cb3fab29 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1032,8 +1032,8 @@ static bool intel_crtc_vrr_enabling(struct intel_atomic_state *state, vrr_params_changed(old_crtc_state, new_crtc_state))); } -static bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, - struct intel_crtc *crtc) +bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index b0cf6ca70952..b21d9578d5db 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -532,6 +532,9 @@ void intel_plane_fixup_bitmasks(struct intel_crtc_state *crtc_state); void intel_update_watermarks(struct drm_i915_private *i915); +bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, + struct intel_crtc *crtc); + /* modesetting */ int intel_modeset_pipes_in_mask_early(struct intel_atomic_state *state, const char *reason, u8 pipe_mask); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 6ca052558e3b..2c7c21c69ad6 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -83,15 +83,72 @@ struct intel_dsb { #define DSB_OPCODE_POLL 0xA /* see DSB_REG_VALUE_MASK */ -static int dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) +static bool pre_commit_is_vrr_active(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* VRR will be enabled afterwards, if necessary */ + if (intel_crtc_needs_modeset(new_crtc_state)) + return false; + + /* VRR will have been disabled during intel_pre_plane_update() */ + return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc); +} + +static const struct intel_crtc_state * +pre_commit_crtc_state(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* + * During fastsets/etc. the transcoder is still + * running with the old timings at this point. + */ + if (intel_crtc_needs_modeset(new_crtc_state)) + return new_crtc_state; + else + return old_crtc_state; +} + +static int dsb_vtotal(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + + if (pre_commit_is_vrr_active(state, crtc)) + return crtc_state->vrr.vmax; + else + return intel_mode_vtotal(&crtc_state->hw.adjusted_mode); +} + +static int dsb_dewake_scanline(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + struct drm_i915_private *i915 = to_i915(state->base.dev); unsigned int latency = skl_watermark_max_latency(i915, 0); return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) - intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency); } +static int dsb_scanline_to_hw(struct intel_atomic_state *state, + struct intel_crtc *crtc, int scanline) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + int vtotal = dsb_vtotal(state, crtc); + + return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal; +} + static u32 dsb_chicken(struct intel_crtc *crtc) { if (crtc->mode_flags & I915_MODE_FLAG_VRR) @@ -487,8 +544,6 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, unsigned int max_cmds) { struct drm_i915_private *i915 = to_i915(state->base.dev); - const struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); intel_wakeref_t wakeref; struct intel_dsb *dsb; unsigned int size; @@ -524,7 +579,7 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->ins_start_offset = 0; dsb->hw_dewake_scanline = - intel_crtc_scanline_to_hw(crtc_state, dsb_dewake_scanline(crtc_state)); + dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline(state, crtc)); return dsb; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index f337cde82198..0b7f2134e441 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -190,7 +190,7 @@ static u32 __intel_get_crtc_scanline_from_timestamp(struct intel_crtc *crtc) return scanline; } -static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) +int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); @@ -284,14 +284,6 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + vtotal + crtc->scanline_offset) % vtotal; } -int intel_crtc_scanline_to_hw(const struct intel_crtc_state *crtc_state, - int scanline) -{ - int vtotal = intel_mode_vtotal(&crtc_state->hw.adjusted_mode); - - return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal; -} - /* * The uncore version of the spin lock functions is used to decide * whether we need to lock the uncore lock or not. This is only diff --git a/drivers/gpu/drm/i915/display/intel_vblank.h b/drivers/gpu/drm/i915/display/intel_vblank.h index 45a4a961aaab..6d7336256982 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.h +++ b/drivers/gpu/drm/i915/display/intel_vblank.h @@ -40,7 +40,6 @@ void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc); void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc); void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, bool vrr_enable); -int intel_crtc_scanline_to_hw(const struct intel_crtc_state *crtc_state, - int scanline); +int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VBLANK_H__ */ From 8d5ac8efb6d94efda53f604fd9c072b4754a3d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:26 +0300 Subject: [PATCH 1517/1523] drm/i915/dsb: Precompute DSB_CHICKEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the code that determines the correct DSB_CHICKEN value to be usable for use within DSB commands themselves. Ie. precompute it based on our knowledge of what the hardware state (VRR vs. not mainly) will be at the time of the commit. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-9-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 2c7c21c69ad6..779ba7eb8042 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -43,6 +43,7 @@ struct intel_dsb { */ unsigned int ins_start_offset; + u32 chicken; int hw_dewake_scanline; }; @@ -149,9 +150,10 @@ static int dsb_scanline_to_hw(struct intel_atomic_state *state, return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal; } -static u32 dsb_chicken(struct intel_crtc *crtc) +static u32 dsb_chicken(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - if (crtc->mode_flags & I915_MODE_FLAG_VRR) + if (pre_commit_is_vrr_active(state, crtc)) return DSB_SKIP_WAITS_EN | DSB_CTRL_WAIT_SAFE_WINDOW | DSB_CTRL_NO_WAIT_VBLANK | @@ -447,7 +449,7 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, ctrl | DSB_ENABLE); intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id), - dsb_chicken(crtc)); + dsb->chicken); intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id), dsb_error_int_status(display) | DSB_PROG_INT_STATUS | @@ -578,6 +580,7 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->free_pos = 0; dsb->ins_start_offset = 0; + dsb->chicken = dsb_chicken(state, crtc); dsb->hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline(state, crtc)); From 2039809783d630c1022bb3debe648abced7861c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:27 +0300 Subject: [PATCH 1518/1523] drm/i915/dsb: Introduce intel_dsb_wait_scanline_{in,out}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add functions to emit a DSB scanline window wait instructions. We can either wait for the scanline to be IN the window or OUT of the window. The hardware doesn't handle wraparound so we must manually deal with it by swapping the IN range to the inverse OUT range, or vice versa. Also add a bit of paranoia to catch the edge case of waiting for the entire frame. That doesn't make sense since an IN wait would be a nop, and an OUT wait would imply waiting forever. Most of the time this also results in both scanline ranges (original and inverted) to have lower=upper+1 which is nonsense from the hw POV. For now we are only handling the case where the scanline wait happens prior to latching the double buffered registers during the commit (which might change the timings due to LRR/VRR/etc.) Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-10-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 73 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 6 ++ 2 files changed, 79 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 779ba7eb8042..9fd0a40ab540 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -362,6 +362,79 @@ void intel_dsb_nonpost_end(struct intel_dsb *dsb) intel_dsb_noop(dsb, 4); } +static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb, + u32 opcode, int lower, int upper) +{ + u64 window = ((u64)upper << DSB_SCANLINE_UPPER_SHIFT) | + ((u64)lower << DSB_SCANLINE_LOWER_SHIFT); + + intel_dsb_emit(dsb, lower_32_bits(window), + (opcode << DSB_OPCODE_SHIFT) | + upper_32_bits(window)); +} + +static void intel_dsb_wait_dsl(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower_in, int upper_in, + int lower_out, int upper_out) +{ + struct intel_crtc *crtc = dsb->crtc; + + lower_in = dsb_scanline_to_hw(state, crtc, lower_in); + upper_in = dsb_scanline_to_hw(state, crtc, upper_in); + + lower_out = dsb_scanline_to_hw(state, crtc, lower_out); + upper_out = dsb_scanline_to_hw(state, crtc, upper_out); + + if (upper_in >= lower_in) + intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN, + lower_in, upper_in); + else if (upper_out >= lower_out) + intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, + lower_out, upper_out); + else + drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */ +} + +static void assert_dsl_ok(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + struct intel_crtc *crtc = dsb->crtc; + int vtotal = dsb_vtotal(state, crtc); + + /* + * Waiting for the entire frame doesn't make sense, + * (IN==don't wait, OUT=wait forever). + */ + drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1, + "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n", + crtc->base.base.id, crtc->base.name, dsb->id, + start, end, vtotal); +} + +void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + assert_dsl_ok(state, dsb, start, end); + + intel_dsb_wait_dsl(state, dsb, + start, end, + end + 1, start - 1); +} + +void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + assert_dsl_ok(state, dsb, start, end); + + intel_dsb_wait_dsl(state, dsb, + end + 1, start - 1, + start, end); +} + static void intel_dsb_align_tail(struct intel_dsb *dsb) { u32 aligned_tail, tail; diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index 84fc2f8434d1..d0737cefb393 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -39,6 +39,12 @@ void intel_dsb_reg_write_masked(struct intel_dsb *dsb, void intel_dsb_noop(struct intel_dsb *dsb, int count); void intel_dsb_nonpost_start(struct intel_dsb *dsb); void intel_dsb_nonpost_end(struct intel_dsb *dsb); +void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower, int upper); +void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower, int upper); void intel_dsb_commit(struct intel_dsb *dsb, bool wait_for_vblank); From 06358ccecd75bfcd988f347f79592e23159aaaa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:28 +0300 Subject: [PATCH 1519/1523] drm/i915/dsb: Introduce intel_dsb_chain() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to handle the DEwake tricks without involving the CPU we need a mechanism by which one DSB can start another one. Add a basic function to do so. We'll extend it later with additional code to actually deal with DEwake. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-11-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 42 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 3 ++ 2 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 9fd0a40ab540..642c280d4f29 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -500,6 +500,48 @@ static u32 dsb_error_int_en(struct intel_display *display) return errors; } +static void _intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb, + u32 ctrl) +{ + struct intel_display *display = to_intel_display(state->base.dev); + struct intel_crtc *crtc = dsb->crtc; + enum pipe pipe = crtc->pipe; + u32 tail; + + if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id)) + return; + + tail = chained_dsb->free_pos * 4; + if (drm_WARN_ON(display->drm, !IS_ALIGNED(tail, CACHELINE_BYTES))) + return; + + intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id), + ctrl | DSB_ENABLE); + + intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id), + dsb_chicken(state, crtc)); + + intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS | + dsb_error_int_en(display)); + + intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id), + intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf)); + + intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id), + intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf) + tail); +} + +void intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb) +{ + _intel_dsb_chain(state, dsb, chained_dsb, + 0); +} + static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, int hw_dewake_scanline) { diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index d0737cefb393..e59fd7da0fc0 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -45,6 +45,9 @@ void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, struct intel_dsb *dsb, int lower, int upper); +void intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb); void intel_dsb_commit(struct intel_dsb *dsb, bool wait_for_vblank); From 51e039542b87cb649d50ea3b5cf90847be32a30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:29 +0300 Subject: [PATCH 1520/1523] drm/i915/dsb: Allow intel_dsb_chain() to use DSB_WAIT_FOR_VBLANK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow intel_dsb_chain() to start the chained DSB at start of the undelaye vblank. This is slightly more involved than simply setting the bit as we must use the DEwake mechanism to eliminate pkgC latency. And DSB_ENABLE_DEWAKE itself is problematic in that it allows us to configure just a single scanline, and if the current scanline is already past that DSB_ENABLE_DEWAKE won't do anything, rendering the whole thing moot. The current workaround involves checking the pipe's current scanline with the CPU, and if it looks like we're about to miss the configured DEwake scanline we set DSB_FORCE_DEWAKE to immediately assert DEwake. This is somewhat racy since the hardware is making progress all the while we're checking it on the CPU. We can make things less racy by chaining two DSBs and handling the DSB_FORCE_DEWAKE stuff entirely without CPU involvement: 1. CPU starts the first DSB immediately 2. First DSB configures the second DSB, including its dewake_scanline 3. First DSB starts the second w/ DSB_WAIT_FOR_VBLANK 4. First DSB asserts DSB_FORCE_DEWAKE 5. First DSB waits until we're outside the dewake_scanline-vblank_start window 6. First DSB deasserts DSB_FORCE_DEWAKE That will guarantee that the we are fully awake when the second DSB starts to actually execute. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-12-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 43 +++++++++++++++++++++--- drivers/gpu/drm/i915/display/intel_dsb.h | 3 +- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 642c280d4f29..864d293d3f33 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -130,8 +130,8 @@ static int dsb_vtotal(struct intel_atomic_state *state, return intel_mode_vtotal(&crtc_state->hw.adjusted_mode); } -static int dsb_dewake_scanline(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static int dsb_dewake_scanline_start(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); struct drm_i915_private *i915 = to_i915(state->base.dev); @@ -141,6 +141,14 @@ static int dsb_dewake_scanline(struct intel_atomic_state *state, intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency); } +static int dsb_dewake_scanline_end(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + + return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode); +} + static int dsb_scanline_to_hw(struct intel_atomic_state *state, struct intel_crtc *crtc, int scanline) { @@ -527,19 +535,44 @@ static void _intel_dsb_chain(struct intel_atomic_state *state, dsb_error_int_status(display) | DSB_PROG_INT_STATUS | dsb_error_int_en(display)); + if (ctrl & DSB_WAIT_FOR_VBLANK) { + int dewake_scanline = dsb_dewake_scanline_start(state, crtc); + int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline); + + intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), + DSB_ENABLE_DEWAKE | + DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline)); + } + intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id), intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf)); intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id), intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf) + tail); + + if (ctrl & DSB_WAIT_FOR_VBLANK) { + /* + * Keep DEwake alive via the first DSB, in + * case we're already past dewake_scanline, + * and thus DSB_ENABLE_DEWAKE on the second + * DSB won't do its job. + */ + intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id), + DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE); + + intel_dsb_wait_scanline_out(state, dsb, + dsb_dewake_scanline_start(state, crtc), + dsb_dewake_scanline_end(state, crtc)); + } } void intel_dsb_chain(struct intel_atomic_state *state, struct intel_dsb *dsb, - struct intel_dsb *chained_dsb) + struct intel_dsb *chained_dsb, + bool wait_for_vblank) { _intel_dsb_chain(state, dsb, chained_dsb, - 0); + wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0); } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, @@ -697,7 +730,7 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->chicken = dsb_chicken(state, crtc); dsb->hw_dewake_scanline = - dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline(state, crtc)); + dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc)); return dsb; diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index e59fd7da0fc0..c352c12aa59f 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -47,7 +47,8 @@ void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, int lower, int upper); void intel_dsb_chain(struct intel_atomic_state *state, struct intel_dsb *dsb, - struct intel_dsb *chained_dsb); + struct intel_dsb *chained_dsb, + bool wait_for_vblank); void intel_dsb_commit(struct intel_dsb *dsb, bool wait_for_vblank); From 44378f6ef2efee3ccfa87de5860662f2356bdde8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:30 +0300 Subject: [PATCH 1521/1523] drm/i915/dsb: Clear DSB_ENABLE_DEWAKE once the DSB is done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to avoid the DSB keeping the DEwake permanently asserted we must clear DSB_PMCTRL_2.DSB_FORCE_DEWAKE once we are done. For good measure do the same for DSB_PMCTRL.DSB_ENABLE_DEWAKE. Experimentally this doens't seem to be actually necessary (unlike with DSB_FORCE_DEWAKE). That is, the DSB_ENABLE_DEWAKE doesn't seem to do anything whenever the DSB is not active. But I'd hate to waste a ton of power in case there I'm wrong and there is some way DEwake could remaing asserted. One extra register write is a small price to pay for some peace of mind. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-13-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_dsb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 864d293d3f33..da24e041d269 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -464,8 +464,10 @@ void intel_dsb_finish(struct intel_dsb *dsb) /* * DSB_FORCE_DEWAKE remains active even after DSB is * disabled, so make sure to clear it (if set during - * intel_dsb_commit()). + * intel_dsb_commit()). And clear DSB_ENABLE_DEWAKE as + * well for good measure. */ + intel_dsb_reg_write(dsb, DSB_PMCTRL(crtc->pipe, dsb->id), 0); intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(crtc->pipe, dsb->id), DSB_FORCE_DEWAKE, 0); From 07226d09a200b92797afabd3a5131a0b504344c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:31 +0300 Subject: [PATCH 1522/1523] drm/i915/dsb: s/dsb/dsb_color_vblank/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll soon utilize several DSBs during the commit. To that end rename the current crtc_state->dsb to crtc_state->dsb_color_vblank to better reflect its role (color managemnent stuff programmed during vblank). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-14-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_atomic.c | 4 +-- drivers/gpu/drm/i915/display/intel_color.c | 36 +++++++++---------- drivers/gpu/drm/i915/display/intel_display.c | 2 +- .../drm/i915/display/intel_display_types.h | 4 +-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 76aa10b6f647..55ce71be41ec 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -276,7 +276,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->do_async_flip = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; - crtc_state->dsb = NULL; + crtc_state->dsb_color_vblank = NULL; return &crtc_state->uapi; } @@ -310,7 +310,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, { struct intel_crtc_state *crtc_state = to_intel_crtc_state(state); - drm_WARN_ON(crtc->dev, crtc_state->dsb); + drm_WARN_ON(crtc->dev, crtc_state->dsb_color_vblank); __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); intel_crtc_free_hw_state(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 7ac50aacec73..27acbf92d60f 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1313,8 +1313,8 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state, { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (crtc_state->dsb) - intel_dsb_reg_write(crtc_state->dsb, reg, val); + if (crtc_state->dsb_color_vblank) + intel_dsb_reg_write(crtc_state->dsb_color_vblank, reg, val); else intel_de_write_fw(i915, reg, val); } @@ -1337,15 +1337,15 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, * unless we either write each entry twice, * or use non-posted writes */ - if (crtc_state->dsb) - intel_dsb_nonpost_start(crtc_state->dsb); + if (crtc_state->dsb_color_vblank) + intel_dsb_nonpost_start(crtc_state->dsb_color_vblank); for (i = 0; i < 256; i++) ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i])); - if (crtc_state->dsb) - intel_dsb_nonpost_end(crtc_state->dsb); + if (crtc_state->dsb_color_vblank) + intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); } static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, @@ -1870,7 +1870,7 @@ void intel_color_load_luts(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (crtc_state->dsb) + if (crtc_state->dsb_color_vblank) return; i915->display.funcs.color->load_luts(crtc_state); @@ -1890,8 +1890,8 @@ void intel_color_commit_arm(const struct intel_crtc_state *crtc_state) i915->display.funcs.color->color_commit_arm(crtc_state); - if (crtc_state->dsb) - intel_dsb_commit(crtc_state->dsb, true); + if (crtc_state->dsb_color_vblank) + intel_dsb_commit(crtc_state->dsb_color_vblank, true); } void intel_color_post_update(const struct intel_crtc_state *crtc_state) @@ -1919,33 +1919,33 @@ void intel_color_prepare_commit(struct intel_atomic_state *state, if (!crtc_state->pre_csc_lut && !crtc_state->post_csc_lut) return; - crtc_state->dsb = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 1024); - if (!crtc_state->dsb) + crtc_state->dsb_color_vblank = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 1024); + if (!crtc_state->dsb_color_vblank) return; i915->display.funcs.color->load_luts(crtc_state); - intel_dsb_finish(crtc_state->dsb); + intel_dsb_finish(crtc_state->dsb_color_vblank); } void intel_color_cleanup_commit(struct intel_crtc_state *crtc_state) { - if (!crtc_state->dsb) + if (!crtc_state->dsb_color_vblank) return; - intel_dsb_cleanup(crtc_state->dsb); - crtc_state->dsb = NULL; + intel_dsb_cleanup(crtc_state->dsb_color_vblank); + crtc_state->dsb_color_vblank = NULL; } void intel_color_wait_commit(const struct intel_crtc_state *crtc_state) { - if (crtc_state->dsb) - intel_dsb_wait(crtc_state->dsb); + if (crtc_state->dsb_color_vblank) + intel_dsb_wait(crtc_state->dsb_color_vblank); } bool intel_color_uses_dsb(const struct intel_crtc_state *crtc_state) { - return crtc_state->dsb; + return crtc_state->dsb_color_vblank; } static bool intel_can_preload_luts(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9ad0cb3fab29..10a82596472a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7513,7 +7513,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * * FIXME get rid of this funny new->old swapping */ - old_crtc_state->dsb = fetch_and_zero(&new_crtc_state->dsb); + old_crtc_state->dsb_color_vblank = fetch_and_zero(&new_crtc_state->dsb_color_vblank); } /* Underruns don't always raise interrupts, so check manually */ diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index bd290536a1b7..8ae9d06915e6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1396,8 +1396,8 @@ struct intel_crtc_state { /* Only valid on TGL+ */ enum transcoder mst_master_transcoder; - /* For DSB related info */ - struct intel_dsb *dsb; + /* For DSB based color LUT updates */ + struct intel_dsb *dsb_color_vblank; u32 psr2_man_track_ctl; From b5d4657e192ba7a3f21fc397cf5d169982b4ec0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Jun 2024 22:10:32 +0300 Subject: [PATCH 1523/1523] drm/i915/dsb: Use chained DSBs for LUT programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to better handle the necessary DSB DEwake tricks let's switch over to using a chained DSB for the actual LUT programming. The CPU will start 'dsb_color_commit', which in turn will start the chained 'dsb_color_vblank'. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240624191032.27333-15-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna --- drivers/gpu/drm/i915/display/intel_atomic.c | 2 ++ drivers/gpu/drm/i915/display/intel_color.c | 32 +++++++++++++++---- drivers/gpu/drm/i915/display/intel_display.c | 1 + .../drm/i915/display/intel_display_types.h | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 55ce71be41ec..12d6ed940751 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -277,6 +277,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->fb_bits = 0; crtc_state->update_planes = 0; crtc_state->dsb_color_vblank = NULL; + crtc_state->dsb_color_commit = NULL; return &crtc_state->uapi; } @@ -311,6 +312,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state = to_intel_crtc_state(state); drm_WARN_ON(crtc->dev, crtc_state->dsb_color_vblank); + drm_WARN_ON(crtc->dev, crtc_state->dsb_color_commit); __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); intel_crtc_free_hw_state(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 27acbf92d60f..5d701f48351b 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1890,8 +1890,8 @@ void intel_color_commit_arm(const struct intel_crtc_state *crtc_state) i915->display.funcs.color->color_commit_arm(crtc_state); - if (crtc_state->dsb_color_vblank) - intel_dsb_commit(crtc_state->dsb_color_vblank, true); + if (crtc_state->dsb_color_commit) + intel_dsb_commit(crtc_state->dsb_color_commit, false); } void intel_color_post_update(const struct intel_crtc_state *crtc_state) @@ -1919,26 +1919,44 @@ void intel_color_prepare_commit(struct intel_atomic_state *state, if (!crtc_state->pre_csc_lut && !crtc_state->post_csc_lut) return; - crtc_state->dsb_color_vblank = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 1024); + crtc_state->dsb_color_vblank = intel_dsb_prepare(state, crtc, INTEL_DSB_1, 1024); if (!crtc_state->dsb_color_vblank) return; i915->display.funcs.color->load_luts(crtc_state); intel_dsb_finish(crtc_state->dsb_color_vblank); + + crtc_state->dsb_color_commit = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 16); + if (!crtc_state->dsb_color_commit) { + intel_dsb_cleanup(crtc_state->dsb_color_vblank); + crtc_state->dsb_color_vblank = NULL; + return; + } + + intel_dsb_chain(state, crtc_state->dsb_color_commit, + crtc_state->dsb_color_vblank, true); + + intel_dsb_finish(crtc_state->dsb_color_commit); } void intel_color_cleanup_commit(struct intel_crtc_state *crtc_state) { - if (!crtc_state->dsb_color_vblank) - return; + if (crtc_state->dsb_color_commit) { + intel_dsb_cleanup(crtc_state->dsb_color_commit); + crtc_state->dsb_color_commit = NULL; + } - intel_dsb_cleanup(crtc_state->dsb_color_vblank); - crtc_state->dsb_color_vblank = NULL; + if (crtc_state->dsb_color_vblank) { + intel_dsb_cleanup(crtc_state->dsb_color_vblank); + crtc_state->dsb_color_vblank = NULL; + } } void intel_color_wait_commit(const struct intel_crtc_state *crtc_state) { + if (crtc_state->dsb_color_commit) + intel_dsb_wait(crtc_state->dsb_color_commit); if (crtc_state->dsb_color_vblank) intel_dsb_wait(crtc_state->dsb_color_vblank); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 10a82596472a..78ce402a5cd0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7514,6 +7514,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * FIXME get rid of this funny new->old swapping */ old_crtc_state->dsb_color_vblank = fetch_and_zero(&new_crtc_state->dsb_color_vblank); + old_crtc_state->dsb_color_commit = fetch_and_zero(&new_crtc_state->dsb_color_commit); } /* Underruns don't always raise interrupts, so check manually */ diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 8ae9d06915e6..868ff8976ed9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1397,7 +1397,7 @@ struct intel_crtc_state { enum transcoder mst_master_transcoder; /* For DSB based color LUT updates */ - struct intel_dsb *dsb_color_vblank; + struct intel_dsb *dsb_color_vblank, *dsb_color_commit; u32 psr2_man_track_ctl;