scsi: qla2xxx: target: Add back SRR support

Background: loading qla2xxx with "ql2xtgt_tape_enable=1" enables Sequence Level Error Recovery (SLER), which is most commonly used for tape drives. With SLER enabled, if there is a recoverable I/O error during a SCSI command, a Sequence Retransmission Request (SRR) will be used to retry the I/O at a low-level completely within the driver without propagating the error to the upper levels of the SCSI stack. SRR support was removed in 2017 by commit 2c39b5ca2a ("qla2xxx: Remove SRR code"). Add it back, new and improved. The old removed SRR code used sequence numbers to correlate the SRR CTIOs with SRR immediate notify messages. I don't see how that would work reliably with MSI-X interrupts and multiple queues. So instead use the exchange address to find the command associated with the immediate notify (qlt_srr_to_cmd). The old removed SRR code had a function qlt_check_srr_debug() to simulate a SRR, but it didn't work for me. Instead I just used fiber optic attenuators attached to the FC cable to reduce the strength of the signal and induce errors. Unfortunately this only worked for inducing SRRs on Data-Out (write) commands, so that is all I was able to test. The code to build a new scatterlist for a SRR with nonzero offset has been improved to reduce memory requirements and has been well-tested. However it does not support protection information. When a single cmd gets multiple SRRs, the old removed SRR code would restore the data buffer from the values in cmd->se_cmd before processing the new SRR. That might be needed if the offset for the new SRR was lower than the offset for the previous SRR, but I am not sure if that can happen. In my testing, when a single cmd gets multiple SRRs, the SRR offset always increases or stays the same. But in case it can decrease, I added the function qlt_restore_orig_sg(). If this is not supposed to happen then qlt_restore_orig_sg() can be removed to simplify the code. I ran into some HBA firmware bugs with QLE269x, QLE27xx, and QLE28xx firmware 9.05.xx - 9.08.xx where a SRR would cause the HBA to misbehave badly. Since SRRs are rare and therefore difficult to test, I figured it would be worth checking for the buggy firmware and disabling SLER with a warning instead of letting others run into the same problem on the rare occasion that they get a SRR. This turned out to be difficult because the firmware version isn't known in the normal NVRAM config routine, so I added a second NVRAM config routine that is called after the firmware version is known. Signed-off-by: Tony Battersby <tonyb@cybernetics.com> Link: https://patch.msgid.link/654b7181-b79e-40ed-a15b-6d6e441a5d5f@cybernetics.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2025-12-07 20:06:24 +00:00 · 2025-11-10 11:04:42 -05:00
parent 04957d8c98
commit c7bd85a7b9
5 changed files with 1141 additions and 1 deletions
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -58,6 +58,7 @@
 * | Target Mode Management	  |	  0xf09b       | 0xf002		|
 * |                              |                    | 0xf046-0xf049  |
 * | Target Mode Task Management  |	  0x1000d      |		|
+ * | Target Mode SRR		  |	  0x11038      |		|
 * ----------------------------------------------------------------------
 */

--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4369,6 +4369,7 @@ enable_82xx_npiv:
 						ha->max_npiv_vports =
 						    MIN_MULTI_ID_FABRIC - 1;
 				}
+				qlt_config_nvram_with_fw_version(vha);
 				qla2x00_get_resource_cnts(vha);
 				qla_init_iocb_limit(vha);

--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -184,6 +184,7 @@ struct nack_to_isp {
 #define NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM	0x9

 #define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL		0
+#define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_INVALID_OX_ID_RX_ID	0x17
 #define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_UNABLE_TO_SUPPLY_DATA	0x2a

 #define NOTIFY_ACK_SUCCESS      0x01
@@ -686,6 +687,8 @@ struct qla_tgt_func_tmpl {
 	int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, u64, uint16_t,
 			uint32_t);
 	struct qla_tgt_cmd *(*get_cmd)(struct fc_port *);
+	int (*get_cmd_ref)(struct qla_tgt_cmd *cmd);
+	void (*put_cmd_ref)(struct qla_tgt_cmd *cmd);
 	void (*rel_cmd)(struct qla_tgt_cmd *);
 	void (*free_cmd)(struct qla_tgt_cmd *);
 	void (*free_mcmd)(struct qla_tgt_mgmt_cmd *);
@@ -823,7 +826,13 @@ struct qla_tgt {
 	int notify_ack_expected;
 	int abts_resp_expected;
 	int modify_lun_expected;
+
+	spinlock_t srr_lock;
+	struct list_head srr_list;
+	struct work_struct srr_work;
+
 	atomic_t tgt_global_resets_count;
+
 	struct list_head tgt_list_entry;
 };

@@ -861,6 +870,7 @@ enum trace_flags {
 	TRC_DATA_IN = BIT_18,
 	TRC_ABORT = BIT_19,
 	TRC_DIF_ERR = BIT_20,
+	TRC_SRR_IMM = BIT_21,
 };

 struct qla_tgt_cmd {
@@ -881,6 +891,10 @@ struct qla_tgt_cmd {

 	unsigned int conf_compl_supported:1;
 	unsigned int sg_mapped:1;
+
+	/* Call qlt_free_sg() if set. */
+	unsigned int free_sg:1;
+
 	unsigned int write_data_transferred:1;

 	/* Set if the SCSI status was sent successfully. */
@@ -892,6 +906,9 @@ struct qla_tgt_cmd {
 	unsigned int cmd_in_wq:1;
 	unsigned int edif:1;

+	/* Set if a SRR was rejected. */
+	unsigned int srr_failed:1;
+
 	/* Set if the exchange has been terminated. */
 	unsigned int sent_term_exchg:1;

@@ -901,6 +918,7 @@ struct qla_tgt_cmd {
 	 */
 	unsigned int aborted:1;

+	struct qla_tgt_srr *srr;
 	struct scatterlist *sg;	/* cmd data buffer SG vector */
 	int sg_cnt;		/* SG segments count */
 	int bufflen;		/* cmd buffer length */
@@ -940,6 +958,14 @@ struct qla_tgt_cmd {
 	uint16_t prot_flags;

 	unsigned long jiffies_at_term_exchg;
+
+	/*
+	 * jiffies64 when qlt_rdy_to_xfer() or qlt_xmit_response() first
+	 * called, or 0 when not in those states.  Used to limit the number of
+	 * SRR retries.
+	 */
+	uint64_t jiffies_at_hw_st_entry;
+
 	uint64_t jiffies_at_alloc;
 	uint64_t jiffies_at_free;

@@ -1002,6 +1028,45 @@ struct qla_tgt_prm {
 	uint16_t tot_dsds;
 };

+/*
+ * SRR (Sequence Retransmission Request) - resend or re-receive some or all
+ * data or status to recover from a transient I/O error.
+ */
+struct qla_tgt_srr {
+	/*
+	 * Copy of immediate notify SRR message received from hw; valid only if
+	 * imm_ntfy_recvd is true.
+	 */
+	struct imm_ntfy_from_isp imm_ntfy;
+
+	struct list_head srr_list_entry;
+
+	/* The command affected by this SRR, or NULL if not yet determined. */
+	struct qla_tgt_cmd *cmd;
+
+	/* Used to detect if the HBA has been reset since receiving the SRR. */
+	uint32_t reset_count;
+
+	/*
+	 * The hardware sends two messages for each SRR - an immediate notify
+	 * and a CTIO with CTIO_SRR_RECEIVED status.  These keep track of which
+	 * messages have been received.  The SRR can be processed once both of
+	 * these are true.
+	 */
+	bool imm_ntfy_recvd;
+	bool ctio_recvd;
+
+	/*
+	 * This is set to true if the affected command was aborted (cmd may be
+	 * set to NULL), in which case the immediate notify exchange also needs
+	 * to be aborted.
+	 */
+	bool aborted;
+
+	/* This is set to true to force the SRR to be rejected. */
+	bool reject;
+};
+
 /* Check for Switch reserved address */
 #define IS_SW_RESV_ADDR(_s_id) \
 	((_s_id.b.domain == 0xff) && ((_s_id.b.area & 0xf0) == 0xf0))
@@ -1056,6 +1121,20 @@ static inline uint32_t sid_to_key(const be_id_t s_id)
 		s_id.al_pa;
 }

+/*
+ * Free the scatterlist allocated by qlt_set_data_offset().  Call this only if
+ * cmd->free_sg is set.
+ */
+static inline void qlt_free_sg(struct qla_tgt_cmd *cmd)
+{
+	/*
+	 * The scatterlist may be chained to the original scatterlist, but we
+	 * only need to free the first segment here since that is the only part
+	 * allocated by qlt_set_data_offset().
+	 */
+	kfree(cmd->sg);
+}
+
 /*
 * Exported symbols from qla_target.c LLD logic used by qla2xxx code..
 */
@@ -1064,6 +1143,7 @@ extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, struct rsp_que *,
 extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
 extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
 extern int qlt_abort_cmd(struct qla_tgt_cmd *);
+void qlt_srr_abort(struct qla_tgt_cmd *cmd, bool reject);
 void qlt_send_term_exchange(struct qla_qpair *qpair,
 	struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked);
 extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
@@ -1086,6 +1166,7 @@ extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *,
 	struct init_cb_81xx *);
 extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *,
 	struct nvram_81xx *);
+void qlt_config_nvram_with_fw_version(struct scsi_qla_host *vha);
 extern void qlt_modify_vp_config(struct scsi_qla_host *,
 	struct vp_config_entry_24xx *);
 extern void qlt_probe_one_stage1(struct scsi_qla_host *, struct qla_hw_data *);
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -291,6 +291,16 @@ static struct qla_tgt_cmd *tcm_qla2xxx_get_cmd(struct fc_port *sess)
 	return cmd;
 }

+static int tcm_qla2xxx_get_cmd_ref(struct qla_tgt_cmd *cmd)
+{
+	return target_get_sess_cmd(&cmd->se_cmd, true);
+}
+
+static void tcm_qla2xxx_put_cmd_ref(struct qla_tgt_cmd *cmd)
+{
+	target_put_sess_cmd(&cmd->se_cmd);
+}
+
 static void tcm_qla2xxx_rel_cmd(struct qla_tgt_cmd *cmd)
 {
 	target_free_tag(cmd->sess->se_sess, &cmd->se_cmd);
@@ -531,6 +541,9 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 		if (cmd->se_cmd.pi_err)
 			transport_generic_request_failure(&cmd->se_cmd,
 				cmd->se_cmd.pi_err);
+		else if (cmd->srr_failed)
+			transport_generic_request_failure(&cmd->se_cmd,
+				TCM_SNACK_REJECTED);
 		else
 			transport_generic_request_failure(&cmd->se_cmd,
 				TCM_CHECK_CONDITION_ABORT_CMD);
@@ -1526,6 +1539,8 @@ static const struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
 	.handle_data		= tcm_qla2xxx_handle_data,
 	.handle_tmr		= tcm_qla2xxx_handle_tmr,
 	.get_cmd		= tcm_qla2xxx_get_cmd,
+	.get_cmd_ref		= tcm_qla2xxx_get_cmd_ref,
+	.put_cmd_ref		= tcm_qla2xxx_put_cmd_ref,
 	.rel_cmd		= tcm_qla2xxx_rel_cmd,
 	.free_cmd		= tcm_qla2xxx_free_cmd,
 	.free_mcmd		= tcm_qla2xxx_free_mcmd,