[Open-FCoE] [PATCH v2] libfc: rport retry on LS_RJT from certain ELS

Abhijeet Joglekar ajoglekar at nuovasystems.com
Tue Jan 20 21:48:12 UTC 2009


Chris, 

I was running into the same issue while testing with fnic. On rapid link
flaps, some times the plogi connection to a remote port would not
recover.

I will apply this patch and re-test.

Thanks
-- abhijeet

> -----Original Message-----
> From: devel-bounces at open-fcoe.org [mailto:devel-bounces at open-fcoe.org]
On
> Behalf Of Chris Leech
> Sent: Friday, January 16, 2009 12:03 PM
> To: devel at open-fcoe.org
> Subject: [Open-FCoE] [PATCH v2] libfc: rport retry on LS_RJT from
certain
> ELS
> 
> This allows any rport ELS to retry on LS_RJT.
> 
> The rport error handling would only retry on resource allocation
failures
> and exchange timeouts.  I have a target that will occasionally reject
> PLOGI
> when we do a quick LOGO/PLOGI.  When a critical ELS was rejected,
libfc
> would
> fail silently leaving the rport in a dead state.
> 
> The retry count and delay are managed by fc_rport_error_retry.  If the
> retry
> count is exceeded fc_rport_error will be called.  When retrying is not
the
> correct course of action, fc_rport_error can be called directly.
> 
> Signed-off-by: Chris Leech <christopher.leech at intel.com>
> ---
> 
>  drivers/scsi/libfc/fc_exch.c  |    2 -
>  drivers/scsi/libfc/fc_rport.c |  111
++++++++++++++++++++++++------------
> -----
>  include/scsi/fc/fc_fs.h       |    5 ++
>  3 files changed, 69 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/scsi/libfc/fc_exch.c
b/drivers/scsi/libfc/fc_exch.c
> index 66db08a..574cab4 100644
> --- a/drivers/scsi/libfc/fc_exch.c
> +++ b/drivers/scsi/libfc/fc_exch.c
> @@ -32,8 +32,6 @@
>  #include <scsi/libfc.h>
>  #include <scsi/fc_encode.h>
> 
> -#define	  FC_DEF_R_A_TOV      (10 * 1000) /* resource allocation
> timeout */
> -
>  /*
>   * fc_exch_debug can be set in debugger or at compile time to get
more
> logs.
>   */
> diff --git a/drivers/scsi/libfc/fc_rport.c
b/drivers/scsi/libfc/fc_rport.c
> index e780d8c..fed4d6d 100644
> --- a/drivers/scsi/libfc/fc_rport.c
> +++ b/drivers/scsi/libfc/fc_rport.c
> @@ -81,6 +81,7 @@ static void fc_rport_recv_logo_req(struct fc_rport
*,
>  				   struct fc_seq *, struct fc_frame *);
>  static void fc_rport_timeout(struct work_struct *);
>  static void fc_rport_error(struct fc_rport *, struct fc_frame *);
> +static void fc_rport_error_retry(struct fc_rport *, struct fc_frame
*);
>  static void fc_rport_work(struct work_struct *);
> 
>  static const char *fc_rport_state_names[] = {
> @@ -405,58 +406,74 @@ static void fc_rport_timeout(struct work_struct
> *work)
>  }
> 
>  /**
> - * fc_rport_error - Handler for any errors
> + * fc_rport_error - Error handler, called once retries have been
> exhausted
>   * @rport: The fc_rport object
>   * @fp: The frame pointer
>   *
> - * If the error was caused by a resource allocation failure
> - * then wait for half a second and retry, otherwise retry
> - * immediately.
> - *
>   * Locking Note: The rport lock is expected to be held before
>   * calling this routine
>   */
>  static void fc_rport_error(struct fc_rport *rport, struct fc_frame
*fp)
>  {
>  	struct fc_rport_libfc_priv *rdata = rport->dd_data;
> -	unsigned long delay = 0;
> 
>  	FC_DEBUG_RPORT("Error %ld in state %s, retries %d\n",
>  		       PTR_ERR(fp), fc_rport_state(rport),
rdata->retries);
> 
> -	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
> -		/*
> -		 * Memory allocation failure, or the exchange timed out.
> -		 *  Retry after delay
> -		 */
> -		if (rdata->retries < rdata->local_port->max_retry_count)
{
> -			rdata->retries++;
> -			if (!fp)
> -				delay = msecs_to_jiffies(500);
> -			get_device(&rport->dev);
> -			schedule_delayed_work(&rdata->retry_work,
delay);
> -		} else {
> -			switch (rdata->rp_state) {
> -			case RPORT_ST_PLOGI:
> -			case RPORT_ST_PRLI:
> -			case RPORT_ST_LOGO:
> -				rdata->event = RPORT_EV_FAILED;
> -				queue_work(rport_event_queue,
> -					   &rdata->event_work);
> -				break;
> -			case RPORT_ST_RTV:
> -				fc_rport_enter_ready(rport);
> -				break;
> -			case RPORT_ST_NONE:
> -			case RPORT_ST_READY:
> -			case RPORT_ST_INIT:
> -				break;
> -			}
> -		}
> +	switch (rdata->rp_state) {
> +	case RPORT_ST_PLOGI:
> +	case RPORT_ST_PRLI:
> +	case RPORT_ST_LOGO:
> +		rdata->event = RPORT_EV_FAILED;
> +		queue_work(rport_event_queue,
> +			   &rdata->event_work);
> +		break;
> +	case RPORT_ST_RTV:
> +		fc_rport_enter_ready(rport);
> +		break;
> +	case RPORT_ST_NONE:
> +	case RPORT_ST_READY:
> +	case RPORT_ST_INIT:
> +		break;
>  	}
>  }
> 
>  /**
> + * fc_rport_error_retry - Error handler when retries are desired
> + * @rport: The fc_rport object
> + * @fp: The frame pointer
> + *
> + * If the error was an exchange timeout retry immediately,
> + * otherwise wait for E_D_TOV.
> + *
> + * Locking Note: The rport lock is expected to be held before
> + * calling this routine
> + */
> +static void fc_rport_error_retry(struct fc_rport *rport, struct
fc_frame
> *fp)
> +{
> +	struct fc_rport_libfc_priv *rdata = rport->dd_data;
> +	unsigned long delay = FC_DEF_E_D_TOV;
> +
> +	/* make sure this isn't an FC_EX_CLOSED error, never retry those
*/
> +	if (PTR_ERR(fp) == -FC_EX_CLOSED)
> +		return fc_rport_error(rport, fp);
> +
> +	if (rdata->retries < rdata->local_port->max_retry_count) {
> +		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n"
> +				PTR_ERR(fp), fc_rport_state(rport));
> +		rdata->retries++;
> +		/* no additional delay on exchange timeouts */
> +		if (PTR_ERR(fp) == -FC_EX_TIMEOUT)
> +			delay = 0;
> +		get_device(&rport->dev);
> +		schedule_delayed_work(&rdata->retry_work, delay);
> +		return;
> +	}
> +
> +	return fc_rport_error(rport, fp);
> +}
> +
> +/**
>   * fc_rport_plogi_recv_resp - Handle incoming ELS PLOGI response
>   * @sp: current sequence in the PLOGI exchange
>   * @fp: response frame
> @@ -490,7 +507,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp,
> struct fc_frame *fp,
>  	}
> 
>  	if (IS_ERR(fp)) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		goto err;
>  	}
> 
> @@ -522,7 +539,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp,
> struct fc_frame *fp,
>  		else
>  			fc_rport_enter_prli(rport);
>  	} else
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
> 
>  out:
>  	fc_frame_free(fp);
> @@ -552,14 +569,14 @@ static void fc_rport_enter_plogi(struct fc_rport
> *rport)
>  	rport->maxframe_size = FC_MIN_MAX_PAYLOAD;
>  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
>  	if (!fp) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		return;
>  	}
>  	rdata->e_d_tov = lport->e_d_tov;
> 
>  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PLOGI,
>  				  fc_rport_plogi_resp, rport,
lport->e_d_tov))
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  	else
>  		get_device(&rport->dev);
>  }
> @@ -599,7 +616,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp,
> struct fc_frame *fp,
>  	}
> 
>  	if (IS_ERR(fp)) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		goto err;
>  	}
> 
> @@ -657,7 +674,7 @@ static void fc_rport_logo_resp(struct fc_seq *sp,
> struct fc_frame *fp,
>  		       rport->port_id);
> 
>  	if (IS_ERR(fp)) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		goto err;
>  	}
> 
> @@ -707,13 +724,13 @@ static void fc_rport_enter_prli(struct fc_rport
> *rport)
> 
>  	fp = fc_frame_alloc(lport, sizeof(*pp));
>  	if (!fp) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		return;
>  	}
> 
>  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PRLI,
>  				  fc_rport_prli_resp, rport,
lport->e_d_tov))
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  	else
>  		get_device(&rport->dev);
>  }
> @@ -804,13 +821,13 @@ static void fc_rport_enter_rtv(struct fc_rport
> *rport)
> 
>  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_rtv));
>  	if (!fp) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		return;
>  	}
> 
>  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_RTV,
>  				     fc_rport_rtv_resp, rport,
lport->e_d_tov))
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  	else
>  		get_device(&rport->dev);
>  }
> @@ -835,13 +852,13 @@ static void fc_rport_enter_logo(struct fc_rport
> *rport)
> 
>  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_logo));
>  	if (!fp) {
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  		return;
>  	}
> 
>  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_LOGO,
>  				  fc_rport_logo_resp, rport,
lport->e_d_tov))
> -		fc_rport_error(rport, fp);
> +		fc_rport_error_retry(rport, fp);
>  	else
>  		get_device(&rport->dev);
>  }
> diff --git a/include/scsi/fc/fc_fs.h b/include/scsi/fc/fc_fs.h
> index 3e4801d..1b7af3a 100644
> --- a/include/scsi/fc/fc_fs.h
> +++ b/include/scsi/fc/fc_fs.h
> @@ -337,4 +337,9 @@ enum fc_pf_rjt_reason {
>  	FC_RJT_VENDOR =		0xff,	/* vendor specific reject */
>  };
> 
> +/* default timeout values */
> +
> +#define FC_DEF_E_D_TOV	2000UL
> +#define FC_DEF_R_A_TOV	10000UL
> +
>  #endif /* _FC_FS_H_ */
> 
> _______________________________________________
> devel mailing list
> devel at open-fcoe.org
> http://www.open-fcoe.org/mailman/listinfo/devel



More information about the devel mailing list