[Open-FCoE] [PATCH v2] libfc: rport retry on LS_RJT fromcertain ELS

Abhijeet Joglekar ajoglekar at nuovasystems.com
Wed Jan 21 02:33:43 UTC 2009


Chris, 

I am getting exchange leak, and hence a crash while unloading libfc
module after applying this patch. Are you seeing this in your tests?

I am missing a couple of libFC patches that went in last month, but a
quick glance at those patches does not show any dependency with this
specific patch. 

It's most likely a simple fix, since I don't have to do extensive link
flapping to get the exchange leak. Simply load the libFC and fnic
module, and then unload it and the leak shows up. I have multiple libFC
initiators that are doing Plogis to each other (unsuccessfully since
initiators don't support incoming plogis). Maybe the leak has to do with
these exchanges that time out.

I will spend some time debugging this, meanwhile if you are seeing the
exchange leak in your test, let me know. 

-- abhijeet


> -----Original Message-----
> From: devel-bounces at open-fcoe.org [mailto:devel-bounces at open-fcoe.org]
On
> Behalf Of Abhijeet Joglekar
> Sent: Tuesday, January 20, 2009 1:48 PM
> To: devel at open-fcoe.org
> Subject: Re: [Open-FCoE] [PATCH v2] libfc: rport retry on LS_RJT
> fromcertain ELS
> 
> Chris,
> 
> I was running into the same issue while testing with fnic. On rapid
link
> flaps, some times the plogi connection to a remote port would not
> recover.
> 
> I will apply this patch and re-test.
> 
> Thanks
> -- abhijeet
> 
> > -----Original Message-----
> > From: devel-bounces at open-fcoe.org
[mailto:devel-bounces at open-fcoe.org]
> On
> > Behalf Of Chris Leech
> > Sent: Friday, January 16, 2009 12:03 PM
> > To: devel at open-fcoe.org
> > Subject: [Open-FCoE] [PATCH v2] libfc: rport retry on LS_RJT from
> certain
> > ELS
> >
> > This allows any rport ELS to retry on LS_RJT.
> >
> > The rport error handling would only retry on resource allocation
> failures
> > and exchange timeouts.  I have a target that will occasionally
reject
> > PLOGI
> > when we do a quick LOGO/PLOGI.  When a critical ELS was rejected,
> libfc
> > would
> > fail silently leaving the rport in a dead state.
> >
> > The retry count and delay are managed by fc_rport_error_retry.  If
the
> > retry
> > count is exceeded fc_rport_error will be called.  When retrying is
not
> the
> > correct course of action, fc_rport_error can be called directly.
> >
> > Signed-off-by: Chris Leech <christopher.leech at intel.com>
> > ---
> >
> >  drivers/scsi/libfc/fc_exch.c  |    2 -
> >  drivers/scsi/libfc/fc_rport.c |  111
> ++++++++++++++++++++++++------------
> > -----
> >  include/scsi/fc/fc_fs.h       |    5 ++
> >  3 files changed, 69 insertions(+), 49 deletions(-)
> >
> > diff --git a/drivers/scsi/libfc/fc_exch.c
> b/drivers/scsi/libfc/fc_exch.c
> > index 66db08a..574cab4 100644
> > --- a/drivers/scsi/libfc/fc_exch.c
> > +++ b/drivers/scsi/libfc/fc_exch.c
> > @@ -32,8 +32,6 @@
> >  #include <scsi/libfc.h>
> >  #include <scsi/fc_encode.h>
> >
> > -#define	  FC_DEF_R_A_TOV      (10 * 1000) /* resource allocation
> > timeout */
> > -
> >  /*
> >   * fc_exch_debug can be set in debugger or at compile time to get
> more
> > logs.
> >   */
> > diff --git a/drivers/scsi/libfc/fc_rport.c
> b/drivers/scsi/libfc/fc_rport.c
> > index e780d8c..fed4d6d 100644
> > --- a/drivers/scsi/libfc/fc_rport.c
> > +++ b/drivers/scsi/libfc/fc_rport.c
> > @@ -81,6 +81,7 @@ static void fc_rport_recv_logo_req(struct fc_rport
> *,
> >  				   struct fc_seq *, struct fc_frame *);
> >  static void fc_rport_timeout(struct work_struct *);
> >  static void fc_rport_error(struct fc_rport *, struct fc_frame *);
> > +static void fc_rport_error_retry(struct fc_rport *, struct fc_frame
> *);
> >  static void fc_rport_work(struct work_struct *);
> >
> >  static const char *fc_rport_state_names[] = {
> > @@ -405,58 +406,74 @@ static void fc_rport_timeout(struct
work_struct
> > *work)
> >  }
> >
> >  /**
> > - * fc_rport_error - Handler for any errors
> > + * fc_rport_error - Error handler, called once retries have been
> > exhausted
> >   * @rport: The fc_rport object
> >   * @fp: The frame pointer
> >   *
> > - * If the error was caused by a resource allocation failure
> > - * then wait for half a second and retry, otherwise retry
> > - * immediately.
> > - *
> >   * Locking Note: The rport lock is expected to be held before
> >   * calling this routine
> >   */
> >  static void fc_rport_error(struct fc_rport *rport, struct fc_frame
> *fp)
> >  {
> >  	struct fc_rport_libfc_priv *rdata = rport->dd_data;
> > -	unsigned long delay = 0;
> >
> >  	FC_DEBUG_RPORT("Error %ld in state %s, retries %d\n",
> >  		       PTR_ERR(fp), fc_rport_state(rport),
> rdata->retries);
> >
> > -	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
> > -		/*
> > -		 * Memory allocation failure, or the exchange timed out.
> > -		 *  Retry after delay
> > -		 */
> > -		if (rdata->retries < rdata->local_port->max_retry_count)
> {
> > -			rdata->retries++;
> > -			if (!fp)
> > -				delay = msecs_to_jiffies(500);
> > -			get_device(&rport->dev);
> > -			schedule_delayed_work(&rdata->retry_work,
> delay);
> > -		} else {
> > -			switch (rdata->rp_state) {
> > -			case RPORT_ST_PLOGI:
> > -			case RPORT_ST_PRLI:
> > -			case RPORT_ST_LOGO:
> > -				rdata->event = RPORT_EV_FAILED;
> > -				queue_work(rport_event_queue,
> > -					   &rdata->event_work);
> > -				break;
> > -			case RPORT_ST_RTV:
> > -				fc_rport_enter_ready(rport);
> > -				break;
> > -			case RPORT_ST_NONE:
> > -			case RPORT_ST_READY:
> > -			case RPORT_ST_INIT:
> > -				break;
> > -			}
> > -		}
> > +	switch (rdata->rp_state) {
> > +	case RPORT_ST_PLOGI:
> > +	case RPORT_ST_PRLI:
> > +	case RPORT_ST_LOGO:
> > +		rdata->event = RPORT_EV_FAILED;
> > +		queue_work(rport_event_queue,
> > +			   &rdata->event_work);
> > +		break;
> > +	case RPORT_ST_RTV:
> > +		fc_rport_enter_ready(rport);
> > +		break;
> > +	case RPORT_ST_NONE:
> > +	case RPORT_ST_READY:
> > +	case RPORT_ST_INIT:
> > +		break;
> >  	}
> >  }
> >
> >  /**
> > + * fc_rport_error_retry - Error handler when retries are desired
> > + * @rport: The fc_rport object
> > + * @fp: The frame pointer
> > + *
> > + * If the error was an exchange timeout retry immediately,
> > + * otherwise wait for E_D_TOV.
> > + *
> > + * Locking Note: The rport lock is expected to be held before
> > + * calling this routine
> > + */
> > +static void fc_rport_error_retry(struct fc_rport *rport, struct
> fc_frame
> > *fp)
> > +{
> > +	struct fc_rport_libfc_priv *rdata = rport->dd_data;
> > +	unsigned long delay = FC_DEF_E_D_TOV;
> > +
> > +	/* make sure this isn't an FC_EX_CLOSED error, never retry those
> */
> > +	if (PTR_ERR(fp) == -FC_EX_CLOSED)
> > +		return fc_rport_error(rport, fp);
> > +
> > +	if (rdata->retries < rdata->local_port->max_retry_count) {
> > +		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n"
> > +				PTR_ERR(fp), fc_rport_state(rport));
> > +		rdata->retries++;
> > +		/* no additional delay on exchange timeouts */
> > +		if (PTR_ERR(fp) == -FC_EX_TIMEOUT)
> > +			delay = 0;
> > +		get_device(&rport->dev);
> > +		schedule_delayed_work(&rdata->retry_work, delay);
> > +		return;
> > +	}
> > +
> > +	return fc_rport_error(rport, fp);
> > +}
> > +
> > +/**
> >   * fc_rport_plogi_recv_resp - Handle incoming ELS PLOGI response
> >   * @sp: current sequence in the PLOGI exchange
> >   * @fp: response frame
> > @@ -490,7 +507,7 @@ static void fc_rport_plogi_resp(struct fc_seq
*sp,
> > struct fc_frame *fp,
> >  	}
> >
> >  	if (IS_ERR(fp)) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		goto err;
> >  	}
> >
> > @@ -522,7 +539,7 @@ static void fc_rport_plogi_resp(struct fc_seq
*sp,
> > struct fc_frame *fp,
> >  		else
> >  			fc_rport_enter_prli(rport);
> >  	} else
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >
> >  out:
> >  	fc_frame_free(fp);
> > @@ -552,14 +569,14 @@ static void fc_rport_enter_plogi(struct
fc_rport
> > *rport)
> >  	rport->maxframe_size = FC_MIN_MAX_PAYLOAD;
> >  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
> >  	if (!fp) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		return;
> >  	}
> >  	rdata->e_d_tov = lport->e_d_tov;
> >
> >  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PLOGI,
> >  				  fc_rport_plogi_resp, rport,
> lport->e_d_tov))
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  	else
> >  		get_device(&rport->dev);
> >  }
> > @@ -599,7 +616,7 @@ static void fc_rport_prli_resp(struct fc_seq
*sp,
> > struct fc_frame *fp,
> >  	}
> >
> >  	if (IS_ERR(fp)) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		goto err;
> >  	}
> >
> > @@ -657,7 +674,7 @@ static void fc_rport_logo_resp(struct fc_seq
*sp,
> > struct fc_frame *fp,
> >  		       rport->port_id);
> >
> >  	if (IS_ERR(fp)) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		goto err;
> >  	}
> >
> > @@ -707,13 +724,13 @@ static void fc_rport_enter_prli(struct
fc_rport
> > *rport)
> >
> >  	fp = fc_frame_alloc(lport, sizeof(*pp));
> >  	if (!fp) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		return;
> >  	}
> >
> >  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PRLI,
> >  				  fc_rport_prli_resp, rport,
> lport->e_d_tov))
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  	else
> >  		get_device(&rport->dev);
> >  }
> > @@ -804,13 +821,13 @@ static void fc_rport_enter_rtv(struct fc_rport
> > *rport)
> >
> >  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_rtv));
> >  	if (!fp) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		return;
> >  	}
> >
> >  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_RTV,
> >  				     fc_rport_rtv_resp, rport,
> lport->e_d_tov))
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  	else
> >  		get_device(&rport->dev);
> >  }
> > @@ -835,13 +852,13 @@ static void fc_rport_enter_logo(struct
fc_rport
> > *rport)
> >
> >  	fp = fc_frame_alloc(lport, sizeof(struct fc_els_logo));
> >  	if (!fp) {
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  		return;
> >  	}
> >
> >  	if (!lport->tt.elsct_send(lport, rport, fp, ELS_LOGO,
> >  				  fc_rport_logo_resp, rport,
> lport->e_d_tov))
> > -		fc_rport_error(rport, fp);
> > +		fc_rport_error_retry(rport, fp);
> >  	else
> >  		get_device(&rport->dev);
> >  }
> > diff --git a/include/scsi/fc/fc_fs.h b/include/scsi/fc/fc_fs.h
> > index 3e4801d..1b7af3a 100644
> > --- a/include/scsi/fc/fc_fs.h
> > +++ b/include/scsi/fc/fc_fs.h
> > @@ -337,4 +337,9 @@ enum fc_pf_rjt_reason {
> >  	FC_RJT_VENDOR =		0xff,	/* vendor specific reject */
> >  };
> >
> > +/* default timeout values */
> > +
> > +#define FC_DEF_E_D_TOV	2000UL
> > +#define FC_DEF_R_A_TOV	10000UL
> > +
> >  #endif /* _FC_FS_H_ */
> >
> > _______________________________________________
> > devel mailing list
> > devel at open-fcoe.org
> > http://www.open-fcoe.org/mailman/listinfo/devel
> _______________________________________________
> devel mailing list
> devel at open-fcoe.org
> http://www.open-fcoe.org/mailman/listinfo/devel



More information about the devel mailing list