[Open-FCoE] [RESUBMIT PATCH] libfc, fcoe: fixed locking issues with lport->lp_mutex around lport->link_status

Joe Eykholt jre at nuovasystems.com
Fri Jan 16 03:18:03 UTC 2009


Vasu Dev wrote:
> The fcoe_xmit could call fc_pause in case the pending skb queue len is larger
> than FCOE_MAX_QUEUE_DEPTH, the fc_pause was trying to grab lport->lp_muex to
> change lport->link_status and that had these issues :-
> 
> 1. The fcoe_xmit was getting called with bh disabled, thus causing
> "BUG: scheduling while atomic" when grabbing lport->lp_muex with bh disabled.
> 
> 2. fc_linkup and fc_linkdown function calls lport_enter function with
> lport->lp_mutex held and these enter function in turn calls fcoe_xmit to send
> lport related FC frame, e.g. fc_linkup => fc_lport_enter_flogi to send flogi
> req. In this case grabbing the same lport->lp_mutex again in fc_puase from
> fcoe_xmit would cause deadlock.
> 
> The lport->lp_mutex was used for setting FC_PAUSE in fcoe_xmit path but
> FC_PAUSE bit was not used anywhere beside just setting and clear this
> bit in lport->link_status, instead used a separate field link_pause in fc_lport
> to eliminate need for lport->lp_mutex to track this pause and in turn avoid
> above described two locking issues.
> 
> Also added check for lp->link_pause in fc_fcp_lport_queue_ready to trigger
> SCSI_MLQUEUE_HOST_BUSY when lp->link_pause is set, this should prevent more
> scsi-ml cmds while lp->link_pause is set.
> 
> This patch eliminated FC_LINK_UP and FC_PAUSE and instead used dedicated bit
> fields for these bits in fc_lport, this simplified all related conditional
> code.
> 
> Signed-off-by: Vasu Dev <vasu.dev at intel.com>
> ---
> 
>  drivers/scsi/fcoe/fcoe_sw.c   |    6 +++---
>  drivers/scsi/fcoe/libfcoe.c   |   18 +++++++++---------
>  drivers/scsi/libfc/fc_fcp.c   |    4 ++--
>  drivers/scsi/libfc/fc_lport.c |   20 ++++++++------------
>  include/scsi/libfc.h          |    6 ++----
>  5 files changed, 24 insertions(+), 30 deletions(-)
> 
> 
> diff --git a/drivers/scsi/fcoe/fcoe_sw.c b/drivers/scsi/fcoe/fcoe_sw.c
> index dc4cd5e..09661f0 100644
> --- a/drivers/scsi/fcoe/fcoe_sw.c
> +++ b/drivers/scsi/fcoe/fcoe_sw.c
> @@ -116,7 +116,8 @@ static int fcoe_sw_lport_config(struct fc_lport *lp)
>  {
>  	int i = 0;
>  
> -	lp->link_status = 0;
> +	lp->link_up = 0;
> +	lp->link_pause = 0;
>  	lp->max_retry_count = 3;
>  	lp->e_d_tov = 2 * 1000;	/* FC-FS default */
>  	lp->r_a_tov = 2 * 2 * 1000;
> @@ -181,9 +182,8 @@ static int fcoe_sw_netdev_config(struct fc_lport *lp, struct net_device *netdev)
>  	if (fc_set_mfs(lp, mfs))
>  		return -EINVAL;
>  
> -	lp->link_status = ~FC_PAUSE & ~FC_LINK_UP;
>  	if (!fcoe_link_ok(lp))
> -		lp->link_status |= FC_LINK_UP;
> +		lp->link_up = 1;
>  
>  	/* offload features support */
>  	if (fc->real_dev->features & NETIF_F_SG)
> diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
> index e419f48..f5908fc 100644
> --- a/drivers/scsi/fcoe/libfcoe.c
> +++ b/drivers/scsi/fcoe/libfcoe.c
> @@ -873,7 +873,7 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  	struct net_device *real_dev = ptr;
>  	struct fcoe_softc *fc;
>  	struct fcoe_dev_stats *stats;
> -	u16 new_status;
> +	u32 new_link_up;
>  	u32 mfs;
>  	int rc = NOTIFY_OK;
>  
> @@ -890,17 +890,17 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  		goto out;
>  	}
>  
> -	new_status = lp->link_status;
> +	new_link_up = lp->link_up;
>  	switch (event) {
>  	case NETDEV_DOWN:
>  	case NETDEV_GOING_DOWN:
> -		new_status &= ~FC_LINK_UP;
> +		new_link_up = 0;
>  		break;
>  	case NETDEV_UP:
>  	case NETDEV_CHANGE:
> -		new_status &= ~FC_LINK_UP;
> +		new_link_up = 0;
>  		if (!fcoe_link_ok(lp))
> -			new_status |= FC_LINK_UP;
> +			new_link_up = 1;

We could say:
		new_link_up = !fcoe_link_ok(lp);

But it is OK the way you have it.

I think we should change the name of fcoe_link_ok, since it returns 0 when the
link is good.  I'd call it fcoe_link_bad(), or have it return 1 when the link is good.

>  		break;
>  	case NETDEV_CHANGEMTU:
>  		mfs = fc->real_dev->mtu -
> @@ -908,17 +908,17 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  			 sizeof(struct fcoe_crc_eof));
>  		if (mfs >= FC_MIN_MAX_FRAME)
>  			fc_set_mfs(lp, mfs);
> -		new_status &= ~FC_LINK_UP;
> +		new_link_up = 0;
>  		if (!fcoe_link_ok(lp))
> -			new_status |= FC_LINK_UP;
> +			new_link_up = 1;
>  		break;
>  	case NETDEV_REGISTER:
>  		break;
>  	default:
>  		FC_DBG("unknown event %ld call", event);
>  	}
> -	if (lp->link_status != new_status) {
> -		if ((new_status & FC_LINK_UP) == FC_LINK_UP)
> +	if (lp->link_up != new_link_up) {
> +		if (new_link_up)
>  			fc_linkup(lp);
>  		else {
>  			stats = lp->dev_stats[smp_processor_id()];
> diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
> index 404e63f..c35a1ac 100644
> --- a/drivers/scsi/libfc/fc_fcp.c
> +++ b/drivers/scsi/libfc/fc_fcp.c
> @@ -1621,7 +1621,7 @@ out:
>  static inline int fc_fcp_lport_queue_ready(struct fc_lport *lp)
>  {
>  	/* lock ? */
> -	return (lp->state == LPORT_ST_READY) && (lp->link_status & FC_LINK_UP);
> +	return (lp->state == LPORT_ST_READY) && lp->link_up && !lp->link_pause;
>  }
>  
>  /**
> @@ -1890,7 +1890,7 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd)
>  	lp = shost_priv(sc_cmd->device->host);
>  	if (lp->state != LPORT_ST_READY)
>  		return rc;
> -	else if (!(lp->link_status & FC_LINK_UP))
> +	else if (!lp->link_up)
>  		return rc;
>  
>  	spin_lock_irqsave(lp->host->host_lock, flags);
> diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
> index 5db223c..f383639 100644
> --- a/drivers/scsi/libfc/fc_lport.c
> +++ b/drivers/scsi/libfc/fc_lport.c
> @@ -250,7 +250,7 @@ void fc_get_host_port_state(struct Scsi_Host *shost)
>  {
>  	struct fc_lport *lp = shost_priv(shost);
>  
> -	if ((lp->link_status & FC_LINK_UP) == FC_LINK_UP)
> +	if (lp->link_up)
>  		fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
>  	else
>  		fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
> @@ -577,8 +577,8 @@ void fc_linkup(struct fc_lport *lport)
>  		       fc_host_port_id(lport->host));
>  
>  	mutex_lock(&lport->lp_mutex);
> -	if ((lport->link_status & FC_LINK_UP) != FC_LINK_UP) {
> -		lport->link_status |= FC_LINK_UP;
> +	if (!lport->link_up) {
> +		lport->link_up = 1;
 >  		if (lport->state == LPORT_ST_RESET)
>  			fc_lport_enter_flogi(lport);
> @@ -597,8 +597,8 @@ void fc_linkdown(struct fc_lport *lport)
>  	FC_DEBUG_LPORT("Link is down for port (%6x)\n",
>  		       fc_host_port_id(lport->host));
>  
> -	if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP) {
> -		lport->link_status &= ~(FC_LINK_UP);
> +	if (lport->link_up) {
> +		lport->link_up = 0;
>  		fc_lport_enter_reset(lport);
>  		lport->tt.fcp_cleanup(lport);
>  	}
> @@ -612,9 +612,7 @@ EXPORT_SYMBOL(fc_linkdown);
>   */
>  void fc_pause(struct fc_lport *lport)
>  {
> -	mutex_lock(&lport->lp_mutex);
> -	lport->link_status |= FC_PAUSE;
> -	mutex_unlock(&lport->lp_mutex);
> +	lport->link_pause = 1;
>  }
>  EXPORT_SYMBOL(fc_pause);
>  
> @@ -624,9 +622,7 @@ EXPORT_SYMBOL(fc_pause);
>   */
>  void fc_unpause(struct fc_lport *lport)
>  {
> -	mutex_lock(&lport->lp_mutex);
> -	lport->link_status &= ~(FC_PAUSE);
> -	mutex_unlock(&lport->lp_mutex);
> +	lport->link_pause = 0;
>  }
>  EXPORT_SYMBOL(fc_unpause);
>  
> @@ -977,7 +973,7 @@ static void fc_lport_enter_reset(struct fc_lport *lport)
>  	fc_host_fabric_name(lport->host) = 0;
>  	fc_host_port_id(lport->host) = 0;
>  
> -	if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP)
> +	if (lport->link_up)
>  		fc_lport_enter_flogi(lport);
>  }
>  
> diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
> index 042f4ad..4732846 100644
> --- a/include/scsi/libfc.h
> +++ b/include/scsi/libfc.h
> @@ -68,9 +68,6 @@
>  /*
>   * FC HBA status
>   */
> -#define FC_PAUSE		    (1 << 1)
> -#define FC_LINK_UP		    (1 << 0)
> -
>  enum fc_lport_state {
>  	LPORT_ST_NONE = 0,
>  	LPORT_ST_FLOGI,
> @@ -603,7 +600,8 @@ struct fc_lport {
>  
>  	/* Operational Information */
>  	struct libfc_function_template tt;
> -	u16			link_status;
> +	u32			link_up:1;
> +	u32			link_pause:1;

We can't use bit fields, because on some architectures to set a bit field requires a
read/modify/write, so we would need a lock that was in common over all of the bits.
It's better to just use two u8 fields here.

	u8 link_up;
	u8 link_pause;

Umm. Also, it's not really pause, and it isn't a link condition.
It's back-pressure, either due to pause or
because the NIC can't send as fast as we would like (it's rings are full).
So, could we rename this as 'backpressure' ... maybe later.  At that
point we could also remove the functions to pause/unpause since they're so trivial.

So just eliminate the bit fields and it looks fine to me.

>  	enum fc_lport_state	state;
>  	unsigned long		boot_time;

	Regards,
	Joe



More information about the devel mailing list