[Open-FCoE] [RFC PATCH V4 15/16] fcoe: Fix module ref count bug by adding NETDEV UNREGISTER handling

John Fastabend john.r.fastabend at intel.com
Thu Jul 23 14:50:11 UTC 2009


Chris Leech wrote:
> Fixes reference counting on fcoe_instance and net_device, and adds
> NETDEV_UNREGISTER notifier handling so that you can unload network drivers.
> FCoE no longer increments the module use count for the network driver.
>
> On an NETDEV_UNREGISTER event, destroying the FCoE instance is deferred to a
> workqueue context to avoid RTNL deadlocks.
>
> Based in part by an earlier patch from John Fastabend
>
> John's patch description:
> Currently, the netdev module ref count is not decremented with module_put()
> when the module is unloaded while fcoe instances are present. To fix this
> removed reference count on netdev module completely and added functionality to
> netdev event handling for NETDEV_UNREGISTER events.
>
> This allows fcoe to remove devices cleanly when the netdev module is unloaded
> so we no longer need to hold a reference count for the netdev module.
>
> CC: John Fastabend <john.r.fastabend at intel.com>
>
> Signed-off-by: Chris Leech <christopher.leech at intel.com>
> ---
>
>  drivers/scsi/fcoe/fcoe.c |  170 +++++++++++++++-------------------------------
>  drivers/scsi/fcoe/fcoe.h |    1 
>  2 files changed, 55 insertions(+), 116 deletions(-)
>
> diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
> index ab4d5e8..b0a7a8e 100644
> --- a/drivers/scsi/fcoe/fcoe.c
> +++ b/drivers/scsi/fcoe/fcoe.c
> @@ -68,12 +68,13 @@ static int fcoe_link_ok(struct fc_lport *lp);
>  
>  static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
>  static int fcoe_hostlist_add(const struct fc_lport *);
> -static int fcoe_hostlist_remove(const struct fc_lport *);
>  
>  static void fcoe_check_wait_queue(struct fc_lport *, struct sk_buff *);
>  static int fcoe_device_notification(struct notifier_block *, ulong, void *);
>  static void fcoe_dev_setup(void);
>  static void fcoe_dev_cleanup(void);
> +static struct fcoe_interface *
> +	fcoe_hostlist_lookup_port(const struct net_device *dev);
>  
>  /* notification function from net device */
>  static struct notifier_block fcoe_notifier = {
> @@ -215,6 +216,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
>  
>  static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb);
>  static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new);
> +static void fcoe_interface_destroy_work(struct work_struct *work);
>  
>  /**
>   * fcoe_interface_create()
> @@ -232,7 +234,9 @@ static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev)
>  		return NULL;
>  	}
>  
> +	dev_hold(netdev);
>  	kref_init(&fcoe->kref);
> +	INIT_WORK(&fcoe->destroy_work, fcoe_interface_destroy_work);
>  
>  	/*
>  	 * Initialize FIP.
> @@ -288,10 +292,13 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
>  static void fcoe_interface_release(struct kref *kref)
>  {
>  	struct fcoe_interface *fcoe;
> +	struct net_device *netdev;
>  
>  	fcoe = container_of(kref, struct fcoe_interface, kref);
> +	netdev = fcoe->real_dev;
>  	fcoe_interface_cleanup(fcoe);
>  	kfree(fcoe);
> +	dev_put(netdev);
>  }
>  
>  /**
> @@ -630,8 +637,7 @@ static void fcoe_if_destroy(struct fc_lport *lport)
>  	/* Free memory used by statistical counters */
>  	fc_lport_free_stats(lport);
>  
> -	/* Release the net_device and Scsi_Host */
> -	dev_put(fcoe->real_dev);
> +	/* Release the Scsi_Host */
>  	scsi_host_put(lport->host);
>  }
>  
> @@ -757,7 +763,6 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
>  		goto out_lp_destroy;
>  	}
>  
> -	dev_hold(netdev);
>  	fcoe_interface_get(fcoe);
>  	return lport;
>  
> @@ -1524,14 +1529,13 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  	u32 mfs;
>  	int rc = NOTIFY_OK;
>  
> -	read_lock(&fcoe_hostlist_lock);
> +	write_lock(&fcoe_hostlist_lock);
>  	list_for_each_entry(fcoe, &fcoe_hostlist, list) {
>  		if (fcoe->real_dev == real_dev) {
>  			lp = fcoe->ctlr.lp;
>  			break;
>  		}
>  	}
> -	read_unlock(&fcoe_hostlist_lock);
>  	if (lp == NULL) {
>  		rc = NOTIFY_DONE;
>  		goto out;
> @@ -1554,6 +1558,11 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  		break;
>  	case NETDEV_REGISTER:
>  		break;
> +	case NETDEV_UNREGISTER:
> +		list_del(&fcoe->list);
> +		schedule_work(&fcoe->destroy_work);
> +		goto out;
> +		break;
>  	default:
>  		FCOE_NETDEV_DBG(real_dev, "Unknown event %ld "
>  				"from netdev netlink\n", event);
> @@ -1566,6 +1575,7 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>  		fcoe_clean_pending_queue(lp);
>  	}
>  out:
> +	write_unlock(&fcoe_hostlist_lock);
>  	return rc;
>  }
>  
> @@ -1590,73 +1600,10 @@ static struct net_device *fcoe_if_to_netdev(const char *buffer)
>  	return NULL;
>  }
>  
> -/**
> - * fcoe_netdev_to_module_owner() - finds out the driver module of the netdev
> - * @netdev: the target netdev
> - *
> - * Returns: ptr to the struct module, NULL for failure
> - */
> -static struct module *
> -fcoe_netdev_to_module_owner(const struct net_device *netdev)
> +static void __fcoe_destroy(struct fcoe_interface *fcoe)
>  {
> -	struct device *dev;
> -
> -	if (!netdev)
> -		return NULL;
> -
> -	dev = netdev->dev.parent;
> -	if (!dev)
> -		return NULL;
> -
> -	if (!dev->driver)
> -		return NULL;
> -
> -	return dev->driver->owner;
> -}
> -
> -/**
> - * fcoe_ethdrv_get() - Hold the Ethernet driver
> - * @netdev: the target netdev
> - *
> - * Holds the Ethernet driver module by try_module_get() for
> - * the corresponding netdev.
> - *
> - * Returns: 0 for success
> - */
> -static int fcoe_ethdrv_get(const struct net_device *netdev)
> -{
> -	struct module *owner;
> -
> -	owner = fcoe_netdev_to_module_owner(netdev);
> -	if (owner) {
> -		FCOE_NETDEV_DBG(netdev, "Hold driver module %s\n",
> -				module_name(owner));
> -		return  try_module_get(owner);
> -	}
> -	return -ENODEV;
> -}
> -
> -/**
> - * fcoe_ethdrv_put() - Release the Ethernet driver
> - * @netdev: the target netdev
> - *
> - * Releases the Ethernet driver module by module_put for
> - * the corresponding netdev.
> - *
> - * Returns: 0 for success
> - */
> -static int fcoe_ethdrv_put(const struct net_device *netdev)
> -{
> -	struct module *owner;
> -
> -	owner = fcoe_netdev_to_module_owner(netdev);
> -	if (owner) {
> -		FCOE_NETDEV_DBG(netdev, "Release driver module %s\n",
> -				module_name(owner));
> -		module_put(owner);
> -		return 0;
> -	}
> -	return -ENODEV;
> +	struct fc_lport *lport = fcoe->ctlr.lp;
> +	fcoe_if_destroy(lport);
>  }
>  
>  /**
> @@ -1668,10 +1615,8 @@ static int fcoe_ethdrv_put(const struct net_device *netdev)
>   */
>  static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
>  {
> -	struct net_device *netdev;
>  	struct fcoe_interface *fcoe;
> -	struct fcoe_port *port;
> -	struct fc_lport *lport;
> +	struct net_device *netdev;
>  	int rc;
>  
>  	mutex_lock(&fcoe_create_mutex);
> @@ -1692,26 +1637,33 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
>  		rc = -ENODEV;
>  		goto out_nodev;
>  	}
> -	/* look for existing lport */
> -	lport = fcoe_hostlist_lookup(netdev);
> -	if (!lport) {
> +
> +	write_lock(&fcoe_hostlist_lock);
> +	fcoe = fcoe_hostlist_lookup_port(netdev);
> +	if (!fcoe) {
> +		write_unlock(&fcoe_hostlist_lock);
>  		rc = -ENODEV;
> -		goto out_putdev;
> +		goto out_nodev;
>   
By going to out_nodev, dev_put() never gets called as it should.

>  	}
> -	/* Remove the instance from fcoe's list */
> -	fcoe_hostlist_remove(lport);
> -	port = lport_priv(lport);
> -	fcoe = port->fcoe;
> -	fcoe_if_destroy(lport);
> -	fcoe_ethdrv_put(netdev);
> -	rc = 0;
> -out_putdev:
> +	list_del(&fcoe->list);
> +	write_unlock(&fcoe_hostlist_lock);
> +	__fcoe_destroy(fcoe);
>  	dev_put(netdev);
>  out_nodev:
>  	mutex_unlock(&fcoe_create_mutex);
>  	return rc;
>  }
>   
<-- snip -->

Chris,

Why did you remove 'goto out_putdev'? If 'fcoe = 
fcoe_hostlist_lookup_port(netdev);' fails you will need to decrement the 
netdev reference which was incremented by fcoe_netdev_if_to_ndetdev(). 
To see the issue try this, 'modprobe fcoe; fcoeadm -d ethx; rmmod ixgbe' 
note ixgbe is waiting for ethx to become free.

Thanks,
John.





More information about the devel mailing list