[Open-FCoE] [PATCH V5 15/16] fcoe: Fix module ref count bug by adding NETDEV UNREGISTER handling

John Fastabend john.r.fastabend at intel.com
Mon Jul 27 14:12:33 UTC 2009


Chris Leech wrote:
> Fixes reference counting on fcoe_instance and net_device, and adds
> NETDEV_UNREGISTER notifier handling so that you can unload network drivers.
> FCoE no longer increments the module use count for the network driver.
>
> On an NETDEV_UNREGISTER event, destroying the FCoE instance is deferred to a
> workqueue context to avoid RTNL deadlocks.
>
> Based in part by an earlier patch from John Fastabend
>
> John's patch description:
> Currently, the netdev module ref count is not decremented with module_put()
> when the module is unloaded while fcoe instances are present. To fix this
> removed reference count on netdev module completely and added functionality to
> netdev event handling for NETDEV_UNREGISTER events.
>
> This allows fcoe to remove devices cleanly when the netdev module is unloaded
> so we no longer need to hold a reference count for the netdev module.
>
> Signed-off-by: Chris Leech <christopher.leech at intel.com>
> ---
>
>  drivers/scsi/fcoe/fcoe.c |  175 +++++++++++++++-------------------------------
>  drivers/scsi/fcoe/fcoe.h |    2 +
>  2 files changed, 58 insertions(+), 119 deletions(-)
>
> diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
> index 2330403..290baaa 100644
> --- a/drivers/scsi/fcoe/fcoe.c
> +++ b/drivers/scsi/fcoe/fcoe.c
> @@ -74,12 +74,13 @@ static int fcoe_link_ok(struct fc_lport *lp);
>
>  static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
>  static int fcoe_hostlist_add(const struct fc_lport *);
> -static int fcoe_hostlist_remove(const struct fc_lport *);
>
>  static void fcoe_check_wait_queue(struct fc_lport *, struct sk_buff *);
>  static int fcoe_device_notification(struct notifier_block *, ulong, void *);
>  static void fcoe_dev_setup(void);
>  static void fcoe_dev_cleanup(void);
> +static struct fcoe_interface *
> +       fcoe_hostlist_lookup_port(const struct net_device *dev);
>
>  /* notification function from net device */
>  static struct notifier_block fcoe_notifier = {
> @@ -216,6 +217,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
>
>  static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb);
>  static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new);
> +static void fcoe_destroy_work(struct work_struct *work);
>
>  /**
>   * fcoe_interface_create()
> @@ -233,6 +235,7 @@ static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev)
>                 return NULL;
>         }
>
> +       dev_hold(netdev);
>         kref_init(&fcoe->kref);
>
>         /*
> @@ -289,10 +292,13 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
>  static void fcoe_interface_release(struct kref *kref)
>  {
>         struct fcoe_interface *fcoe;
> +       struct net_device *netdev;
>
>         fcoe = container_of(kref, struct fcoe_interface, kref);
> +       netdev = fcoe->netdev;
>         fcoe_interface_cleanup(fcoe);
>         kfree(fcoe);
> +       dev_put(netdev);
>  }
>
>  /**
> @@ -643,8 +649,7 @@ static void fcoe_if_destroy(struct fc_lport *lport)
>         /* Free memory used by statistical counters */
>         fc_lport_free_stats(lport);
>
> -       /* Release the net_device and Scsi_Host */
> -       dev_put(netdev);
> +       /* Release the Scsi_Host */
>         scsi_host_put(lport->host);
>  }
>
> @@ -719,7 +724,9 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
>         }
>         lport = shost_priv(shost);
>         port = lport_priv(lport);
> +       port->lport = lport;
>         port->fcoe = fcoe;
> +       INIT_WORK(&port->destroy_work, fcoe_destroy_work);
>
>         /* configure fc_lport, e.g., em */
>         rc = fcoe_lport_config(lport);
> @@ -770,7 +777,6 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
>                 goto out_lp_destroy;
>         }
>
> -       dev_hold(netdev);
>         fcoe_interface_get(fcoe);
>         return lport;
>
> @@ -1532,19 +1538,19 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>         struct fc_lport *lp = NULL;
>         struct net_device *netdev = ptr;
>         struct fcoe_interface *fcoe;
> +       struct fcoe_port *port;
>         struct fcoe_dev_stats *stats;
>         u32 link_possible = 1;
>         u32 mfs;
>         int rc = NOTIFY_OK;
>
> -       read_lock(&fcoe_hostlist_lock);
> +       write_lock(&fcoe_hostlist_lock);
>         list_for_each_entry(fcoe, &fcoe_hostlist, list) {
>                 if (fcoe->netdev == netdev) {
>                         lp = fcoe->ctlr.lp;
>                         break;
>                 }
>         }
> -       read_unlock(&fcoe_hostlist_lock);
>         if (lp == NULL) {
>                 rc = NOTIFY_DONE;
>                 goto out;
> @@ -1566,6 +1572,12 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>                 break;
>         case NETDEV_REGISTER:
>                 break;
> +       case NETDEV_UNREGISTER:
> +               list_del(&fcoe->list);
> +               port = lport_priv(fcoe->ctlr.lp);
> +               schedule_work(&port->destroy_work);
> +               goto out;
> +               break;
>         default:
>                 FCOE_NETDEV_DBG(netdev, "Unknown event %ld "
>                                 "from netdev netlink\n", event);
> @@ -1578,6 +1590,7 @@ static int fcoe_device_notification(struct notifier_block *notifier,
>                 fcoe_clean_pending_queue(lp);
>         }
>  out:
> +       write_unlock(&fcoe_hostlist_lock);
>         return rc;
>  }
>
> @@ -1603,75 +1616,6 @@ static struct net_device *fcoe_if_to_netdev(const char *buffer)
>  }
>
>  /**
> - * fcoe_netdev_to_module_owner() - finds out the driver module of the netdev
> - * @netdev: the target netdev
> - *
> - * Returns: ptr to the struct module, NULL for failure
> - */
> -static struct module *
> -fcoe_netdev_to_module_owner(const struct net_device *netdev)
> -{
> -       struct device *dev;
> -
> -       if (!netdev)
> -               return NULL;
> -
> -       dev = netdev->dev.parent;
> -       if (!dev)
> -               return NULL;
> -
> -       if (!dev->driver)
> -               return NULL;
> -
> -       return dev->driver->owner;
> -}
> -
> -/**
> - * fcoe_ethdrv_get() - Hold the Ethernet driver
> - * @netdev: the target netdev
> - *
> - * Holds the Ethernet driver module by try_module_get() for
> - * the corresponding netdev.
> - *
> - * Returns: 0 for success
> - */
> -static int fcoe_ethdrv_get(const struct net_device *netdev)
> -{
> -       struct module *owner;
> -
> -       owner = fcoe_netdev_to_module_owner(netdev);
> -       if (owner) {
> -               FCOE_NETDEV_DBG(netdev, "Hold driver module %s\n",
> -                               module_name(owner));
> -               return  try_module_get(owner);
> -       }
> -       return -ENODEV;
> -}
> -
> -/**
> - * fcoe_ethdrv_put() - Release the Ethernet driver
> - * @netdev: the target netdev
> - *
> - * Releases the Ethernet driver module by module_put for
> - * the corresponding netdev.
> - *
> - * Returns: 0 for success
> - */
> -static int fcoe_ethdrv_put(const struct net_device *netdev)
> -{
> -       struct module *owner;
> -
> -       owner = fcoe_netdev_to_module_owner(netdev);
> -       if (owner) {
> -               FCOE_NETDEV_DBG(netdev, "Release driver module %s\n",
> -                               module_name(owner));
> -               module_put(owner);
> -               return 0;
> -       }
> -       return -ENODEV;
> -}
> -
> -/**
>   * fcoe_destroy() - handles the destroy from sysfs
>   * @buffer: expected to be an eth if name
>   * @kp: associated kernel param
> @@ -1680,10 +1624,8 @@ static int fcoe_ethdrv_put(const struct net_device *netdev)
>   */
>  static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
>  {
> -       struct net_device *netdev;
>         struct fcoe_interface *fcoe;
> -       struct fcoe_port *port;
> -       struct fc_lport *lport;
> +       struct net_device *netdev;
>         int rc;
>
>         mutex_lock(&fcoe_config_mutex);
> @@ -1704,26 +1646,33 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
>                 rc = -ENODEV;
>                 goto out_nodev;
>         }
> -       /* look for existing lport */
> -       lport = fcoe_hostlist_lookup(netdev);
> -       if (!lport) {
> +
> +       write_lock(&fcoe_hostlist_lock);
> +       fcoe = fcoe_hostlist_lookup_port(netdev);
> +       if (!fcoe) {
> +               write_unlock(&fcoe_hostlist_lock);
>                 rc = -ENODEV;
> -               goto out_putdev;
> +               goto out_nodev;
>         }
> -       /* Remove the instance from fcoe's list */
> -       fcoe_hostlist_remove(lport);
> -       port = lport_priv(lport);
> -       fcoe = port->fcoe;
> -       fcoe_if_destroy(lport);
> -       fcoe_ethdrv_put(netdev);
> -       rc = 0;
> -out_putdev:
> +       list_del(&fcoe->list);
> +       write_unlock(&fcoe_hostlist_lock);
> +       fcoe_if_destroy(fcoe->ctlr.lp);
>         dev_put(netdev);
>  out_nodev:
>         mutex_unlock(&fcoe_config_mutex);
>         return rc;
>  }
>   
If fcoe_hostlist_lookup_port() fails then dev_put() needs to be called. 
Removing out_putdev breaks this and the netdev reference count is wrong. 
The failure occurs when attempting to delete an fcoe interface that has 
not been created. After this occurs trying to remove the netdevice will 
hang.

Also there is a memory leak with dev_addr_delete. If dev_addr_delete is 
called after the netdevice has flushed the dev_addr_list (which is done 
after the NETDEV_UNREGISTER notifiers return) calling dev_addr_delete 
does not free memory correctly. If the work scheduled by the netdev 
unregister handling happens after the dev_addr_list is flushed this 
could occur. Testing this gave the error "__dev_addr_discard: address 
leakage!" which seems to indicate this is happening.

thanks,
john.






More information about the devel mailing list