[Open-FCoE] [RFC PATCH 1/2] fcoe, libfc: adds per cpu exches pool within an exchange manager (EM)

Vasu Dev vasu.dev at intel.com
Tue Jul 7 17:14:05 UTC 2009


Adds per-cpu exch pools for these reasons:

 1. Currently an EM instance is shared across all cpus to manage
    all exches for all cpus. This requires taking em_lock across
    all cpus for every exch alloc, free, lookup and reset for each
    IO or its frame, which makes em_lock expensive. Giving each cpu
    its own exch pool, protected by its own per-cpu pool lock,
    should reduce locking contention in the fast path for exch
    alloc, free and lookup (see the sketch after this list).

 2. Per-cpu exch pools should also improve the cache hit ratio,
    since all frames of an exch will be processed on the same cpu
    on which the exch was originated; the next patch makes that
    change using the per-cpu exch pools added here.
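
As an illustration of the first point, below is a minimal sketch
(an assumption, not the follow-on patch itself) of allocating an
exch from the local cpu's pool under only the per-pool lock. The
struct fc_exches_pool and fc_exch_mgr fields are the ones added by
this patch; the function name and the simplified free-slot search
are hypothetical:

static struct fc_exch *fc_exch_pool_alloc_sketch(struct fc_exch_mgr *mp)
{
	struct fc_exches_pool *pool = per_cpu_ptr(mp->pool, get_cpu());
	struct fc_exch *ep = NULL;
	u16 index = 0;

	spin_lock_bh(&pool->lock);
	/* simplified linear scan for a free slot in this cpu's pool */
	while (index <= mp->pool_max_xid && pool->exches[index])
		index++;
	if (index <= mp->pool_max_xid) {
		ep = mempool_alloc(mp->ep_pool, GFP_ATOMIC);
		if (ep) {
			pool->exches[index] = ep;
			list_add(&ep->ex_list, &pool->ex_list);
			pool->total_exches++;
			/* xid encoding from cpu and index is sketched below */
		}
	}
	spin_unlock_bh(&pool->lock);
	put_cpu();
	return ep;
}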

The EM exch id range is divided equally across all cpus to allocate
one exch pool per cpu. The exch id space is divided such that the
lower bits of an exch id carry the cpu number on which the exch was
originated; this makes it easy to direct all frames of an exch to
the cpu on which the exch was originated, by a simple bitwise AND
between the exch id of an incoming frame and cpu_mask. For this,
cpu_mask and cpu_order are derived from the maximum possible cpu
count nr_cpu_ids rounded up to a power of 2, and the next patch uses
them to map between an exch id and the exch pointer index in the
per-cpu exches pool array.
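
The intended mapping can be sketched as below (an illustration based
on this description only; the helper names are hypothetical and the
actual mapping is introduced by the next patch):

/* low bits of the xid carry the cpu the exch was originated on */
static inline u16 xid_to_cpu(u16 xid, u16 cpu_mask)
{
	return xid & cpu_mask;
}

/* remaining high bits select the slot in that cpu's exches array */
static inline u16 xid_to_pool_index(u16 xid, u16 min_xid, u16 cpu_order)
{
	return (xid - min_xid) >> cpu_order;
}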

Increased the default FCOE_MAX_XID to 0x1000 from 0x07ef to make
more exch ids available per cpu after the above described division
of the exch id range across all cpus.
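
For example, assuming a system with 8 possible cpus (cpu_mask
rounded up to 7), each per-cpu pool gets (0x1000 - 0x0000 + 1) / 8 =
512 exch ids, versus (0x07ef - 0x0000 + 1) / 8 = 254 with the old
maximum.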

This patch sets up the per-cpu exch pool struct fc_exches_pool with
all the fields required to manage exches in a pool, as prep work for
the next patch, which fully uses the exch pools and removes em_lock.

RFC notes:

 1. AFAIU these changes should not break fnic, but to be sure,
    testing with fnic is required. I'd appreciate it if someone
    with fnic could test these patches. I think fnic will also need
    additional changes to fully leverage the exch pool
    implementation.

 2. These patches work fine when offloads are not in use, but with
    offload, large IOs (> 1M writes) cause a BUG_ON in kfree called
    from the ixgbe driver. I'm debugging this, and in the meanwhile
    I'd appreciate any review comments on these patches.

These patches are based on the fcoe-next tree top commit:

 commit a92e688d433a84a689604ed6ec3e2c787271781d
 Author: Vasu Dev <vasu.dev at intel.com>
 Date:   Wed Jun 24 21:15:45 2009 -0700
 fcoe, libfc: adds offload EM per eth device with only single xid range per EM

Signed-off-by: Vasu Dev <vasu.dev at intel.com>
---

 drivers/scsi/fcoe/fcoe.h      |    2 +-
 drivers/scsi/libfc/fc_exch.c  |   42 +++++++++++++++++++++++++++++++++++++++--
 drivers/scsi/libfc/fc_lport.c |   23 ++++++++++++++++++++++
 include/scsi/libfc.h          |    2 ++
 4 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h
index 6905efc..5ef7556 100644
--- a/drivers/scsi/fcoe/fcoe.h
+++ b/drivers/scsi/fcoe/fcoe.h
@@ -38,7 +38,7 @@
 #define FCOE_MAX_OUTSTANDING_COMMANDS	1024
 
 #define FCOE_MIN_XID		0x0000	/* the min xid supported by fcoe_sw */
-#define FCOE_MAX_XID		0x07ef	/* the max xid supported by fcoe_sw */
+#define FCOE_MAX_XID		0x1000	/* the max xid supported by fcoe_sw */
 
 unsigned int fcoe_debug_logging;
 module_param_named(debug_logging, fcoe_debug_logging, int, S_IRUGO|S_IWUSR);
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index f42695e..609590d 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -47,6 +47,14 @@ static struct kmem_cache *fc_em_cachep;        /* cache for exchanges */
  * fc_seq holds the state for an individual sequence.
  */
 
+struct fc_exches_pool {
+	u16		next_xid;	/* next possible free exchange ID */
+	spinlock_t	lock;		/* exches pool lock */
+	struct list_head	ex_list;	/* allocated exchanges list */
+	u32		total_exches;	/* total allocated exchanges */
+	struct fc_exch **exches;	/* for exch pointers indexed by xid */
+};
+
 /*
  * Exchange manager.
  *
@@ -66,6 +74,8 @@ struct fc_exch_mgr {
 	u32	total_exches;		/* total allocated exchanges */
 	struct list_head	ex_list;	/* allocated exchanges list */
 	mempool_t	*ep_pool;	/* reserve ep's */
+	u16		pool_max_xid;	/* max exchange id per pool */
+	struct fc_exches_pool *pool;	/* per cpu exches pool */
 
 	/*
 	 * currently exchange mgr stats are updated but not used.
@@ -1742,6 +1752,7 @@ static void fc_exch_mgr_destroy(struct kref *kref)
 	 */
 	WARN_ON(mp->total_exches != 0);
 	mempool_destroy(mp->ep_pool);
+	free_percpu(mp->pool);
 	kfree(mp);
 }
 
@@ -1761,6 +1772,10 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
 {
 	struct fc_exch_mgr *mp;
 	size_t len;
+	u16 pool_xid;
+	size_t psize;
+	unsigned int cpu;
+	struct fc_exches_pool *pool;
 
 	if (max_xid <= min_xid || max_xid == FC_XID_UNKNOWN) {
 		FC_LPORT_DBG(lp, "Invalid min_xid 0x:%x and max_xid 0x:%x\n",
@@ -1793,10 +1808,31 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
 	if (!mp->ep_pool)
 		goto free_mp;
 
+	/*
+	 * Setup per cpu exches pool with entire exchange
+	 * id range equally divided across all cpus.
+	 */
+	pool_xid = (mp->max_xid - mp->min_xid + 1) / (lp->cpu_mask + 1);
+	mp->pool_max_xid = pool_xid - 1;
+
+	/*
+	 * Allocate and initialize per cpu exches pool
+	 */
+	psize = pool_xid * sizeof(struct fc_exch *) + sizeof(*pool);
+	mp->pool = __alloc_percpu(psize, __alignof__(sizeof(struct fc_exch *)));
+	if (!mp->pool)
+		goto free_mempool;
+	for_each_possible_cpu(cpu) {
+		pool = per_cpu_ptr(mp->pool, cpu);
+		spin_lock_init(&pool->lock);
+		INIT_LIST_HEAD(&pool->ex_list);
+		pool->exches = (struct fc_exch **) (pool + 1);
+	}
+
 	kref_init(&mp->kref);
 	if (!fc_exch_mgr_add(lp, mp, match)) {
-		mempool_destroy(mp->ep_pool);
-		goto free_mp;
+		free_percpu(mp->pool);
+		goto free_mempool;
 	}
 
 	/*
@@ -1807,6 +1843,8 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
 	kref_put(&mp->kref, fc_exch_mgr_destroy);
 	return mp;
 
+free_mempool:
+	mempool_destroy(mp->ep_pool);
 free_mp:
 	kfree(mp);
 	return NULL;
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index ef6d2bf..be3370b 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1620,6 +1620,29 @@ int fc_lport_init(struct fc_lport *lport)
 		fc_host_supported_speeds(lport->host) |= FC_PORTSPEED_10GBIT;
 
 	INIT_LIST_HEAD(&lport->ema_list);
+
+	/*
+	 * Update cpu_mask and cpu_order, the cpu_mask is
+	 * set for nr_cpu_ids rounded up to order of 2's
+	 * power and order is stored in cpu_order as
+	 * this is later required in mapping between an
+	 * exch id and array index in per cpu pool of
+	 * exches.
+	 *
+	 * This round up is required to align cpu_mask to
+	 * exchange id's lower bits such that all incoming
+	 * frames of an exchange gets delivered to same cpu
+	 * on which exchange originated by simple bitwise
+	 * AND operation between cpu_mask and exchange id.
+	 */
+	lport->cpu_mask = 1;
+	lport->cpu_order = 0;
+	while (lport->cpu_mask < nr_cpu_ids) {
+		lport->cpu_mask <<= 1;
+		lport->cpu_order++;
+	}
+	lport->cpu_mask--;
+
 	return 0;
 }
 EXPORT_SYMBOL(fc_lport_init);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 3f1993a..6c69b3a 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -703,6 +703,8 @@ struct fc_lport {
 	struct libfc_function_template tt;
 	u8			link_up;
 	u8			qfull;
+	u16			cpu_mask;
+	u16			cpu_order;
 	enum fc_lport_state	state;
 	unsigned long		boot_time;
 



