[Infiniband] Add preliminary multiple port support for Hermon cards

Infiniband devices no longer block waiting for link-up in
register_ibdev().

Hermon driver needs to create an event queue and poll for link-up events.

Infiniband core needs to reread MAD parameters when link state changes.

IPoIB needs to cope with Infiniband link parameters being only partially
available at probe and open time.
pull/1/head
Michael Brown 2008-04-18 02:50:48 +01:00
parent 3475b693b7
commit a176a24ac0
6 changed files with 579 additions and 143 deletions

View File

@ -30,6 +30,7 @@
#include <gpxe/umalloc.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include "hermon.h"
@ -317,19 +318,30 @@ hermon_cmd_write_mtt ( struct hermon *hermon,
}
static inline int
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
const struct hermonprm_eqc *eqc ) {
hermon_cmd_map_eq ( struct hermon *hermon, unsigned long index_map,
const struct hermonprm_event_mask *mask ) {
return hermon_cmd ( hermon,
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
1, sizeof ( *eqc ) ),
0, eqc, index, NULL );
HERMON_HCR_IN_CMD ( HERMON_HCR_MAP_EQ,
0, sizeof ( *mask ) ),
0, mask, index_map, NULL );
}
static inline int
hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index ) {
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
const struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_VOID_CMD ( HERMON_HCR_HW2SW_EQ ),
1, NULL, index, NULL );
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
1, sizeof ( *eqctx ) ),
0, eqctx, index, NULL );
}
static inline int
hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index,
struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_OUT_CMD ( HERMON_HCR_HW2SW_EQ,
1, sizeof ( *eqctx ) ),
1, NULL, index, eqctx );
}
static inline int
@ -377,6 +389,15 @@ hermon_cmd_rtr2rts_qp ( struct hermon *hermon, unsigned long qpn,
0, ctx, qpn, NULL );
}
/**
 * Issue the RTS2RTS_QP command
 *
 * @v hermon		Hermon device
 * @v qpn		Queue pair number
 * @v ctx		Queue pair state transition parameters
 * @ret rc		Return status code
 */
static inline int
hermon_cmd_rts2rts_qp ( struct hermon *hermon, unsigned long qpn,
			const struct hermonprm_qp_ee_state_transitions *ctx ) {
	int rc;

	/* The transition parameters are the only input; nothing is read back */
	rc = hermon_cmd ( hermon,
			  HERMON_HCR_IN_CMD ( HERMON_HCR_RTS2RTS_QP,
					      1, sizeof ( *ctx ) ),
			  0, ctx, qpn, NULL );
	return rc;
}
static inline int
hermon_cmd_2rst_qp ( struct hermon *hermon, unsigned long qpn ) {
return hermon_cmd ( hermon,
@ -859,6 +880,39 @@ static int hermon_create_qp ( struct ib_device *ibdev,
return rc;
}
/**
* Modify queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v mod_list Modification list
* @ret rc Return status code
*/
static int hermon_modify_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp,
unsigned long mod_list ) {
struct hermon *hermon = ib_get_drvdata ( ibdev );
struct hermonprm_qp_ee_state_transitions qpctx;
unsigned long optparammask = 0;
int rc;
/* Construct optparammask */
if ( mod_list & IB_MODIFY_QKEY )
optparammask |= HERMON_QP_OPT_PARAM_QKEY;
/* Issue RTS2RTS_QP */
memset ( &qpctx, 0, sizeof ( qpctx ) );
MLX_FILL_1 ( &qpctx, 0, opt_param_mask, optparammask );
MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey );
if ( ( rc = hermon_cmd_rts2rts_qp ( hermon, qp->qpn, &qpctx ) ) != 0 ){
DBGC ( hermon, "Hermon %p RTS2RTS_QP failed: %s\n",
hermon, strerror ( rc ) );
return rc;
}
return 0;
}
/**
* Destroy queue pair
*
@ -1356,6 +1410,7 @@ static struct ib_device_operations hermon_ib_operations = {
.create_cq = hermon_create_cq,
.destroy_cq = hermon_destroy_cq,
.create_qp = hermon_create_qp,
.modify_qp = hermon_modify_qp,
.destroy_qp = hermon_destroy_qp,
.post_send = hermon_post_send,
.post_recv = hermon_post_recv,
@ -1367,6 +1422,211 @@ static struct ib_device_operations hermon_ib_operations = {
.mad = hermon_mad,
};
/***************************************************************************
*
* Event queues
*
***************************************************************************
*/
/**
* Create event queue
*
* @v hermon Hermon device
* @ret rc Return status code
*/
static int hermon_create_eq ( struct hermon *hermon ) {
struct hermon_event_queue *hermon_eq = &hermon->eq;
struct hermonprm_eqc eqctx;
struct hermonprm_event_mask mask;
unsigned int i;
int rc;
/* Allocate event queue itself */
hermon_eq->eqe_size =
( HERMON_NUM_EQES * sizeof ( hermon_eq->eqe[0] ) );
hermon_eq->eqe = malloc_dma ( hermon_eq->eqe_size,
sizeof ( hermon_eq->eqe[0] ) );
if ( ! hermon_eq->eqe ) {
rc = -ENOMEM;
goto err_eqe;
}
memset ( hermon_eq->eqe, 0, hermon_eq->eqe_size );
for ( i = 0 ; i < HERMON_NUM_EQES ; i++ ) {
MLX_FILL_1 ( &hermon_eq->eqe[i].generic, 7, owner, 1 );
}
barrier();
/* Allocate MTT entries */
if ( ( rc = hermon_alloc_mtt ( hermon, hermon_eq->eqe,
hermon_eq->eqe_size,
&hermon_eq->mtt ) ) != 0 )
goto err_alloc_mtt;
/* Hand queue over to hardware */
memset ( &eqctx, 0, sizeof ( eqctx ) );
MLX_FILL_1 ( &eqctx, 0, st, 0xa /* "Fired" */ );
MLX_FILL_1 ( &eqctx, 2,
page_offset, ( hermon_eq->mtt.page_offset >> 5 ) );
MLX_FILL_1 ( &eqctx, 3, log_eq_size, fls ( HERMON_NUM_EQES - 1 ) );
MLX_FILL_1 ( &eqctx, 7, mtt_base_addr_l,
( hermon_eq->mtt.mtt_base_addr >> 3 ) );
if ( ( rc = hermon_cmd_sw2hw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
DBGC ( hermon, "Hermon %p SW2HW_EQ failed: %s\n",
hermon, strerror ( rc ) );
goto err_sw2hw_eq;
}
/* Map events to this event queue */
memset ( &mask, 0, sizeof ( mask ) );
MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_MAP | 0 ),
&mask ) ) != 0 ) {
DBGC ( hermon, "Hermon %p MAP_EQ failed: %s\n",
hermon, strerror ( rc ) );
goto err_map_eq;
}
return 0;
err_map_eq:
hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx );
err_sw2hw_eq:
hermon_free_mtt ( hermon, &hermon_eq->mtt );
err_alloc_mtt:
free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
err_eqe:
memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
return rc;
}
/**
* Destroy event queue
*
* @v hermon Hermon device
*/
static void hermon_destroy_eq ( struct hermon *hermon ) {
struct hermon_event_queue *hermon_eq = &hermon->eq;
struct hermonprm_eqc eqctx;
struct hermonprm_event_mask mask;
int rc;
/* Unmap events from event queue */
memset ( &mask, 0, sizeof ( mask ) );
MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_UNMAP | 0 ),
&mask ) ) != 0 ) {
DBGC ( hermon, "Hermon %p FATAL MAP_EQ failed to unmap: %s\n",
hermon, strerror ( rc ) );
/* Continue; HCA may die but system should survive */
}
/* Take ownership back from hardware */
if ( ( rc = hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
DBGC ( hermon, "Hermon %p FATAL HW2SW_EQ failed: %s\n",
hermon, strerror ( rc ) );
/* Leak memory and return; at least we avoid corruption */
return;
}
/* Free MTT entries */
hermon_free_mtt ( hermon, &hermon_eq->mtt );
/* Free memory */
free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
}
/**
 * Handle port state event
 *
 * Decodes the port number from the event and passes the link state
 * change on to the Infiniband core for that port.  The decoded link
 * state is used only for the debug message.
 *
 * @v hermon		Hermon device
 * @v eqe		Port state change event queue entry
 */
static void hermon_event_port_state_change ( struct hermon *hermon,
					     union hermonprm_event_entry *eqe){
	unsigned int port_idx;
	int is_up;

	/* The event carries a one-based port number; convert it to a
	 * zero-based index into hermon->ibdev[].
	 */
	port_idx = ( MLX_GET ( &eqe->port_state_change, data.p ) - 1 );
	is_up = ( MLX_GET ( &eqe->generic, event_sub_type ) & 0x04 );
	DBGC ( hermon, "Hermon %p port %d link %s\n", hermon, ( port_idx + 1 ),
	       ( is_up ? "up" : "down" ) );

	/* Pass the event on only for ports that actually exist */
	if ( port_idx < HERMON_NUM_PORTS ) {
		ib_link_state_changed ( hermon->ibdev[port_idx] );
		return;
	}

	DBGC ( hermon, "Hermon %p port %d does not exist!\n",
	       hermon, ( port_idx + 1 ) );
}
/**
 * Poll event queue
 *
 * Consumes every event queue entry that the hardware has handed over,
 * dispatches it by event type, and rings the event queue doorbell
 * after each entry with the updated index.
 *
 * @v hermon		Hermon device
 */
static void hermon_poll_eq ( struct hermon *hermon ) {
	struct hermon_event_queue *hermon_eq = &hermon->eq;
	/* Ring size is a power of two, so the low bits index the ring */
	const unsigned int eqe_idx_mask = ( HERMON_NUM_EQES - 1 );
	union hermonprm_event_entry *eqe;
	union hermonprm_doorbell_register db_reg;
	unsigned int event_type;

	while ( 1 ) {
		/* The expected owner bit value flips on every pass
		 * around the ring (bit HERMON_NUM_EQES of the running
		 * index).  A mismatch means that the hardware has not
		 * yet written this entry.
		 */
		eqe = &hermon_eq->eqe[hermon_eq->next_idx & eqe_idx_mask];
		if ( MLX_GET ( &eqe->generic, owner ) ^
		     ( ( hermon_eq->next_idx & HERMON_NUM_EQES ) ? 1 : 0 ) ) {
			/* Entry still owned by hardware; end of poll */
			break;
		}
		DBGCP ( hermon, "Hermon %p event:\n", hermon );
		DBGCP_HD ( hermon, eqe, sizeof ( *eqe ) );

		/* Handle event */
		event_type = MLX_GET ( &eqe->generic, event_type );
		switch ( event_type ) {
		case HERMON_EV_PORT_STATE_CHANGE:
			hermon_event_port_state_change ( hermon, eqe );
			break;
		default:
			DBGC ( hermon, "Hermon %p unrecognised event type "
			       "%#x:\n", hermon, event_type );
			DBGC_HD ( hermon, eqe, sizeof ( *eqe ) );
			break;
		}

		/* Update event queue's index */
		hermon_eq->next_idx++;

		/* Ring doorbell.  The running index is free-running
		 * but the doorbell's "ci" field is only 24 bits wide,
		 * so mask it to keep a large index from spilling into
		 * the neighbouring bits of the register (assumes
		 * MLX_FILL_1 does not truncate the value itself).
		 */
		memset ( &db_reg, 0, sizeof ( db_reg ) );
		MLX_FILL_1 ( &db_reg.event, 0, ci,
			     ( hermon_eq->next_idx & 0x00ffffffUL ) );
		DBGCP ( hermon, "Ringing doorbell %08lx with %08lx\n",
			virt_to_phys ( hermon->uar + HERMON_DB_EQ0_OFFSET ),
			db_reg.dword[0] );
		writel ( db_reg.dword[0],
			 ( hermon->uar + HERMON_DB_EQ0_OFFSET ) );
	}
}
/**
 * Event queue poll processor
 *
 * Process step function: recovers the Hermon device that embeds the
 * process and polls its event queue once.
 *
 * @v process		Hermon event queue process
 */
static void hermon_step ( struct process *process ) {
	struct hermon *hermon;

	hermon = container_of ( process, struct hermon, event_process );
	hermon_poll_eq ( hermon );
}
/***************************************************************************
*
* Firmware control
@ -1879,6 +2139,7 @@ static int hermon_probe ( struct pci_device *pci,
goto err_alloc_hermon;
}
pci_set_drvdata ( pci, hermon );
process_init ( &hermon->event_process, hermon_step, NULL );
/* Allocate Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
@ -1945,6 +2206,10 @@ static int hermon_probe ( struct pci_device *pci,
if ( ( rc = hermon_setup_mpt ( hermon ) ) != 0 )
goto err_setup_mpt;
/* Set up event queue */
if ( ( rc = hermon_create_eq ( hermon ) ) != 0 )
goto err_create_eq;
/* Register Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
if ( ( rc = register_ibdev ( hermon->ibdev[i] ) ) != 0 ) {
@ -1960,6 +2225,8 @@ static int hermon_probe ( struct pci_device *pci,
err_register_ibdev:
for ( ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
hermon_destroy_eq ( hermon );
err_create_eq:
err_setup_mpt:
hermon_cmd_close_hca ( hermon );
err_init_hca:
@ -1976,6 +2243,7 @@ static int hermon_probe ( struct pci_device *pci,
err_alloc_ibdev:
for ( ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
process_del ( &hermon->event_process );
free ( hermon );
err_alloc_hermon:
return rc;
@ -1992,6 +2260,7 @@ static void hermon_remove ( struct pci_device *pci ) {
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
hermon_destroy_eq ( hermon );
hermon_cmd_close_hca ( hermon );
hermon_free_icm ( hermon );
hermon_stop_firmware ( hermon );
@ -2000,6 +2269,7 @@ static void hermon_remove ( struct pci_device *pci ) {
free_dma ( hermon->mailbox_in, HERMON_MBOX_SIZE );
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
process_del ( &hermon->event_process );
free ( hermon );
}

View File

@ -9,6 +9,7 @@
#include <stdint.h>
#include <gpxe/uaccess.h>
#include <gpxe/process.h>
#include "mlx_bitops.h"
#include "MT25408_PRM.h"
@ -18,7 +19,7 @@
*/
/* Ports in existence */
#define HERMON_NUM_PORTS 1
#define HERMON_NUM_PORTS 2
#define HERMON_PORT_BASE 1
/* PCI BARs */
@ -48,6 +49,7 @@
#define HERMON_HCR_RST2INIT_QP 0x0019
#define HERMON_HCR_INIT2RTR_QP 0x001a
#define HERMON_HCR_RTR2RTS_QP 0x001b
#define HERMON_HCR_RTS2RTS_QP 0x001c
#define HERMON_HCR_2RST_QP 0x0021
#define HERMON_HCR_MAD_IFC 0x0024
#define HERMON_HCR_READ_MCG 0x0025
@ -75,6 +77,14 @@
#define HERMON_PAGE_SIZE 4096
#define HERMON_DB_POST_SND_OFFSET 0x14
#define HERMON_DB_EQ0_OFFSET 0x800
#define HERMON_QP_OPT_PARAM_QKEY 0x00000020UL
#define HERMON_MAP_EQ_MAP ( 0UL << 31 )
#define HERMON_MAP_EQ_UNMAP ( 1UL << 31 )
#define HERMON_EV_PORT_STATE_CHANGE 0x09
/*
* Datatypes that seem to be missing from the autogenerated documentation
@ -108,12 +118,32 @@ struct hermonprm_send_db_register_st {
pseudo_bit_t qn[0x00018];
} __attribute__ (( packed ));
/* Event queue doorbell register layout.
 *
 * Field widths add up to 0x20, i.e. one 32-bit doorbell word
 * (assuming one pseudo_bit_t element per hardware bit - TODO confirm
 * against mlx_bitops.h).
 */
struct hermonprm_event_db_register_st {
/* 24-bit index field (presumably "consumer index"); hermon_poll_eq()
 * fills it from the event queue's next_idx.
 */
pseudo_bit_t ci[0x00018];
/* 7 unused bits (the field name is spelt "reserver" in this file) */
pseudo_bit_t reserver[0x00007];
/* Single-bit flag; never set by the visible code */
pseudo_bit_t a[0x00001];
} __attribute__ (( packed ));
struct hermonprm_scalar_parameter_st {
pseudo_bit_t value_hi[0x00020];
/* -------------- */
pseudo_bit_t value[0x00020];
} __attribute__ (( packed ));
/* Event mask passed to the MAP_EQ command: one flag per event type,
 * selecting which events are mapped to (or unmapped from) an event
 * queue.  Only port_state_change is set by the visible code.
 */
struct hermonprm_event_mask_st {
pseudo_bit_t reserved0[0x00020];
/* -------------- */
/* Flag for completion events; not used by the visible code */
pseudo_bit_t completion[0x00001];
pseudo_bit_t reserved1[0x0008];
/* Flag for port state change events.  NOTE(review): this sits 9 bits
 * after the start of the dword, which looks like it matches
 * HERMON_EV_PORT_STATE_CHANGE (0x09) - confirm against the PRM.
 */
pseudo_bit_t port_state_change[0x00001];
pseudo_bit_t reserved2[0x00016];
} __attribute__ (( packed ));
/* Port state change event queue entry: 32 reserved bits followed by
 * the port state change data.  hermon_event_port_state_change() reads
 * data.p and treats it as a one-based port number.
 */
struct hermonprm_port_state_change_event_st {
pseudo_bit_t reserved[0x00020];
struct hermonprm_port_state_change_st data;
} __attribute__ (( packed ));
/*
* Wrapper structures for hardware datatypes
*
@ -124,6 +154,9 @@ struct MLX_DECLARE_STRUCT ( hermonprm_completion_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_completion_with_error );
struct MLX_DECLARE_STRUCT ( hermonprm_cq_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_eqc );
struct MLX_DECLARE_STRUCT ( hermonprm_event_db_register );
struct MLX_DECLARE_STRUCT ( hermonprm_event_mask );
struct MLX_DECLARE_STRUCT ( hermonprm_event_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_hca_command_register );
struct MLX_DECLARE_STRUCT ( hermonprm_init_hca );
struct MLX_DECLARE_STRUCT ( hermonprm_init_port );
@ -132,6 +165,7 @@ struct MLX_DECLARE_STRUCT ( hermonprm_mcg_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_mgm_hash );
struct MLX_DECLARE_STRUCT ( hermonprm_mpt );
struct MLX_DECLARE_STRUCT ( hermonprm_mtt );
struct MLX_DECLARE_STRUCT ( hermonprm_port_state_change_event );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_ee_state_transitions );
struct MLX_DECLARE_STRUCT ( hermonprm_query_dev_cap );
@ -175,8 +209,14 @@ union hermonprm_completion_entry {
struct hermonprm_completion_with_error error;
} __attribute__ (( packed ));
/* An event queue entry, viewed either generically or as a specific
 * event.  The generic view provides the owner, event_type and
 * event_sub_type fields used by the event queue code to decide which
 * specific view applies.
 */
union hermonprm_event_entry {
/* View common to all events */
struct hermonprm_event_queue_entry generic;
/* View used for HERMON_EV_PORT_STATE_CHANGE events */
struct hermonprm_port_state_change_event port_state_change;
} __attribute__ (( packed ));
/* A doorbell register value, built through one of the structured
 * views and then written to the device as a raw 32-bit word.
 */
union hermonprm_doorbell_register {
/* Send doorbell view */
struct hermonprm_send_db_register send;
/* Event queue doorbell view */
struct hermonprm_event_db_register event;
/* Raw view; hermon_poll_eq() passes dword[0] to writel() */
uint32_t dword[1];
} __attribute__ (( packed ));
@ -362,6 +402,24 @@ struct hermon_completion_queue {
*/
#define HERMON_MAX_EQS 4
/** A Hermon event queue
 *
 * Filled in by hermon_create_eq() and zeroed again by
 * hermon_destroy_eq().
 */
struct hermon_event_queue {
/** Event queue entries (DMA-allocated ring of HERMON_NUM_EQES entries) */
union hermonprm_event_entry *eqe;
/** Size of event queue, in bytes */
size_t eqe_size;
/** MTT descriptor */
struct hermon_mtt mtt;
/** Next event queue entry index
 *
 * This is a free-running counter: the low bits select the ring
 * slot, and the HERMON_NUM_EQES bit gives the owner bit value
 * expected for an entry that is ready to be consumed.
 */
unsigned long next_idx;
};
/** Number of event queue entries
*
* This is a policy decision.
*/
#define HERMON_NUM_EQES 4
/** A Hermon resource bitmask */
typedef uint32_t hermon_bitmask_t;
@ -397,6 +455,11 @@ struct hermon {
*/
unsigned long reserved_lkey;
/** Event queue */
struct hermon_event_queue eq;
/** Event queue process */
struct process event_process;
/** Completion queue in-use bitmask */
hermon_bitmask_t cq_inuse[ HERMON_BITMASK_SIZE ( HERMON_MAX_CQS ) ];
/** Queue pair in-use bitmask */

View File

@ -80,10 +80,14 @@ struct ipoib_device {
struct ib_gid broadcast_gid;
/** Broadcast LID */
unsigned int broadcast_lid;
/** Joined to broadcast group */
int broadcast_joined;
/** Data queue key */
unsigned long data_qkey;
/** Attached to multicast group
*
* This flag indicates whether or not we have attached our
* data queue pair to the broadcast multicast GID.
*/
int broadcast_attached;
};
/**
@ -272,6 +276,10 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
struct ib_device *ibdev = ipoib->ibdev;
int rc;
/* Sanity check */
assert ( qset->cq == NULL );
assert ( qset->qp == NULL );
/* Store queue parameters */
qset->recv_max_fill = num_recv_wqes;
@ -617,14 +625,24 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
*/
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
struct ib_mad_mc_member_record *mc_member_record ) {
int joined;
int rc;
/* Record parameters */
ipoib->broadcast_joined =
( mc_member_record->scope__join_state & 0x0f );
joined = ( mc_member_record->scope__join_state & 0x0f );
ipoib->data_qkey = ntohl ( mc_member_record->qkey );
ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
ipoib->data_qkey, ipoib->broadcast_lid );
ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
ipoib->broadcast_lid );
/* Update data queue pair qkey */
if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
ipoib, strerror ( rc ) );
return;
}
}
/**
@ -741,6 +759,56 @@ static void ipoib_irq ( struct net_device *netdev __unused,
/* No implementation */
}
/**
 * Join IPv4 broadcast multicast group
 *
 * Attaches the data queue pair to the broadcast multicast GID and
 * sends the multicast member record join request.  Does nothing (and
 * reports success) if the data queue pair does not yet exist.
 *
 * On failure the data queue pair is left detached and
 * @c broadcast_attached is left clear, so the caller may safely
 * destroy the queue pair or retry the join later.
 *
 * @v ipoib		IPoIB device
 * @ret rc		Return status code
 */
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
	int rc;

	/* Sanity check */
	if ( ! ipoib->data.qp )
		return 0;

	/* Attach data queue to broadcast multicast GID */
	assert ( ipoib->broadcast_attached == 0 );
	if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
				      &ipoib->broadcast_gid ) ) != 0 ){
		DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
		       "%s\n", ipoib, strerror ( rc ) );
		goto err_mcast_attach;
	}
	ipoib->broadcast_attached = 1;

	/* Initiate broadcast group join */
	if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
					     1 ) ) != 0 ) {
		DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
		       ipoib, strerror ( rc ) );
		goto err_mc_member_record;
	}

	return 0;

 err_mc_member_record:
	/* Undo the attach.  Otherwise a caller that destroys the data
	 * queue pair on failure would leave it attached to the
	 * multicast group, and the stale flag would trip the
	 * assertion above on the next join attempt.
	 */
	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
			  &ipoib->broadcast_gid );
	ipoib->broadcast_attached = 0;
 err_mcast_attach:
	return rc;
}
/**
 * Leave IPv4 broadcast multicast group
 *
 * Detaches the data queue pair from the broadcast multicast GID if it
 * is currently attached; otherwise does nothing.
 *
 * @v ipoib		IPoIB device
 */
static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {

	/* Nothing to undo unless we are currently attached */
	if ( ! ipoib->broadcast_attached )
		return;

	/* Detach data queue from broadcast multicast GID */
	assert ( ipoib->data.qp != NULL );
	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
			  &ipoib->broadcast_gid );
	ipoib->broadcast_attached = 0;
}
/**
* Open IPoIB network device
*
@ -749,22 +817,53 @@ static void ipoib_irq ( struct net_device *netdev __unused,
*/
static int ipoib_open ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
int rc;
/* Attach to broadcast multicast GID */
if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
&ipoib->broadcast_gid ) ) != 0 ) {
DBG ( "Could not attach to broadcast GID: %s\n",
strerror ( rc ) );
return rc;
/* Allocate metadata queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
IPOIB_META_NUM_CQES,
IPOIB_META_NUM_SEND_WQES,
IPOIB_META_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_meta_qset;
}
/* Allocate data queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
IPOIB_DATA_NUM_CQES,
IPOIB_DATA_NUM_SEND_WQES,
IPOIB_DATA_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_data_qset;
}
/* Update MAC address with data QPN */
mac->qpn = htonl ( ipoib->data.qp->qpn );
/* Fill receive rings */
ipoib_refill_recv ( ipoib, &ipoib->meta );
ipoib_refill_recv ( ipoib, &ipoib->data );
/* Join broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
ipoib, strerror ( rc ) );
goto err_join_broadcast;
}
return 0;
err_join_broadcast:
ipoib_destroy_qset ( ipoib, &ipoib->data );
err_create_data_qset:
ipoib_destroy_qset ( ipoib, &ipoib->meta );
err_create_meta_qset:
return rc;
}
/**
@ -774,12 +873,17 @@ static int ipoib_open ( struct net_device *netdev ) {
*/
static void ipoib_close ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
/* Detach from broadcast multicast GID */
ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
/* Leave broadcast group */
ipoib_leave_broadcast_group ( ipoib );
/* FIXME: should probably flush the receive ring */
/* Remove data QPN from MAC address */
mac->qpn = 0;
/* Tear down the queues */
ipoib_destroy_qset ( ipoib, &ipoib->data );
ipoib_destroy_qset ( ipoib, &ipoib->meta );
}
/** IPoIB network device operations */
@ -792,44 +896,53 @@ static struct net_device_operations ipoib_operations = {
};
/**
* Join IPoIB broadcast group
* Update IPoIB dynamic Infiniband parameters
*
* @v ipoib IPoIB device
* @ret rc Return status code
*
* The Infiniband port GID and partition key will change at runtime,
* when the link is established (or lost). The MAC address is based
* on the port GID, and the broadcast GID is based on the partition
* key. This function recalculates these IPoIB device parameters.
*/
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
struct ib_device *ibdev = ipoib->ibdev;
unsigned int delay_ms;
struct ipoib_mac *mac;
/* Calculate GID portion of MAC address based on port GID */
mac = ( ( struct ipoib_mac * ) ipoib->netdev->ll_addr );
memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
/* Calculate broadcast GID based on partition key */
memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
sizeof ( ipoib->broadcast_gid ) );
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
}
/**
* Handle link status change
*
* @v ibdev Infiniband device
*/
void ipoib_link_state_changed ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
struct ipoib_device *ipoib = netdev->priv;
int rc;
/* Make sure we have some receive descriptors */
ipoib_refill_recv ( ipoib, &ipoib->meta );
/* Leave existing broadcast group */
ipoib_leave_broadcast_group ( ipoib );
/* Send join request */
if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
1 ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
ipoib, strerror ( rc ) );
return rc;
}
/* Wait for join to complete. Ideally we wouldn't delay for
* this long, but we need the queue key before we can set up
* the data queue pair, which we need before we can know the
* MAC address.
/* Update MAC address and broadcast GID based on new port GID
* and partition key.
*/
for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
mdelay ( 1 );
ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
ipoib_meta_complete_recv );
ipoib_refill_recv ( ipoib, &ipoib->meta );
if ( ipoib->broadcast_joined )
return 0;
}
DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
ipoib );
ipoib_set_ib_params ( ipoib );
return -ETIMEDOUT;
/* Join new broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
"%s\n", ipoib, strerror ( rc ) );
return;
}
}
/**
@ -841,7 +954,6 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
int ipoib_probe ( struct ib_device *ibdev ) {
struct net_device *netdev;
struct ipoib_device *ipoib;
struct ipoib_mac *mac;
int rc;
/* Allocate network device */
@ -856,44 +968,11 @@ int ipoib_probe ( struct ib_device *ibdev ) {
ipoib->netdev = netdev;
ipoib->ibdev = ibdev;
/* Calculate broadcast GID */
memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
sizeof ( ipoib->broadcast_gid ) );
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
/* Allocate metadata queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
IPOIB_META_NUM_CQES,
IPOIB_META_NUM_SEND_WQES,
IPOIB_META_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_meta_qset;
}
/* Join broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
ipoib, strerror ( rc ) );
goto err_join_broadcast_group;
}
/* Allocate data queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
IPOIB_DATA_NUM_CQES,
IPOIB_DATA_NUM_SEND_WQES,
IPOIB_DATA_NUM_RECV_WQES,
ipoib->data_qkey ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_data_qset;
}
/* Construct MAC address */
mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
mac->qpn = htonl ( ipoib->data.qp->qpn );
memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
/* Calculate as much of the broadcast GID and the MAC address
* as we can. We won't know either of these in full until we
* have link-up.
*/
ipoib_set_ib_params ( ipoib );
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
@ -902,11 +981,6 @@ int ipoib_probe ( struct ib_device *ibdev ) {
return 0;
err_register_netdev:
ipoib_destroy_qset ( ipoib, &ipoib->data );
err_join_broadcast_group:
err_create_data_qset:
ipoib_destroy_qset ( ipoib, &ipoib->meta );
err_create_meta_qset:
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
@ -919,11 +993,8 @@ int ipoib_probe ( struct ib_device *ibdev ) {
*/
void ipoib_remove ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
struct ipoib_device *ipoib = netdev->priv;
unregister_netdev ( netdev );
ipoib_destroy_qset ( ipoib, &ipoib->data );
ipoib_destroy_qset ( ipoib, &ipoib->meta );
netdev_nullify ( netdev );
netdev_put ( netdev );
}

View File

@ -95,6 +95,11 @@ struct ib_queue_pair {
void *owner_priv;
};
/** Infiniband queue pair modification flags
 *
 * These are bit flags, combined into the @c mod_list argument of
 * ib_modify_qp() to select which queue pair attributes to change.
 */
enum ib_queue_pair_mods {
	/** Change the queue key */
	IB_MODIFY_QKEY = ( 1 << 0 ),
};
/** An Infiniband Completion Queue */
struct ib_completion_queue {
/** Completion queue number */
@ -187,6 +192,16 @@ struct ib_device_operations {
*/
int ( * create_qp ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
/** Modify queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v mod_list Modification list
* @ret rc Return status code
*/
int ( * modify_qp ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp,
unsigned long mod_list );
/** Destroy queue pair
*
* @v ibdev Infiniband device
@ -291,6 +306,8 @@ struct ib_device {
struct ib_device_operations *op;
/** Port number */
unsigned int port;
/** Link state */
int link_up;
/** Port GID */
struct ib_gid port_gid;
/** Subnet manager LID */
@ -311,6 +328,8 @@ extern struct ib_queue_pair *
ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes,
struct ib_completion_queue *send_cq, unsigned int num_recv_wqes,
struct ib_completion_queue *recv_cq, unsigned long qkey );
extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
unsigned long mod_list, unsigned long qkey );
extern void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
@ -319,6 +338,7 @@ extern struct ib_device * alloc_ibdev ( size_t priv_size );
extern int register_ibdev ( struct ib_device *ibdev );
extern void unregister_ibdev ( struct ib_device *ibdev );
extern void free_ibdev ( struct ib_device *ibdev );
extern void ib_link_state_changed ( struct ib_device *ibdev );
/**
* Post send work queue entry

View File

@ -72,6 +72,7 @@ static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) {
return netdev;
}
extern void ipoib_link_state_changed ( struct ib_device *ibdev );
extern int ipoib_probe ( struct ib_device *ibdev );
extern void ipoib_remove ( struct ib_device *ibdev );

View File

@ -152,15 +152,41 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
return qp;
}
/**
 * Modify queue pair
 *
 * Records the new attribute values in the queue pair and asks the
 * driver to apply them.  If the driver rejects the modification, the
 * queue pair's recorded attributes are restored, so that they keep
 * describing what the hardware is actually using.
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v mod_list		Modification list (IB_MODIFY_XXX flags)
 * @v qkey		New queue key, if applicable
 * @ret rc		Return status code
 */
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		   unsigned long mod_list, unsigned long qkey ) {
	unsigned long old_qkey = qp->qkey;
	int rc;

	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );

	/* The driver reads the new values from the queue pair itself */
	if ( mod_list & IB_MODIFY_QKEY )
		qp->qkey = qkey;

	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
		       ibdev, qp->qpn, strerror ( rc ) );
		/* Roll back: the modification did not take effect */
		qp->qkey = old_qkey;
		return rc;
	}

	return 0;
}
/**
* Destroy queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp ) {
DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
ibdev, qp->qpn );
ibdev->op->destroy_qp ( ibdev, qp );
list_del ( &qp->send.list );
@ -279,38 +305,6 @@ static int ib_get_pkey_table ( struct ib_device *ibdev,
return 0;
}
/**
 * Wait for link up
 *
 * Polls the port info up to 20 times, one second apart, until the
 * port state field reads 4.
 *
 * This function shouldn't really exist.  Unfortunately, IB links take
 * a long time to come up, and we can't get various key parameters
 * (e.g. our own IPoIB MAC address) without information from the
 * subnet manager.  We should eventually make link-up an asynchronous
 * event.
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code (0 on link-up, -ENODEV on timeout)
 */
static int ib_wait_for_link ( struct ib_device *ibdev ) {
	struct ib_mad_port_info port_info;
	unsigned int retries;
	int rc;

	printf ( "Waiting for Infiniband link-up..." );
	for ( retries = 20 ; retries ; retries-- ) {
		rc = ib_get_port_info ( ibdev, &port_info );
		if ( ( rc == 0 ) &&
		     ( ( port_info.port_state__link_speed_supported & 0xf )
		       == 4 ) ) {
			printf ( "ok\n" );
			return 0;
		}
		/* Always pause before retrying, including when the
		 * port info query itself failed; otherwise a failing
		 * query would use up every retry with no delay at all.
		 */
		printf ( "." );
		sleep ( 1 );
	}
	printf ( "failed\n" );
	return -ENODEV;
}
/**
* Get MAD parameters
*
@ -326,9 +320,13 @@ static int ib_get_mad_params ( struct ib_device *ibdev ) {
} u;
int rc;
/* Port info gives us the first half of the port GID and the SM LID */
/* Port info gives us the link state, the first half of the
* port GID and the SM LID.
*/
if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 )
return rc;
ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported
& 0xf ) == 4 );
memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 );
ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid );
@ -391,10 +389,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
if ( ( rc = ib_open ( ibdev ) ) != 0 )
goto err_open;
/* Wait for link */
if ( ( rc = ib_wait_for_link ( ibdev ) ) != 0 )
goto err_wait_for_link;
/* Get MAD parameters */
if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 )
goto err_get_mad_params;
@ -410,7 +404,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
err_ipoib_probe:
err_get_mad_params:
err_wait_for_link:
ib_close ( ibdev );
err_open:
return rc;
@ -435,3 +428,21 @@ void free_ibdev ( struct ib_device *ibdev ) {
free ( ibdev );
}
/**
 * Handle Infiniband link state change
 *
 * Re-reads the MAD parameters for the device and, if that succeeds,
 * notifies IPoIB so that it can update the parameters it derives from
 * them.  If the MAD parameters cannot be read, IPoIB is not notified.
 *
 * @v ibdev		Infiniband device
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {
	int rc;

	/* Update MAD parameters */
	rc = ib_get_mad_params ( ibdev );
	if ( rc == 0 ) {
		/* Notify IPoIB of link state change */
		ipoib_link_state_changed ( ibdev );
		return;
	}

	DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n",
	       ibdev, strerror ( rc ) );
}