mirror of https://github.com/ipxe/ipxe.git
[eoib] Add Ethernet over Infiniband (EoIB) driver
EoIB is a fairly simple protocol in which raw Ethernet frames (excluding the CRC) are encapsulated within Infiniband Unreliable Datagrams, with a four-byte fixed EoIB header (which conveys no actual information). The Ethernet broadcast domain is provided by a multicast group, similar to the IPoIB IPv4 multicast group. The mapping from Ethernet MAC addresses to Infiniband address vectors is achieved by snooping incoming traffic and building a peer cache which can then be used to map a MAC address into a port GID. The address vector is completed using a path record lookup, as for IPoIB. Note that this requires every packet to include a GRH. Add basic support for EoIB devices. This driver is substantially derived from the IPoIB driver. There is currently no mechanism for automatically creating EoIB devices. Signed-off-by: Michael Brown <mcb30@ipxe.org> (pull/46/head)
parent
5bcaa1e4d4
commit
9154d7a65c
|
@ -0,0 +1,754 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License as
|
||||||
|
* published by the Free Software Foundation; either version 2 of the
|
||||||
|
* License, or any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but
|
||||||
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* You can also choose to distribute this program under the terms of
|
||||||
|
* the Unmodified Binary Distribution Licence (as given in the file
|
||||||
|
* COPYING.UBDL), provided that you have satisfied its requirements.
|
||||||
|
*/
|
||||||
|
|
||||||
|
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <ipxe/errortab.h>
|
||||||
|
#include <ipxe/malloc.h>
|
||||||
|
#include <ipxe/iobuf.h>
|
||||||
|
#include <ipxe/if_ether.h>
|
||||||
|
#include <ipxe/netdevice.h>
|
||||||
|
#include <ipxe/ethernet.h>
|
||||||
|
#include <ipxe/infiniband.h>
|
||||||
|
#include <ipxe/ib_mcast.h>
|
||||||
|
#include <ipxe/ib_pathrec.h>
|
||||||
|
#include <ipxe/eoib.h>
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
*
|
||||||
|
* Ethernet over Infiniband
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Number of send work queue entries allocated for the EoIB queue pair */
#define EOIB_NUM_SEND_WQES 8

/** Number of receive work queue entries allocated for the EoIB queue pair */
#define EOIB_NUM_RECV_WQES 4

/** Number of entries in the EoIB completion queue
 *
 * A single completion queue is used for both the send and the receive
 * work queues.
 */
#define EOIB_NUM_CQES 16
|
||||||
|
|
||||||
|
/** Link status for "broadcast join in progress" */
|
||||||
|
#define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
|
||||||
|
#define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
|
||||||
|
( EINFO_EINPROGRESS, 0x01, "Joining" )
|
||||||
|
|
||||||
|
/** Human-readable message for the link status */
|
||||||
|
struct errortab eoib_errors[] __errortab = {
|
||||||
|
__einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
|
||||||
|
};
|
||||||
|
|
||||||
|
/** List of EoIB devices */
|
||||||
|
static LIST_HEAD ( eoib_devices );
|
||||||
|
|
||||||
|
static struct net_device_operations eoib_operations;
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* EoIB peer cache
|
||||||
|
*
|
||||||
|
****************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** An EoIB peer cache entry */
|
||||||
|
struct eoib_peer {
|
||||||
|
/** List of EoIB peer cache entries */
|
||||||
|
struct list_head list;
|
||||||
|
/** Ethernet MAC */
|
||||||
|
uint8_t mac[ETH_ALEN];
|
||||||
|
/** Infiniband address vector */
|
||||||
|
struct ib_address_vector av;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find EoIB peer cache entry
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
* @v mac Ethernet MAC
|
||||||
|
* @ret peer EoIB peer, or NULL if not found
|
||||||
|
*/
|
||||||
|
static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
|
||||||
|
const uint8_t *mac ) {
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
|
||||||
|
/* Find peer cache entry */
|
||||||
|
list_for_each_entry ( peer, &eoib->peers, list ) {
|
||||||
|
if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
|
||||||
|
/* Move peer to start of list */
|
||||||
|
list_del ( &peer->list );
|
||||||
|
list_add ( &peer->list, &eoib->peers );
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create EoIB peer cache entry
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
* @v mac Ethernet MAC
|
||||||
|
* @ret peer EoIB peer, or NULL on error
|
||||||
|
*/
|
||||||
|
static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
|
||||||
|
const uint8_t *mac ) {
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
|
||||||
|
/* Allocate and initialise peer cache entry */
|
||||||
|
peer = zalloc ( sizeof ( *peer ) );
|
||||||
|
if ( peer ) {
|
||||||
|
memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
|
||||||
|
list_add ( &peer->list, &eoib->peers );
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush EoIB peer cache
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
*/
|
||||||
|
static void eoib_flush_peers ( struct eoib_device *eoib ) {
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
struct eoib_peer *tmp;
|
||||||
|
|
||||||
|
list_for_each_entry_safe ( peer, tmp, &eoib->peers, list ) {
|
||||||
|
list_del ( &peer->list );
|
||||||
|
free ( peer );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Discard some entries from the peer cache
|
||||||
|
*
|
||||||
|
* @ret discarded Number of cached items discarded
|
||||||
|
*/
|
||||||
|
static unsigned int eoib_discard ( void ) {
|
||||||
|
struct net_device *netdev;
|
||||||
|
struct eoib_device *eoib;
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
unsigned int discarded = 0;
|
||||||
|
|
||||||
|
/* Try to discard one cache entry for each EoIB device */
|
||||||
|
for_each_netdev ( netdev ) {
|
||||||
|
|
||||||
|
/* Skip non-EoIB devices */
|
||||||
|
if ( netdev->op != &eoib_operations )
|
||||||
|
continue;
|
||||||
|
eoib = netdev->priv;
|
||||||
|
|
||||||
|
/* Discard least recently used cache entry (if any) */
|
||||||
|
list_for_each_entry_reverse ( peer, &eoib->peers, list ) {
|
||||||
|
list_del ( &peer->list );
|
||||||
|
free ( peer );
|
||||||
|
discarded++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return discarded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** EoIB cache discarder */
|
||||||
|
struct cache_discarder eoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
|
||||||
|
.discard = eoib_discard,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find destination address vector
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
* @v mac Ethernet MAC
|
||||||
|
* @ret av Address vector, or NULL to send as broadcast
|
||||||
|
*/
|
||||||
|
static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
|
||||||
|
const uint8_t *mac ) {
|
||||||
|
struct ib_device *ibdev = eoib->ibdev;
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* If this is a broadcast or multicast MAC address, then send
|
||||||
|
* this packet as a broadcast.
|
||||||
|
*/
|
||||||
|
if ( is_multicast_ether_addr ( mac ) ) {
|
||||||
|
DBGCP ( eoib, "EoIB %s %s TX multicast\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we have no peer cache entry, then create one and send
|
||||||
|
* this packet as a broadcast.
|
||||||
|
*/
|
||||||
|
peer = eoib_find_peer ( eoib, mac );
|
||||||
|
if ( ! peer ) {
|
||||||
|
DBGC ( eoib, "EoIB %s %s TX unknown\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
eoib_create_peer ( eoib, mac );
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we have not yet recorded a received GID and QPN for this
|
||||||
|
* peer cache entry, then send this packet as a broadcast.
|
||||||
|
*/
|
||||||
|
if ( ! peer->av.gid_present ) {
|
||||||
|
DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we have not yet resolved a path to this peer, then send
|
||||||
|
* this packet as a broadcast.
|
||||||
|
*/
|
||||||
|
if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
|
||||||
|
DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Force use of GRH even for local destinations */
|
||||||
|
peer->av.gid_present = 1;
|
||||||
|
|
||||||
|
/* We have a fully resolved peer: send this packet as a
|
||||||
|
* unicast.
|
||||||
|
*/
|
||||||
|
DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
|
||||||
|
eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
|
||||||
|
return &peer->av;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record source address vector
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
* @v mac Ethernet MAC
|
||||||
|
* @v lid Infiniband LID
|
||||||
|
*/
|
||||||
|
static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
|
||||||
|
const struct ib_address_vector *av ) {
|
||||||
|
const union ib_gid *gid = &av->gid;
|
||||||
|
unsigned long qpn = av->qpn;
|
||||||
|
struct eoib_peer *peer;
|
||||||
|
|
||||||
|
/* Sanity checks */
|
||||||
|
if ( ! av->gid_present ) {
|
||||||
|
DBGC ( eoib, "EoIB %s %s RX with no GID\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find peer cache entry (if any) */
|
||||||
|
peer = eoib_find_peer ( eoib, mac );
|
||||||
|
if ( ! peer ) {
|
||||||
|
DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do nothing if peer cache entry is complete and correct */
|
||||||
|
if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
|
||||||
|
DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
|
||||||
|
eoib->name, eth_ntoa ( mac ) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update peer cache entry */
|
||||||
|
peer->av.qpn = qpn;
|
||||||
|
peer->av.qkey = eoib->broadcast.qkey;
|
||||||
|
peer->av.gid_present = 1;
|
||||||
|
memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
|
||||||
|
DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
|
||||||
|
eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
|
||||||
|
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* EoIB network device
|
||||||
|
*
|
||||||
|
****************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transmit packet via EoIB network device
|
||||||
|
*
|
||||||
|
* @v netdev Network device
|
||||||
|
* @v iobuf I/O buffer
|
||||||
|
* @ret rc Return status code
|
||||||
|
*/
|
||||||
|
static int eoib_transmit ( struct net_device *netdev,
|
||||||
|
struct io_buffer *iobuf ) {
|
||||||
|
struct eoib_device *eoib = netdev->priv;
|
||||||
|
struct eoib_header *eoib_hdr;
|
||||||
|
struct ethhdr *ethhdr;
|
||||||
|
struct ib_address_vector *av;
|
||||||
|
size_t zlen;
|
||||||
|
|
||||||
|
/* Sanity checks */
|
||||||
|
assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
|
||||||
|
assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
|
||||||
|
|
||||||
|
/* Look up destination address vector */
|
||||||
|
ethhdr = iobuf->data;
|
||||||
|
av = eoib_tx_av ( eoib, ethhdr->h_dest );
|
||||||
|
|
||||||
|
/* Prepend EoIB header */
|
||||||
|
eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
|
||||||
|
eoib_hdr->magic = htons ( EOIB_MAGIC );
|
||||||
|
eoib_hdr->reserved = 0;
|
||||||
|
|
||||||
|
/* Pad buffer to minimum Ethernet frame size */
|
||||||
|
zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
|
||||||
|
assert ( zlen <= IOB_ZLEN );
|
||||||
|
if ( iob_len ( iobuf ) < zlen )
|
||||||
|
iob_pad ( iobuf, zlen );
|
||||||
|
|
||||||
|
/* If we have no unicast address then send as a broadcast */
|
||||||
|
if ( ! av )
|
||||||
|
av = &eoib->broadcast;
|
||||||
|
|
||||||
|
/* Post send work queue entry */
|
||||||
|
return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle EoIB send completion
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
* @v qp Queue pair
|
||||||
|
* @v iobuf I/O buffer
|
||||||
|
* @v rc Completion status code
|
||||||
|
*/
|
||||||
|
static void eoib_complete_send ( struct ib_device *ibdev __unused,
|
||||||
|
struct ib_queue_pair *qp,
|
||||||
|
struct io_buffer *iobuf, int rc ) {
|
||||||
|
struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
|
||||||
|
|
||||||
|
netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle EoIB receive completion
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
* @v qp Queue pair
|
||||||
|
* @v dest Destination address vector, or NULL
|
||||||
|
* @v source Source address vector, or NULL
|
||||||
|
* @v iobuf I/O buffer
|
||||||
|
* @v rc Completion status code
|
||||||
|
*/
|
||||||
|
static void eoib_complete_recv ( struct ib_device *ibdev __unused,
|
||||||
|
struct ib_queue_pair *qp,
|
||||||
|
struct ib_address_vector *dest __unused,
|
||||||
|
struct ib_address_vector *source,
|
||||||
|
struct io_buffer *iobuf, int rc ) {
|
||||||
|
struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
|
||||||
|
struct net_device *netdev = eoib->netdev;
|
||||||
|
struct eoib_header *eoib_hdr;
|
||||||
|
struct ethhdr *ethhdr;
|
||||||
|
|
||||||
|
/* Record errors */
|
||||||
|
if ( rc != 0 ) {
|
||||||
|
netdev_rx_err ( netdev, iobuf, rc );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sanity check */
|
||||||
|
if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
|
||||||
|
DBGC ( eoib, "EoIB %s received packet too short to "
|
||||||
|
"contain EoIB and Ethernet headers\n", eoib->name );
|
||||||
|
DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
|
||||||
|
netdev_rx_err ( netdev, iobuf, -EIO );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( ! source ) {
|
||||||
|
DBGC ( eoib, "EoIB %s received packet without address "
|
||||||
|
"vector\n", eoib->name );
|
||||||
|
netdev_rx_err ( netdev, iobuf, -ENOTTY );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Strip EoIB header */
|
||||||
|
iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
|
||||||
|
|
||||||
|
/* Update neighbour cache entry, if any */
|
||||||
|
ethhdr = iobuf->data;
|
||||||
|
eoib_rx_av ( eoib, ethhdr->h_source, source );
|
||||||
|
|
||||||
|
/* Hand off to network layer */
|
||||||
|
netdev_rx ( netdev, iobuf );
|
||||||
|
}
|
||||||
|
|
||||||
|
/** EoIB completion operations */
|
||||||
|
static struct ib_completion_queue_operations eoib_cq_op = {
|
||||||
|
.complete_send = eoib_complete_send,
|
||||||
|
.complete_recv = eoib_complete_recv,
|
||||||
|
};
|
||||||
|
|
||||||
|
/** EoIB queue pair operations */
|
||||||
|
static struct ib_queue_pair_operations eoib_qp_op = {
|
||||||
|
.alloc_iob = alloc_iob,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Poll EoIB network device
|
||||||
|
*
|
||||||
|
* @v netdev Network device
|
||||||
|
*/
|
||||||
|
static void eoib_poll ( struct net_device *netdev ) {
|
||||||
|
struct eoib_device *eoib = netdev->priv;
|
||||||
|
struct ib_device *ibdev = eoib->ibdev;
|
||||||
|
|
||||||
|
/* Poll Infiniband device */
|
||||||
|
ib_poll_eq ( ibdev );
|
||||||
|
|
||||||
|
/* Poll the retry timers (required for EoIB multicast join) */
|
||||||
|
retry_poll();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle EoIB broadcast multicast group join completion
|
||||||
|
*
|
||||||
|
* @v membership Multicast group membership
|
||||||
|
* @v rc Status code
|
||||||
|
*/
|
||||||
|
static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
|
||||||
|
struct eoib_device *eoib =
|
||||||
|
container_of ( membership, struct eoib_device, membership );
|
||||||
|
|
||||||
|
/* Record join status as link status */
|
||||||
|
netdev_link_err ( eoib->netdev, rc );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Join EoIB broadcast multicast group
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
* @ret rc Return status code
|
||||||
|
*/
|
||||||
|
static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Join multicast group */
|
||||||
|
if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
|
||||||
|
&eoib->membership, &eoib->broadcast, 0,
|
||||||
|
eoib_join_complete ) ) != 0 ) {
|
||||||
|
DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
|
||||||
|
eoib->name, strerror ( rc ) );
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Leave EoIB broadcast multicast group
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
*/
|
||||||
|
static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
|
||||||
|
|
||||||
|
/* Leave multicast group */
|
||||||
|
ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle link status change
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
*/
|
||||||
|
static void eoib_link_state_changed ( struct eoib_device *eoib ) {
|
||||||
|
struct net_device *netdev = eoib->netdev;
|
||||||
|
struct ib_device *ibdev = eoib->ibdev;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Leave existing broadcast group */
|
||||||
|
if ( eoib->qp )
|
||||||
|
eoib_leave_broadcast_group ( eoib );
|
||||||
|
|
||||||
|
/* Update broadcast GID based on potentially-new partition key */
|
||||||
|
eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
|
||||||
|
|
||||||
|
/* Set net device link state to reflect Infiniband link state */
|
||||||
|
rc = ib_link_rc ( ibdev );
|
||||||
|
netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
|
||||||
|
|
||||||
|
/* Join new broadcast group */
|
||||||
|
if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
|
||||||
|
( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
|
||||||
|
DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
|
||||||
|
"%s\n", eoib->name, strerror ( rc ) );
|
||||||
|
netdev_link_err ( netdev, rc );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open EoIB network device
|
||||||
|
*
|
||||||
|
* @v netdev Network device
|
||||||
|
* @ret rc Return status code
|
||||||
|
*/
|
||||||
|
static int eoib_open ( struct net_device *netdev ) {
|
||||||
|
struct eoib_device *eoib = netdev->priv;
|
||||||
|
struct ib_device *ibdev = eoib->ibdev;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Open IB device */
|
||||||
|
if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
|
||||||
|
DBGC ( eoib, "EoIB %s could not open %s: %s\n",
|
||||||
|
eoib->name, ibdev->name, strerror ( rc ) );
|
||||||
|
goto err_ib_open;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate completion queue */
|
||||||
|
eoib->cq = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op );
|
||||||
|
if ( ! eoib->cq ) {
|
||||||
|
DBGC ( eoib, "EoIB %s could not allocate completion queue\n",
|
||||||
|
eoib->name );
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto err_create_cq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate queue pair */
|
||||||
|
eoib->qp = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
|
||||||
|
eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
|
||||||
|
&eoib_qp_op, netdev->name );
|
||||||
|
if ( ! eoib->qp ) {
|
||||||
|
DBGC ( eoib, "EoIB %s could not allocate queue pair\n",
|
||||||
|
eoib->name );
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto err_create_qp;
|
||||||
|
}
|
||||||
|
ib_qp_set_ownerdata ( eoib->qp, eoib );
|
||||||
|
|
||||||
|
/* Fill receive rings */
|
||||||
|
ib_refill_recv ( ibdev, eoib->qp );
|
||||||
|
|
||||||
|
/* Fake a link status change to join the broadcast group */
|
||||||
|
eoib_link_state_changed ( eoib );
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ib_destroy_qp ( ibdev, eoib->qp );
|
||||||
|
eoib->qp = NULL;
|
||||||
|
err_create_qp:
|
||||||
|
ib_destroy_cq ( ibdev, eoib->cq );
|
||||||
|
eoib->cq = NULL;
|
||||||
|
err_create_cq:
|
||||||
|
ib_close ( ibdev );
|
||||||
|
err_ib_open:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close EoIB network device
|
||||||
|
*
|
||||||
|
* @v netdev Network device
|
||||||
|
*/
|
||||||
|
static void eoib_close ( struct net_device *netdev ) {
|
||||||
|
struct eoib_device *eoib = netdev->priv;
|
||||||
|
struct ib_device *ibdev = eoib->ibdev;
|
||||||
|
|
||||||
|
/* Flush peer cache */
|
||||||
|
eoib_flush_peers ( eoib );
|
||||||
|
|
||||||
|
/* Leave broadcast group */
|
||||||
|
eoib_leave_broadcast_group ( eoib );
|
||||||
|
|
||||||
|
/* Tear down the queues */
|
||||||
|
ib_destroy_qp ( ibdev, eoib->qp );
|
||||||
|
eoib->qp = NULL;
|
||||||
|
ib_destroy_cq ( ibdev, eoib->cq );
|
||||||
|
eoib->cq = NULL;
|
||||||
|
|
||||||
|
/* Close IB device */
|
||||||
|
ib_close ( ibdev );
|
||||||
|
}
|
||||||
|
|
||||||
|
/** EoIB network device operations */
|
||||||
|
static struct net_device_operations eoib_operations = {
|
||||||
|
.open = eoib_open,
|
||||||
|
.close = eoib_close,
|
||||||
|
.transmit = eoib_transmit,
|
||||||
|
.poll = eoib_poll,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create EoIB device
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
* @v hw_addr Ethernet MAC
|
||||||
|
* @v broadcast Broadcast address vector
|
||||||
|
* @v name Interface name (or NULL to use default)
|
||||||
|
* @ret rc Return status code
|
||||||
|
*/
|
||||||
|
int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
|
||||||
|
struct ib_address_vector *broadcast, const char *name ) {
|
||||||
|
struct net_device *netdev;
|
||||||
|
struct eoib_device *eoib;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Allocate network device */
|
||||||
|
netdev = alloc_etherdev ( sizeof ( *eoib ) );
|
||||||
|
if ( ! netdev ) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto err_alloc;
|
||||||
|
}
|
||||||
|
netdev_init ( netdev, &eoib_operations );
|
||||||
|
eoib = netdev->priv;
|
||||||
|
netdev->dev = ibdev->dev;
|
||||||
|
eoib->netdev = netdev;
|
||||||
|
eoib->ibdev = ibdev_get ( ibdev );
|
||||||
|
memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
|
||||||
|
INIT_LIST_HEAD ( &eoib->peers );
|
||||||
|
|
||||||
|
/* Set MAC address */
|
||||||
|
memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
|
||||||
|
|
||||||
|
/* Set interface name, if applicable */
|
||||||
|
if ( name )
|
||||||
|
snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
|
||||||
|
eoib->name = netdev->name;
|
||||||
|
|
||||||
|
/* Add to list of EoIB devices */
|
||||||
|
list_add_tail ( &eoib->list, &eoib_devices );
|
||||||
|
|
||||||
|
/* Register network device */
|
||||||
|
if ( ( rc = register_netdev ( netdev ) ) != 0 )
|
||||||
|
goto err_register;
|
||||||
|
|
||||||
|
DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
|
||||||
|
eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
|
||||||
|
DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
|
||||||
|
eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
unregister_netdev ( netdev );
|
||||||
|
err_register:
|
||||||
|
list_del ( &eoib->list );
|
||||||
|
ibdev_put ( ibdev );
|
||||||
|
netdev_nullify ( netdev );
|
||||||
|
netdev_put ( netdev );
|
||||||
|
err_alloc:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find EoIB device
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
* @v hw_addr Original Ethernet MAC
|
||||||
|
* @ret eoib EoIB device
|
||||||
|
*/
|
||||||
|
struct eoib_device * eoib_find ( struct ib_device *ibdev,
|
||||||
|
const uint8_t *hw_addr ) {
|
||||||
|
struct eoib_device *eoib;
|
||||||
|
|
||||||
|
list_for_each_entry ( eoib, &eoib_devices, list ) {
|
||||||
|
if ( ( eoib->ibdev == ibdev ) &&
|
||||||
|
( memcmp ( eoib->netdev->hw_addr, hw_addr,
|
||||||
|
ETH_ALEN ) == 0 ) )
|
||||||
|
return eoib;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove EoIB device
|
||||||
|
*
|
||||||
|
* @v eoib EoIB device
|
||||||
|
*/
|
||||||
|
void eoib_destroy ( struct eoib_device *eoib ) {
|
||||||
|
struct net_device *netdev = eoib->netdev;
|
||||||
|
|
||||||
|
/* Unregister network device */
|
||||||
|
unregister_netdev ( netdev );
|
||||||
|
|
||||||
|
/* Remove from list of network devices */
|
||||||
|
list_del ( &eoib->list );
|
||||||
|
|
||||||
|
/* Drop reference to Infiniband device */
|
||||||
|
ibdev_put ( eoib->ibdev );
|
||||||
|
|
||||||
|
/* Free network device */
|
||||||
|
DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
|
||||||
|
netdev_nullify ( netdev );
|
||||||
|
netdev_put ( netdev );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Probe EoIB device
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
* @ret rc Return status code
|
||||||
|
*/
|
||||||
|
static int eoib_probe ( struct ib_device *ibdev __unused ) {
|
||||||
|
|
||||||
|
/* EoIB devices are not created automatically */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle device or link status change
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
*/
|
||||||
|
static void eoib_notify ( struct ib_device *ibdev ) {
|
||||||
|
struct eoib_device *eoib;
|
||||||
|
|
||||||
|
/* Handle link status change for any attached EoIB devices */
|
||||||
|
list_for_each_entry ( eoib, &eoib_devices, list ) {
|
||||||
|
if ( eoib->ibdev != ibdev )
|
||||||
|
continue;
|
||||||
|
eoib_link_state_changed ( eoib );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove EoIB device
|
||||||
|
*
|
||||||
|
* @v ibdev Infiniband device
|
||||||
|
*/
|
||||||
|
static void eoib_remove ( struct ib_device *ibdev ) {
|
||||||
|
struct eoib_device *eoib;
|
||||||
|
struct eoib_device *tmp;
|
||||||
|
|
||||||
|
/* Remove any attached EoIB devices */
|
||||||
|
list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
|
||||||
|
if ( eoib->ibdev != ibdev )
|
||||||
|
continue;
|
||||||
|
eoib_destroy ( eoib );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** EoIB driver */
|
||||||
|
struct ib_driver eoib_driver __ib_driver = {
|
||||||
|
.name = "EoIB",
|
||||||
|
.probe = eoib_probe,
|
||||||
|
.notify = eoib_notify,
|
||||||
|
.remove = eoib_remove,
|
||||||
|
};
|
|
@ -0,0 +1,60 @@
|
||||||
|
#ifndef _IPXE_EOIB_H
|
||||||
|
#define _IPXE_EOIB_H
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
*
|
||||||
|
* Ethernet over Infiniband
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <byteswap.h>
|
||||||
|
#include <ipxe/netdevice.h>
|
||||||
|
#include <ipxe/infiniband.h>
|
||||||
|
#include <ipxe/ib_mcast.h>
|
||||||
|
|
||||||
|
/** An EoIB header
 *
 * Fixed four-byte header prepended to each encapsulated Ethernet
 * frame.
 */
struct eoib_header {
	/** Signature (EOIB_MAGIC, in network byte order) */
	uint16_t magic;
	/** Reserved (transmitted as zero) */
	uint16_t reserved;
} __attribute__ (( packed ));

/** EoIB magic signature (host byte order) */
#define EOIB_MAGIC 0x8919
|
||||||
|
|
||||||
|
/** An EoIB device */
|
||||||
|
struct eoib_device {
|
||||||
|
/** Name */
|
||||||
|
const char *name;
|
||||||
|
/** Network device */
|
||||||
|
struct net_device *netdev;
|
||||||
|
/** Underlying Infiniband device */
|
||||||
|
struct ib_device *ibdev;
|
||||||
|
/** List of EoIB devices */
|
||||||
|
struct list_head list;
|
||||||
|
/** Broadcast address */
|
||||||
|
struct ib_address_vector broadcast;
|
||||||
|
|
||||||
|
/** Completion queue */
|
||||||
|
struct ib_completion_queue *cq;
|
||||||
|
/** Queue pair */
|
||||||
|
struct ib_queue_pair *qp;
|
||||||
|
/** Broadcast group membership */
|
||||||
|
struct ib_mc_membership membership;
|
||||||
|
|
||||||
|
/** Peer cache */
|
||||||
|
struct list_head peers;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Create an EoIB device on top of an Infiniband device */
extern int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
			 struct ib_address_vector *broadcast,
			 const char *name );

/* Find an EoIB device by Infiniband device and MAC address */
extern struct eoib_device * eoib_find ( struct ib_device *ibdev,
					const uint8_t *hw_addr );

/* Unregister and free an EoIB device */
extern void eoib_destroy ( struct eoib_device *eoib );
|
||||||
|
|
||||||
|
#endif /* _IPXE_EOIB_H */
|
|
@ -185,6 +185,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||||
#define ERRFILE_intelxvf ( ERRFILE_DRIVER | 0x00790000 )
|
#define ERRFILE_intelxvf ( ERRFILE_DRIVER | 0x00790000 )
|
||||||
#define ERRFILE_smsc95xx ( ERRFILE_DRIVER | 0x007a0000 )
|
#define ERRFILE_smsc95xx ( ERRFILE_DRIVER | 0x007a0000 )
|
||||||
#define ERRFILE_acm ( ERRFILE_DRIVER | 0x007b0000 )
|
#define ERRFILE_acm ( ERRFILE_DRIVER | 0x007b0000 )
|
||||||
|
#define ERRFILE_eoib ( ERRFILE_DRIVER | 0x007c0000 )
|
||||||
|
|
||||||
#define ERRFILE_aoe ( ERRFILE_NET | 0x00000000 )
|
#define ERRFILE_aoe ( ERRFILE_NET | 0x00000000 )
|
||||||
#define ERRFILE_arp ( ERRFILE_NET | 0x00010000 )
|
#define ERRFILE_arp ( ERRFILE_NET | 0x00010000 )
|
||||||
|
|
Loading…
Reference in New Issue