mirror of https://github.com/ipxe/ipxe.git
[arm] Add optimised TCP/IP checksumming for 64-bit ARM
Signed-off-by: Michael Brown <mcb30@ipxe.org>pull/54/head^2
parent
95716ece91
commit
47931a4de5
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*
|
||||
* You can also choose to distribute this program under the terms of
|
||||
* the Unmodified Binary Distribution Licence (as given in the file
|
||||
* COPYING.UBDL), provided that you have satisfied its requirements.
|
||||
*/
|
||||
|
||||
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||
|
||||
/** @file
|
||||
*
|
||||
* TCP/IP checksum
|
||||
*
|
||||
*/
|
||||
|
||||
#include <strings.h>
|
||||
#include <ipxe/tcpip.h>
|
||||
|
||||
/** Alignment used by main checksumming loop
 *
 * Each step of the main loop consumes this many bytes.  The value is
 * also used as a bit mask (ALIGN - 1) when computing the pre- and
 * post-alignment lengths, so it must be a power of two.
 */
|
||||
#define TCPIP_CHKSUM_ALIGN 16
|
||||
|
||||
/** Number of steps in each iteration of the unrolled main checksumming loop
 *
 * Used as a bit mask (UNROLL - 1) to select how many steps run in the
 * first, partial iteration, so it must be a power of two.  It must
 * also match the number of load/accumulate steps written out in the
 * inline assembly loop.
 */
|
||||
#define TCPIP_CHKSUM_UNROLL 4
|
||||
|
||||
/**
|
||||
* Calculate continued TCP/IP checkum
|
||||
*
|
||||
* @v sum Checksum of already-summed data, in network byte order
|
||||
* @v data Data buffer
|
||||
* @v len Length of data buffer
|
||||
* @ret sum Updated checksum, in network byte order
|
||||
*/
|
||||
uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
|
||||
size_t len ) {
|
||||
intptr_t start;
|
||||
intptr_t end;
|
||||
intptr_t mid;
|
||||
unsigned int pre;
|
||||
unsigned int post;
|
||||
unsigned int first;
|
||||
uint64_t discard_low;
|
||||
uint64_t discard_high;
|
||||
|
||||
/* Avoid potentially undefined shift operation */
|
||||
if ( len == 0 )
|
||||
return sum;
|
||||
|
||||
/* Find maximally-aligned midpoint. For short blocks of data,
|
||||
* this may be aligned to fewer than 16 bytes.
|
||||
*/
|
||||
start = ( ( intptr_t ) data );
|
||||
end = ( start + len );
|
||||
mid = ( end &
|
||||
~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
|
||||
|
||||
/* Calculate pre- and post-alignment lengths */
|
||||
pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
|
||||
post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
|
||||
|
||||
/* Calculate number of steps in first iteration of unrolled loop */
|
||||
first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
|
||||
( TCPIP_CHKSUM_UNROLL - 1 ) );
|
||||
|
||||
/* Calculate checksum */
|
||||
__asm__ ( /* Invert sum */
|
||||
"eor %w0, %w0, #0xffff\n\t"
|
||||
/* Clear carry flag */
|
||||
"cmn xzr, xzr\n\t"
|
||||
/* Byteswap and sum pre-alignment byte, if applicable */
|
||||
"tbz %w4, #0, 1f\n\t"
|
||||
"ldrb %w2, [%1], #1\n\t"
|
||||
"rev16 %w0, %w0\n\t"
|
||||
"rev16 %w2, %w2\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum pre-alignment halfword, if applicable */
|
||||
"tbz %w4, #1, 1f\n\t"
|
||||
"ldrh %w2, [%1], #2\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum pre-alignment word, if applicable */
|
||||
"tbz %w4, #2, 1f\n\t"
|
||||
"ldr %w2, [%1], #4\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum pre-alignment doubleword, if applicable */
|
||||
"tbz %w4, #3, 1f\n\t"
|
||||
"ldr %2, [%1], #8\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Jump into unrolled (x4) main loop */
|
||||
"adr %2, 2f\n\t"
|
||||
"sub %2, %2, %5, lsl #3\n\t"
|
||||
"sub %2, %2, %5, lsl #2\n\t"
|
||||
"br %2\n\t"
|
||||
"\n1:\n\t"
|
||||
"ldp %2, %3, [%1], #16\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"adcs %0, %0, %3\n\t"
|
||||
"ldp %2, %3, [%1], #16\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"adcs %0, %0, %3\n\t"
|
||||
"ldp %2, %3, [%1], #16\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"adcs %0, %0, %3\n\t"
|
||||
"ldp %2, %3, [%1], #16\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"adcs %0, %0, %3\n\t"
|
||||
"\n2:\n\t"
|
||||
"sub %2, %1, %6\n\t"
|
||||
"cbnz %2, 1b\n\t"
|
||||
/* Sum post-alignment doubleword, if applicable */
|
||||
"tbz %w7, #3, 1f\n\t"
|
||||
"ldr %2, [%1], #8\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum post-alignment word, if applicable */
|
||||
"tbz %w7, #2, 1f\n\t"
|
||||
"ldr %w2, [%1], #4\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum post-alignment halfword, if applicable */
|
||||
"tbz %w7, #1, 1f\n\t"
|
||||
"ldrh %w2, [%1], #2\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Sum post-alignment byte, if applicable */
|
||||
"tbz %w7, #0, 1f\n\t"
|
||||
"ldrb %w2, [%1], #1\n\t"
|
||||
"adcs %0, %0, %2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Fold down to a uint32_t plus carry flag */
|
||||
"lsr %2, %0, #32\n\t"
|
||||
"adcs %w0, %w0, %w2\n\t"
|
||||
/* Fold down to a uint16_t plus carry in bit 16 */
|
||||
"ubfm %2, %0, #0, #15\n\t"
|
||||
"ubfm %3, %0, #16, #31\n\t"
|
||||
"adc %w0, %w2, %w3\n\t"
|
||||
/* Fold down to a uint16_t */
|
||||
"tbz %w0, #16, 1f\n\t"
|
||||
"mov %w2, #0xffff\n\t"
|
||||
"sub %w0, %w0, %w2\n\t"
|
||||
"tbz %w0, #16, 1f\n\t"
|
||||
"sub %w0, %w0, %w2\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Byteswap back, if applicable */
|
||||
"tbz %w4, #0, 1f\n\t"
|
||||
"rev16 %w0, %w0\n\t"
|
||||
"\n1:\n\t"
|
||||
/* Invert sum */
|
||||
"eor %w0, %w0, #0xffff\n\t"
|
||||
: "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
|
||||
"=&r" ( discard_high )
|
||||
: "r" ( pre ), "r" ( first ), "r" ( end - post ),
|
||||
"r" ( post )
|
||||
: "cc" );
|
||||
|
||||
return sum;
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
#ifndef _BITS_TCPIP_H
|
||||
#define _BITS_TCPIP_H
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Transport-network layer interface
|
||||
*
|
||||
*/
|
||||
|
||||
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||
|
||||
/**
 * Calculate continued TCP/IP checksum
 *
 * @v sum		Checksum of already-summed data, in network byte order
 * @v data		Data buffer
 * @v len		Length of data buffer
 * @ret sum		Updated checksum, in network byte order
 */
extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
|
||||
size_t len );
|
||||
|
||||
#endif /* _BITS_TCPIP_H */
|
Loading…
Reference in New Issue