From 47931a4de53ccdeda061c59aa0919f152cf0dfdf Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 10 May 2016 17:13:05 +0100 Subject: [PATCH] [arm] Add optimised TCP/IP checksumming for 64-bit ARM Signed-off-by: Michael Brown --- src/arch/{arm => arm32}/include/bits/tcpip.h | 0 src/arch/arm64/core/arm64_tcpip.c | 175 +++++++++++++++++++ src/arch/arm64/include/bits/tcpip.h | 15 ++ 3 files changed, 190 insertions(+) rename src/arch/{arm => arm32}/include/bits/tcpip.h (100%) create mode 100644 src/arch/arm64/core/arm64_tcpip.c create mode 100644 src/arch/arm64/include/bits/tcpip.h diff --git a/src/arch/arm/include/bits/tcpip.h b/src/arch/arm32/include/bits/tcpip.h similarity index 100% rename from src/arch/arm/include/bits/tcpip.h rename to src/arch/arm32/include/bits/tcpip.h diff --git a/src/arch/arm64/core/arm64_tcpip.c b/src/arch/arm64/core/arm64_tcpip.c new file mode 100644 index 000000000..0ef04ea42 --- /dev/null +++ b/src/arch/arm64/core/arm64_tcpip.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2016 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. 
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** @file
+ *
+ * TCP/IP checksum
+ *
+ */
+
+#include <strings.h>
+#include <ipxe/tcpip.h>
+
+/** Alignment used by main checksumming loop */
+#define TCPIP_CHKSUM_ALIGN 16
+
+/** Number of steps in each iteration of the unrolled main checksumming loop */
+#define TCPIP_CHKSUM_UNROLL 4
+
+/**
+ * Calculate continued TCP/IP checksum
+ *
+ * @v sum		Checksum of already-summed data, in network byte order
+ * @v data		Data buffer
+ * @v len		Length of data buffer
+ * @ret sum		Updated checksum, in network byte order
+ *
+ * The buffer is consumed in three phases:
+ *
+ *  - a pre-alignment head of up to 15 bytes, read as an optional
+ *    byte, halfword, word and doubleword (one piece per set bit of
+ *    the head length);
+ *
+ *  - a main body read 16 bytes per step by an unrolled (x4) loop;
+ *
+ *  - a post-alignment tail of up to 15 bytes, read as an optional
+ *    doubleword, word, halfword and byte.
+ *
+ * Every addition uses add-with-carry into a 64-bit accumulator, and
+ * the accumulator is folded down to 16 bits at the end.  If the head
+ * length is odd, the accumulator is byte-swapped before the head byte
+ * is added and swapped back after folding.
+ *
+ * A zero-length buffer returns @c sum unchanged.
+ */
+uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+				 size_t len ) {
+	intptr_t start;
+	intptr_t end;
+	intptr_t mid;
+	uint64_t sum64;
+	uint64_t pre;
+	uint64_t post;
+	uint64_t first;
+	uint64_t discard_low;
+	uint64_t discard_high;
+
+	/* Avoid potentially undefined shift operation */
+	if ( len == 0 )
+		return sum;
+
+	/* Find maximally-aligned midpoint.  For short blocks of data,
+	 * this may be aligned to fewer than 16 bytes.
+	 *
+	 * The mask clears every bit of the end address below the
+	 * most significant bit in which the start and end addresses
+	 * differ (assuming that flsl() returns the 1-based index of
+	 * the most significant set bit).
+	 */
+	start = ( ( intptr_t ) data );
+	end = ( start + len );
+	mid = ( end &
+		~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
+
+	/* Calculate pre- and post-alignment lengths */
+	pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+	post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+
+	/* Calculate number of steps in first iteration of unrolled loop */
+	first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
+		  ( TCPIP_CHKSUM_UNROLL - 1 ) );
+
+	/* Zero-extend the running checksum into a 64-bit accumulator.
+	 *
+	 * The assembly below names several operands via their 64-bit
+	 * register names (%0, %5, %6).  Such operands must be 64-bit C
+	 * objects: the upper bits of a register holding a narrower
+	 * object are not guaranteed to be zero.
+	 */
+	sum64 = sum;
+
+	/* Calculate checksum
+	 *
+	 * Operands:
+	 *   %0 = accumulator		%4 = pre
+	 *   %1 = data pointer		%5 = first
+	 *   %2 = scratch (low)		%6 = end of main body
+	 *   %3 = scratch (high)	%7 = post
+	 *
+	 * The carry flag is live from the "cmn" until the final "adc".
+	 * Only instructions that leave the flags untouched (tbz, ldr,
+	 * ldp, rev16, adr, sub, br, cbnz, lsr, ubfm) are used between
+	 * the additions.
+	 */
+	__asm__ ( /* Invert sum */
+		  "eor %w0, %w0, #0xffff\n\t"
+		  /* Clear carry flag */
+		  "cmn xzr, xzr\n\t"
+		  /* Byteswap and sum pre-alignment byte, if applicable */
+		  "tbz %w4, #0, 1f\n\t"
+		  "ldrb %w2, [%1], #1\n\t"
+		  "rev16 %w0, %w0\n\t"
+		  "rev16 %w2, %w2\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum pre-alignment halfword, if applicable */
+		  "tbz %w4, #1, 1f\n\t"
+		  "ldrh %w2, [%1], #2\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum pre-alignment word, if applicable */
+		  "tbz %w4, #2, 1f\n\t"
+		  "ldr %w2, [%1], #4\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum pre-alignment doubleword, if applicable */
+		  "tbz %w4, #3, 1f\n\t"
+		  "ldr %2, [%1], #8\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Jump into unrolled (x4) main loop
+		   *
+		   * Each step is three 4-byte instructions (ldp, adcs,
+		   * adcs), so the entry point lies ( first * 12 )
+		   * bytes before label 2, computed here as
+		   * ( first * 8 ) + ( first * 4 ).
+		   */
+		  "adr %2, 2f\n\t"
+		  "sub %2, %2, %5, lsl #3\n\t"
+		  "sub %2, %2, %5, lsl #2\n\t"
+		  "br %2\n\t"
+		  "\n1:\n\t"
+		  "ldp %2, %3, [%1], #16\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "adcs %0, %0, %3\n\t"
+		  "ldp %2, %3, [%1], #16\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "adcs %0, %0, %3\n\t"
+		  "ldp %2, %3, [%1], #16\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "adcs %0, %0, %3\n\t"
+		  "ldp %2, %3, [%1], #16\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "adcs %0, %0, %3\n\t"
+		  "\n2:\n\t"
+		  /* Loop until the data pointer reaches the end of the
+		   * main body
+		   */
+		  "sub %2, %1, %6\n\t"
+		  "cbnz %2, 1b\n\t"
+		  /* Sum post-alignment doubleword, if applicable */
+		  "tbz %w7, #3, 1f\n\t"
+		  "ldr %2, [%1], #8\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum post-alignment word, if applicable */
+		  "tbz %w7, #2, 1f\n\t"
+		  "ldr %w2, [%1], #4\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum post-alignment halfword, if applicable */
+		  "tbz %w7, #1, 1f\n\t"
+		  "ldrh %w2, [%1], #2\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Sum post-alignment byte, if applicable */
+		  "tbz %w7, #0, 1f\n\t"
+		  "ldrb %w2, [%1], #1\n\t"
+		  "adcs %0, %0, %2\n\t"
+		  "\n1:\n\t"
+		  /* Fold down to a uint32_t plus carry flag */
+		  "lsr %2, %0, #32\n\t"
+		  "adcs %w0, %w0, %w2\n\t"
+		  /* Fold down to a uint16_t plus carry in bit 16 */
+		  "ubfm %2, %0, #0, #15\n\t"
+		  "ubfm %3, %0, #16, #31\n\t"
+		  "adc %w0, %w2, %w3\n\t"
+		  /* Fold down to a uint16_t
+		   *
+		   * Subtracting 0xffff clears bit 16 and adds the
+		   * end-around carry in one step.  A second pass is
+		   * needed when the first leaves exactly 0x10000.
+		   */
+		  "tbz %w0, #16, 1f\n\t"
+		  "mov %w2, #0xffff\n\t"
+		  "sub %w0, %w0, %w2\n\t"
+		  "tbz %w0, #16, 1f\n\t"
+		  "sub %w0, %w0, %w2\n\t"
+		  "\n1:\n\t"
+		  /* Byteswap back, if applicable */
+		  "tbz %w4, #0, 1f\n\t"
+		  "rev16 %w0, %w0\n\t"
+		  "\n1:\n\t"
+		  /* Invert sum */
+		  "eor %w0, %w0, #0xffff\n\t"
+		  : "+r" ( sum64 ), "+r" ( data ), "=&r" ( discard_low ),
+		    "=&r" ( discard_high )
+		  : "r" ( pre ), "r" ( first ), "r" ( end - post ),
+		    "r" ( post )
+		  /* "memory": the data buffer is read via a pointer
+		   * that is not described by any memory operand.
+		   */
+		  : "cc", "memory" );
+
+	/* The final fold leaves a 16-bit value in the accumulator */
+	return ( ( uint16_t ) sum64 );
+}
diff --git a/src/arch/arm64/include/bits/tcpip.h b/src/arch/arm64/include/bits/tcpip.h
new file mode 100644
index 000000000..68686534e
--- /dev/null
+++ b/src/arch/arm64/include/bits/tcpip.h
@@ -0,0 +1,15 @@
+#ifndef _BITS_TCPIP_H
+#define _BITS_TCPIP_H
+
+/** @file
+ *
+ * Transport-network layer interface (64-bit ARM)
+ * Declares the architecture-specific tcpip_continue_chksum() routine.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+				       size_t len ); /* sum in/out: network byte order */
+
+#endif /* _BITS_TCPIP_H */