mirror of https://github.com/ipxe/ipxe.git
[utf8] Add UTF-8 accumulation self-tests
Signed-off-by: Michael Brown <mcb30@ipxe.org>
pull/631/head
parent
3cd3a73261
commit
7e9631b60f
|
@ -75,3 +75,4 @@ REQUIRE_OBJECT ( pem_test );
|
|||
REQUIRE_OBJECT ( ntlm_test );
|
||||
REQUIRE_OBJECT ( zlib_test );
|
||||
REQUIRE_OBJECT ( gzip_test );
|
||||
REQUIRE_OBJECT ( utf8_test );
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
/*
 * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */
|
||||
|
||||
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||
|
||||
/** @file
 *
 * UTF-8 Unicode encoding tests
 *
 */
|
||||
|
||||
/* Forcibly enable assertions */
|
||||
#undef NDEBUG
|
||||
|
||||
#include <string.h>
|
||||
#include <ipxe/utf8.h>
|
||||
#include <ipxe/test.h>
|
||||
|
||||
/**
 * A UTF-8 accumulation test
 *
 * Pairs a raw byte sequence with the value expected back from the
 * accumulator after each individual byte has been fed in.
 */
struct utf8_accumulate_test {
	/** UTF-8 byte string (the first @c len bytes are consumed) */
	const char *bytes;
	/** Expected character sequence (one entry per input byte) */
	const unsigned int *expected;
	/** Length (number of entries in the expected sequence) */
	size_t len;
};
|
||||
|
||||
/**
 * Define inline data
 *
 * Wraps a comma-separated list in braces so that the whole list can
 * be passed through a single macro parameter and used as an array
 * initialiser.
 */
#define DATA(...) { __VA_ARGS__ }
|
||||
|
||||
/**
 * Define a UTF-8 accumulation test
 *
 * @v name		Test name
 * @v BYTES		Input bytes (string literal or braced list)
 * @v EXPECTED		Expected value after each input byte (braced list)
 *
 * The test length is the number of EXPECTED entries, so BYTES must
 * supply at least that many bytes.  Any extra trailing bytes (such as
 * a string literal's terminating NUL) are never consumed.
 */
#define UTF8_ACCUMULATE( name, BYTES, EXPECTED )			\
	static const char name ## _bytes[] = BYTES;			\
	static const unsigned int name ## _expected[] = EXPECTED;	\
	static struct utf8_accumulate_test name = {			\
		.bytes = name ## _bytes,				\
		.expected = name ## _expected,				\
		.len = ( sizeof ( name ## _expected ) /			\
			 sizeof ( name ## _expected[0] ) ),		\
	};
|
||||
|
||||
/** Basic ASCII test */
|
||||
UTF8_ACCUMULATE ( ascii, "Hello world!",
|
||||
DATA ( 'H', 'e', 'l', 'l', 'o', ' ',
|
||||
'w', 'o', 'r', 'l', 'd', '!' ) );
|
||||
|
||||
/** Multi-byte character test */
|
||||
UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳",
|
||||
DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ',
|
||||
'w', 0, L'ö', 'r', 'l', 'd', ' ',
|
||||
0, 0, 0, 0x1f973 ) );
|
||||
|
||||
/** Stray continuation byte test */
|
||||
UTF8_ACCUMULATE ( stray_continuation,
|
||||
DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ),
|
||||
DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) );
|
||||
|
||||
/** Missing continuation byte test */
|
||||
UTF8_ACCUMULATE ( missing_continuation,
|
||||
DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ),
|
||||
DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) );
|
||||
|
||||
/** Illegal two-byte sequence test */
|
||||
UTF8_ACCUMULATE ( illegal_two,
|
||||
DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80,
|
||||
'd' ),
|
||||
DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) );
|
||||
|
||||
/** Illegal three-byte sequence test */
|
||||
UTF8_ACCUMULATE ( illegal_three,
|
||||
DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c',
|
||||
0xe0, 0x80, 0x80, 'd' ),
|
||||
DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c',
|
||||
0, 0, 0xfffd, 'd' ) );
|
||||
|
||||
/** Illegal four-byte sequence test */
|
||||
UTF8_ACCUMULATE ( illegal_four,
|
||||
DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf,
|
||||
0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ),
|
||||
DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c',
|
||||
0, 0, 0, 0xfffd, 'd' ) );
|
||||
|
||||
/** Illegal overlength sequence test */
|
||||
UTF8_ACCUMULATE ( illegal_length,
|
||||
DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf,
|
||||
0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf,
|
||||
0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf,
|
||||
0xbf, 0xbf, 0xbf, 'e' ),
|
||||
DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b',
|
||||
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c',
|
||||
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
||||
0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
||||
0xfffd, 0xfffd, 0xfffd, 'e' ) );
|
||||
|
||||
/**
|
||||
* Report UTF-8 accumulation test result
|
||||
*
|
||||
* @v test UTF-8 accumulation test
|
||||
* @v file Test code file
|
||||
* @v line Test code line
|
||||
*/
|
||||
static void utf8_accumulate_okx ( struct utf8_accumulate_test *test,
|
||||
const char *file, unsigned int line ) {
|
||||
struct utf8_accumulator utf8;
|
||||
unsigned int character;
|
||||
unsigned int i;
|
||||
|
||||
/* Initialise accumulator */
|
||||
memset ( &utf8, 0, sizeof ( utf8 ) );
|
||||
|
||||
/* Test each byte in turn */
|
||||
for ( i = 0 ; i < test->len ; i++ ) {
|
||||
character = utf8_accumulate ( &utf8, test->bytes[i] );
|
||||
DBGC ( test, "UTF8 byte %02x character %02x\n",
|
||||
test->bytes[i], character );
|
||||
okx ( character == test->expected[i], file, line );
|
||||
}
|
||||
}
|
||||
/**
 * Report UTF-8 accumulation test result
 *
 * @v test		UTF-8 accumulation test
 *
 * Passes the caller's source file and line number on to
 * utf8_accumulate_okx().
 */
#define utf8_accumulate_ok( test ) \
	utf8_accumulate_okx ( test, __FILE__, __LINE__ )
|
||||
|
||||
/**
|
||||
* Perform UTF-8 self-test
|
||||
*
|
||||
*/
|
||||
static void utf8_test_exec ( void ) {
|
||||
|
||||
/* Accumulation tests */
|
||||
utf8_accumulate_ok ( &ascii );
|
||||
utf8_accumulate_ok ( &multibyte );
|
||||
utf8_accumulate_ok ( &stray_continuation );
|
||||
utf8_accumulate_ok ( &missing_continuation );
|
||||
utf8_accumulate_ok ( &illegal_two );
|
||||
utf8_accumulate_ok ( &illegal_three );
|
||||
utf8_accumulate_ok ( &illegal_four );
|
||||
utf8_accumulate_ok ( &illegal_length );
|
||||
}
|
||||
|
||||
/** UTF-8 self-test */
|
||||
struct self_test utf8_test __self_test = {
|
||||
.name = "utf8",
|
||||
.exec = utf8_test_exec,
|
||||
};
|
Loading…
Reference in New Issue