set errno values for the new built-in UTF8 conversion functions

master
szaka 2008-12-22 00:11:28 +00:00
parent 0017cc31df
commit 704519c75e
1 changed files with 72 additions and 38 deletions

View File

@ -407,12 +407,14 @@ int ntfs_file_values_compare(const FILE_NAME_ATTR *file_name_attr1,
*/
/*
* Return the amount of 8-bit elements in UTF-8 needed (without
* the terminating null) to store a given UTF-16LE string.
* Return the amount of 8-bit elements in UTF-8 needed (without the terminating
* null) to store a given UTF-16LE string.
*
* Return -1 with errno set to EILSEQ if the string contains an invalid
* sequence, or to ENAMETOOLONG if the result does not fit into outs_len.
*/
static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
{
int i;
int i, ret = -1;
int count = 0;
BOOL surrog;
@ -423,7 +425,8 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
if ((c >= 0xdc00) && (c < 0xe000)) {
surrog = FALSE;
count += 4;
} else goto fail;
} else
goto fail;
} else
if (c < 0x80)
count++;
@ -439,14 +442,22 @@ static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_l
else if (c >= 0xe000)
#endif
count += 3;
else goto fail;
if (count > outs_len)
goto fail;
else
goto fail;
if (count > outs_len) {
errno = ENAMETOOLONG;
goto out;
}
}
if (surrog) goto fail;
return count;
if (surrog)
goto fail;
ret = count;
out:
return ret;
fail:
return -1;
errno = EILSEQ;
goto out;
}
/*
@ -455,12 +466,14 @@ fail:
* @ins_len: length of input string in utf16 characters
* @outs: on return contains the (allocated) output multibyte string
* @outs_len: length of output buffer in bytes
*
* Return -1 with errno set if the string has an invalid byte sequence or is
* too long.
*/
static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
char **outs, int outs_len)
{
char *t;
int i, size;
int i, size, ret = -1;
ntfschar halfpair;
halfpair = 0;
@ -469,10 +482,9 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
size = utf16_to_utf8_size(ins, ins_len, outs_len);
if (size < 0) {
errno = ENAMETOOLONG;
goto fail;
}
if (size < 0)
goto out;
if (!*outs)
*outs = ntfs_malloc((outs_len = size + 1));
@ -488,7 +500,8 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
*t++ = 0x80 + (c & 63);
halfpair = 0;
} else goto fail;
} else
goto fail;
} else if (c < 0x80) {
*t++ = c;
} else {
@ -505,51 +518,70 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
*t++ = 0xe0 | (c >> 12);
*t++ = 0x80 | ((c >> 6) & 0x3f);
*t++ = 0x80 | (c & 0x3f);
} else goto fail;
} else
goto fail;
}
}
*t = '\0';
return t - *outs;
ret = t - *outs;
out:
return ret;
fail:
return -1;
errno = EILSEQ;
goto out;
}
/*
* Return the amount of 16-bit elements in UTF-16LE needed
* (without the terminating null) to store given UTF-8 string.
*
* Return -1 if it does not fit into PATH_MAX.
* Return -1 with errno set to ENAMETOOLONG if the result does not fit into
* PATH_MAX, or to EILSEQ if the string is invalid.
*
* Note: This does not check whether the input sequence is a valid utf8 string,
* and should be used only in context where such check is made!
*/
static int utf8_to_utf16_size(const char *s)
{
int ret = -1;
unsigned int byte;
size_t count = 0;
while ((byte = *((const unsigned char *)s++))) {
if (++count >= PATH_MAX || byte >= 0xF5)
if (++count >= PATH_MAX)
goto fail;
if (!*s) break;
if (byte >= 0xC0) s++;
if (!*s) break;
if (byte >= 0xE0) s++;
if (!*s) break;
if (byte >= 0xF5) {
errno = EILSEQ;
goto out;
}
if (!*s)
break;
if (byte >= 0xC0)
s++;
if (!*s)
break;
if (byte >= 0xE0)
s++;
if (!*s)
break;
if (byte >= 0xF0) {
s++;
if (++count >= PATH_MAX)
goto fail;
}
}
return count;
ret = count;
out:
return ret;
fail:
return -1;
errno = ENAMETOOLONG;
goto out;
}
/*
* This converts one UTF-8 sequence to cpu-endian Unicode value
* within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
* Returns the number of used utf8 bytes or -1 if sequence is invalid.
*
* Return the number of used utf8 bytes, or -1 with errno set
* if the sequence is invalid.
*/
static int utf8_to_unicode(u32 *wc, const char *s)
{
@ -611,6 +643,7 @@ static int utf8_to_unicode(u32 *wc, const char *s)
goto fail;
}
fail:
errno = EILSEQ;
return -1;
}
@ -619,18 +652,20 @@ fail:
* @ins: input multibyte string buffer
* @outs: on return contains the (allocated) output utf16 string
* @outs_len: length of output buffer in utf16 characters
*
* Return -1 with errno set on error.
*/
static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
{
const char *t = ins;
u32 wc;
ntfschar *outpos;
int shorts = utf8_to_utf16_size(ins);
int shorts, ret = -1;
if (shorts < 0) {
errno = EILSEQ;
shorts = utf8_to_utf16_size(ins);
if (shorts < 0)
goto fail;
}
if (!*outs)
*outs = ntfs_malloc((shorts+1) * sizeof(ntfschar));
@ -638,10 +673,8 @@ static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
while(1) {
int m = utf8_to_unicode(&wc, t);
if (m < 0) {
errno = EILSEQ;
if (m < 0)
goto fail;
}
if (wc < 0x10000)
*outpos++ = cpu_to_le16(wc);
else {
@ -653,9 +686,10 @@ static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
break;
t += m;
}
return --outpos - *outs;
ret = --outpos - *outs;
fail:
return -1;
return ret;
}
/**