Mac OS X Unicode normalization form conversion (Erik Larsson)

master
szaka 2009-05-21 19:42:36 +00:00
parent cfd23388b9
commit 955b1f3e18
4 changed files with 242 additions and 0 deletions

View File

@ -113,6 +113,18 @@ AC_ARG_ENABLE(
[enable_device_default_io_ops="yes"]
)
# 'nfconv' (Unicode normalization form conversion) is Mac OS X specific.
# An explicit --enable-nfconv / --disable-nfconv is honoured by taking the
# value supplied by the user; when neither is given, the feature defaults to
# enabled for darwin targets and disabled everywhere else.
AC_ARG_ENABLE(
	[nfconv],
	[AS_HELP_STRING([--disable-nfconv],[disable the 'nfconv' patch, which adds support for Unicode normalization form conversion when built on Mac OS X @<:@default=enabled for Mac OS X@:>@])],
	[enable_nfconv="${enableval}"],
	[
		case "${target_os}" in
		darwin*) enable_nfconv="yes" ;;
		*) enable_nfconv="no" ;;
		esac
	]
)
# pthread_rwlock_t requires _GNU_SOURCE
AC_GNU_SOURCE
@ -263,6 +275,26 @@ AC_CHECK_MEMBERS([struct stat.st_rdev])
AC_CHECK_MEMBERS([struct stat.st_atim])
AC_CHECK_MEMBERS([struct stat.st_atimespec])
# For the 'nfconv' patch (Mac OS X only):
# When the target is darwin and nfconv is enabled, the CoreFoundation header
# must be present. If it is found, the CoreFoundation framework is appended to
# LDFLAGS and ENABLE_NFCONV is defined; otherwise configure aborts with a hint
# to use --disable-nfconv. Nothing is done for other targets.
case "${target_os}" in
darwin*)
if test "${enable_nfconv}" = "yes"; then
AC_CHECK_HEADER(
[CoreFoundation/CoreFoundation.h],
[
LDFLAGS="${LDFLAGS} -framework CoreFoundation"
AC_DEFINE(
[ENABLE_NFCONV],
[1],
[Define to 1 if the nfconv patch should be enabled]
)
],
AC_MSG_ERROR([[Cannot find CoreFoundation required for 'nfconv' functionality Mac OS X. You may use the --disable-nfconv 'configure' option to avoid this error.]])
)
fi
;;
esac
# Checks for library functions.
AC_FUNC_GETMNTENT
AC_FUNC_MBRTOWC

View File

@ -65,5 +65,45 @@ extern ntfschar *ntfs_str2ucs(const char *s, int *len);
extern void ntfs_ucsfree(ntfschar *ucs);
#if defined(__APPLE__) || defined(__DARWIN__)
/**
 * Mac OS X only.
 *
 * Sets file name Unicode normalization form conversion on or off.
 *   normalize=0 : Off
 *   normalize=1 : On
 * If set to on, all filenames returned by ntfs-3g will be converted to the NFD
 * normalization form, while all filenames received by ntfs-3g will be
 * converted to the NFC normalization form. Since Windows and most other
 * operating systems use the NFC form while Mac OS X mostly uses NFD, this
 * conversion increases compatibility between Mac applications and NTFS-3G.
 *
 * @param normalize decides whether or not the string functions will do
 *        automatic filename normalization when converting to and from UTF-8.
 *        0 means normalization is disabled, 1 means it is enabled.
 * @return -1 if the argument was invalid or an error occurred (this includes
 *         builds where nfconv support is not compiled in), 0 if all went well.
 */
extern int ntfs_macosx_normalize_filenames(int normalize);
/**
 * Mac OS X only.
 *
 * Normalizes the input string "utf8_string" to one of the normalization forms
 * NFD or NFC. The parameter "composed" decides whether output should be in
 * composed, NFC, form (composed != 0) or decomposed, NFD, form (composed == 0).
 * Input is assumed to be properly UTF-8 encoded and null-terminated. Output
 * will be a newly ntfs_calloc'ed string encoded in UTF-8. It is the caller's
 * responsibility to free(...) the allocated string when it is no longer needed.
 *
 * @param utf8_string the input string, which may be in any normalization form.
 * @param target a pointer where the resulting string will be stored. It is
 *        only written to on success.
 * @param composed decides which composition form to normalize the input
 *        string to. A non-zero value means composed form (NFC), 0 means
 *        decomposed form (NFD).
 * @return a negative value if the normalization failed for some reason (or
 *         nfconv support is not compiled in), otherwise the length in bytes,
 *         not counting the terminating '\0', of the normalized string stored
 *         in target.
 */
extern int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target, int composed);
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
#endif /* defined _NTFS_UNISTR_H */

View File

@ -45,6 +45,12 @@
#include <locale.h>
#endif
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
#include <CoreFoundation/CoreFoundation.h>
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
#include "compat.h"
#include "attrib.h"
#include "types.h"
@ -65,6 +71,18 @@
static int use_utf8 = 1; /* use UTF-8 encoding for file names */
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
/**
 * This variable controls whether or not automatic normalization form
 * conversion should be performed when translating file names between NTFS
 * Unicode (UTF-16) and UTF-8: names converted to UTF-8 are normalized to
 * decomposed form, names converted from UTF-8 are normalized to composed form
 * first.
 * Defaults to on, but can be controlled from the outside using the function
 *   int ntfs_macosx_normalize_filenames(int normalize);
 */
static int nfconvert_utf8 = 1;
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
/*
* This is used by the name collation functions to quickly determine what
* characters are (in)valid.
@ -473,6 +491,13 @@ fail:
static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
char **outs, int outs_len)
{
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
char *original_outs_value = *outs;
int original_outs_len = outs_len;
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
char *t;
int i, size, ret = -1;
ntfschar halfpair;
@ -528,6 +553,36 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
}
}
*t = '\0';
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
	/*
	 * Optionally convert the freshly built UTF-8 string to decomposed form
	 * (NFD). Whenever normalization cannot be applied, the un-normalized
	 * string already stored in *outs is kept as the result.
	 */
	if (nfconvert_utf8 && (t - *outs) > 0) {
		char *new_outs = NULL;
		/* Normalize to decomposed form. */
		int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0);

		if (new_outs_len >= 0 && new_outs != NULL) {
			if (original_outs_value != *outs) {
				/*
				 * We have allocated outs ourselves, so it can
				 * simply be replaced by the normalized buffer.
				 */
				free(*outs);
				*outs = new_outs;
				t = *outs + new_outs_len;
			} else if (new_outs_len < original_outs_len) {
				/*
				 * Caller-supplied buffer and the normalized
				 * string fits: copy it including its '\0' and
				 * point t at the terminator, so that the
				 * returned length is the string length rather
				 * than the buffer size.
				 */
				memcpy(*outs, new_outs, (size_t)new_outs_len + 1);
				t = *outs + new_outs_len;
				free(new_outs);
			} else {
				/*
				 * The normalized string does not fit. Cutting
				 * it off could split a multi-byte UTF-8
				 * sequence, so keep the un-normalized string.
				 */
				ntfs_log_error("Normalized NTFS string does not fit in the supplied buffer, keeping un-normalized form: %s\n", *outs);
				free(new_outs);
			}
		} else {
			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
			ntfs_log_error(" new_outs=0x%p\n", new_outs);
			ntfs_log_error(" new_outs_len=%d\n", new_outs_len);
		}
	}
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
ret = t - *outs;
out:
return ret;
@ -662,6 +717,19 @@ fail:
*/
static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
{
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
char *new_ins = NULL;
if(nfconvert_utf8) {
int new_ins_len;
new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
if(new_ins_len >= 0)
ins = new_ins;
else
ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
}
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
const char *t = ins;
u32 wc;
ntfschar *outpos;
@ -697,6 +765,12 @@ static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
ret = --outpos - *outs;
fail:
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
if(new_ins != NULL)
free(new_ins);
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
return ret;
}
@ -1089,3 +1163,81 @@ int ntfs_set_char_encoding(const char *locale)
return 0; /* always successful */
}
#if defined(__APPLE__) || defined(__DARWIN__)
/**
 * Switches automatic file name normalization form conversion on or off.
 *
 * @param normalize 0 turns the conversion off, 1 turns it on.
 * @return 0 when the setting was stored, -1 when the value is neither 0 nor 1
 *         or when nfconv support is not compiled in.
 */
int ntfs_macosx_normalize_filenames(int normalize)
{
#ifdef ENABLE_NFCONV
	/* Reject anything but the two supported switch values. */
	if (normalize != 0 && normalize != 1)
		return -1;

	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
/**
 * Normalizes a UTF-8 string to the NFC or NFD Unicode normalization form.
 *
 * @param utf8_string the '\0'-terminated UTF-8 input string.
 * @param target receives a newly ntfs_calloc'ed, '\0'-terminated UTF-8 string
 *        on success; the caller must free() it. It is left untouched on
 *        failure.
 * @param composed non-zero selects composed form (NFC), 0 selects decomposed
 *        form (NFD).
 * @return the length in bytes of the string stored in target, not counting
 *         the terminating '\0' (0 for an empty input string), or -1 if the
 *         normalization failed or nfconv support is not compiled in.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
		int composed)
{
#ifdef ENABLE_NFCONV
	/* For this code to link, the CoreFoundation framework must be fed to the linker. */
	CFStringRef cfSourceString;
	CFMutableStringRef cfMutableString;
	CFRange rangeToProcess;
	CFIndex requiredBufferLength = 0;
	char *result = NULL;
	int resultLength = -1;

	if (utf8_string == NULL || target == NULL) {
		ntfs_log_error("Invalid NULL argument to ntfs_macosx_normalize_utf8.\n");
		return -1;
	}

	/* Convert the UTF-8 string to a CFString. */
	cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault,
			utf8_string, kCFStringEncodingUTF8);
	if (cfSourceString == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -1;
	}

	/* Create a mutable string from cfSourceString that we are free to modify. */
	cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0,
			cfSourceString);
	CFRelease(cfSourceString); /* End-of-life. */
	if (cfMutableString == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -1;
	}

	/* Normalize the mutable string to the desired normalization form. */
	CFStringNormalize(cfMutableString, (composed != 0 ?
			kCFStringNormalizationFormC :
			kCFStringNormalizationFormD));

	/*
	 * Sizing pass: ask how many bytes the UTF-8 form needs. The call
	 * returns the number of characters converted, so success means that
	 * the whole range was converted. This also holds for an empty string,
	 * where both values are 0.
	 */
	rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString));
	if (CFStringGetBytes(cfMutableString, rangeToProcess,
			kCFStringEncodingUTF8, 0, false, NULL, 0,
			&requiredBufferLength) != rangeToProcess.length) {
		ntfs_log_error("Could not perform check for required length of UTF-8 conversion of normalized CFMutableString.\n");
		goto out;
	}

	/* The length is returned as an int, so make sure it is representable. */
	resultLength = (int)(requiredBufferLength + 1);
	if (resultLength <= 0 ||
			(CFIndex)resultLength != requiredBufferLength + 1) {
		ntfs_log_error("Normalized UTF-8 string is too long.\n");
		goto out;
	}

	/* Zero-filled buffer, one byte larger than needed: always terminated. */
	result = ntfs_calloc(resultLength);
	if (result == NULL) {
		ntfs_log_error("Could not perform a ntfs_calloc of %d bytes for char *result.\n", resultLength);
		goto out;
	}

	/* Conversion pass: store the UTF-8 bytes in the new buffer. */
	if (CFStringGetBytes(cfMutableString, rangeToProcess,
			kCFStringEncodingUTF8, 0, false, (UInt8 *)result,
			resultLength - 1,
			&requiredBufferLength) != rangeToProcess.length) {
		ntfs_log_error("Could not perform UTF-8 conversion of normalized CFMutableString.\n");
		free(result);
		result = NULL;
	}
out:
	CFRelease(cfMutableString);
	if (result == NULL)
		return -1;
	*target = result;
	return resultLength - 1;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
#endif /* defined(__APPLE__) || defined(__DARWIN__) */

View File

@ -1933,6 +1933,24 @@ static char *parse_mount_options(const char *orig_opts)
if (missing_option_value(val, "locale"))
goto err_exit;
setlocale(LC_ALL, val);
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
} else if (!strcmp(opt, "nfconv")) {
if (bogus_option_value(val, "nfconv"))
goto err_exit;
if (ntfs_macosx_normalize_filenames(1)) {
ntfs_log_error("ntfs_macosx_normalize_filenames(1) failed!\n");
goto err_exit;
}
} else if (!strcmp(opt, "nonfconv")) {
if (bogus_option_value(val, "nonfconv"))
goto err_exit;
if (ntfs_macosx_normalize_filenames(0)) {
ntfs_log_error("ntfs_macosx_normalize_filenames(0) failed!\n");
goto err_exit;
}
#endif /* ENABLE_NFCONV */
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
} else if (!strcmp(opt, "streams_interface")) {
if (missing_option_value(val, "streams_interface"))
goto err_exit;