Mac OS X Unicode normalization form conversion (Erik Larsson)
parent
cfd23388b9
commit
955b1f3e18
32
configure.ac
32
configure.ac
|
@ -113,6 +113,18 @@ AC_ARG_ENABLE(
|
|||
[enable_device_default_io_ops="yes"]
|
||||
)
|
||||
|
||||
# The 'nfconv' switch. When the user passes --enable-nfconv or
# --disable-nfconv explicitly, adopt the value that was given (an explicit
# --enable-nfconv must not turn the feature off). When the switch is absent,
# default to "yes" for Darwin targets and to "no" for every other target.
AC_ARG_ENABLE(
	[nfconv],
	[AS_HELP_STRING([--disable-nfconv],[disable the 'nfconv' patch, which adds support for Unicode normalization form conversion when built on Mac OS X @<:@default=enabled for Mac OS X@:>@])],
	[enable_nfconv="${enableval}"],
	[
		case "${target_os}" in
		darwin*) enable_nfconv="yes" ;;
		*) enable_nfconv="no" ;;
		esac
	]
)
|
||||
|
||||
# pthread_rwlock_t requires _GNU_SOURCE
|
||||
AC_GNU_SOURCE
|
||||
|
||||
|
@ -263,6 +275,26 @@ AC_CHECK_MEMBERS([struct stat.st_rdev])
|
|||
AC_CHECK_MEMBERS([struct stat.st_atim])
|
||||
AC_CHECK_MEMBERS([struct stat.st_atimespec])
|
||||
|
||||
# For the 'nfconv' patch (Mac OS X only):
|
||||
# Only Darwin targets can use the normalization code. When it is enabled
# there, the CoreFoundation header must be present: on success the framework
# is appended to LDFLAGS and ENABLE_NFCONV is defined; otherwise configure
# stops with an error that points at --disable-nfconv.
case "${target_os}" in
darwin*)
	if test "${enable_nfconv}" = "yes"; then
		AC_CHECK_HEADER(
			[CoreFoundation/CoreFoundation.h],
			[
				LDFLAGS="${LDFLAGS} -framework CoreFoundation"
				AC_DEFINE(
					[ENABLE_NFCONV],
					[1],
					[Define to 1 if the nfconv patch should be enabled]
				)
			],
			AC_MSG_ERROR([[Cannot find CoreFoundation required for 'nfconv' functionality on Mac OS X. You may use the --disable-nfconv 'configure' option to avoid this error.]])
		)
	fi
	;;
esac
|
||||
|
||||
# Checks for library functions.
|
||||
AC_FUNC_GETMNTENT
|
||||
AC_FUNC_MBRTOWC
|
||||
|
|
|
@ -65,5 +65,45 @@ extern ntfschar *ntfs_str2ucs(const char *s, int *len);
|
|||
|
||||
extern void ntfs_ucsfree(ntfschar *ucs);
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
/**
|
||||
* Mac OS X only.
|
||||
*
|
||||
* Sets file name Unicode normalization form conversion on or off.
|
||||
* normalize=0 : Off
|
||||
* normalize=1 : On
|
||||
* If set to on, all filenames returned by ntfs-3g will be converted to the NFD
|
||||
* normalization form, while all filenames received by ntfs-3g will be converted to the NFC
|
||||
* normalization form. Since Windows and most other OSes use the NFC form while Mac OS X
|
||||
* mostly uses NFD, this conversion increases compatibility between Mac applications and
|
||||
* NTFS-3G.
|
||||
*
|
||||
* @param normalize decides whether or not the string functions will do automatic filename
|
||||
* normalization when converting to and from UTF-8. 0 means normalization is disabled,
|
||||
* 1 means it is enabled.
|
||||
* @return -1 if the argument was invalid or an error occurred, 0 if all went well.
|
||||
*/
|
||||
extern int ntfs_macosx_normalize_filenames(int normalize);
|
||||
|
||||
/**
|
||||
* Mac OS X only.
|
||||
*
|
||||
* Normalizes the input string "utf8_string" to one of the normalization forms NFD or NFC.
|
||||
* The parameter "composed" decides whether output should be in composed, NFC, form
|
||||
* (composed == 1) or decomposed, NFD, form (composed == 0).
|
||||
* Input is assumed to be properly UTF-8 encoded and null-terminated. Output will be a newly
|
||||
* ntfs_calloc'ed string encoded in UTF-8. It is the caller's responsibility to free(...) the
|
||||
* allocated string when it's no longer needed.
|
||||
*
|
||||
* @param utf8_string the input string, which may be in any normalization form.
|
||||
* @param target a pointer where the resulting string will be stored.
|
||||
* @param composed decides which composition form to normalize the input string to. 0 means
|
||||
* decomposed form (NFD), 1 means composed form (NFC).
|
||||
* @return -1 if the normalization failed for some reason, otherwise the length of the
|
||||
* normalized string stored in target.
|
||||
*/
|
||||
extern int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target, int composed);
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
||||
#endif /* defined _NTFS_UNISTR_H */
|
||||
|
||||
|
|
|
@ -45,6 +45,12 @@
|
|||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
#include <CoreFoundation/CoreFoundation.h>
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
||||
#include "compat.h"
|
||||
#include "attrib.h"
|
||||
#include "types.h"
|
||||
|
@ -65,6 +71,18 @@
|
|||
|
||||
static int use_utf8 = 1; /* use UTF-8 encoding for file names */
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
/**
|
||||
* This variable controls whether or not automatic normalization form conversion
|
||||
* should be performed when translating NTFS unicode file names to UTF-8.
|
||||
* Defaults to on, but can be controlled from the outside using the function
|
||||
* int ntfs_macosx_normalize_filenames(int normalize);
|
||||
*/
|
||||
static int nfconvert_utf8 = 1;
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
||||
/*
|
||||
* This is used by the name collation functions to quickly determine what
|
||||
* characters are (in)valid.
|
||||
|
@ -473,6 +491,13 @@ fail:
|
|||
static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
||||
char **outs, int outs_len)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
char *original_outs_value = *outs;
|
||||
int original_outs_len = outs_len;
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
||||
char *t;
|
||||
int i, size, ret = -1;
|
||||
ntfschar halfpair;
|
||||
|
@ -528,6 +553,36 @@ static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
|
|||
}
|
||||
}
|
||||
*t = '\0';
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
if(nfconvert_utf8 && (t - *outs) > 0) {
|
||||
char *new_outs = NULL;
|
||||
int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
|
||||
if(new_outs_len >= 0 && new_outs != NULL) {
|
||||
if(original_outs_value != *outs) {
|
||||
// We have allocated outs ourselves.
|
||||
free(*outs);
|
||||
*outs = new_outs;
|
||||
t = *outs + new_outs_len;
|
||||
}
|
||||
else {
|
||||
// We need to copy new_outs into the fixed outs buffer.
|
||||
memset(*outs, 0, original_outs_len);
|
||||
strncpy(*outs, new_outs, original_outs_len-1);
|
||||
t = *outs + original_outs_len;
|
||||
free(new_outs);
|
||||
}
|
||||
}
|
||||
else {
|
||||
ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
|
||||
ntfs_log_error(" new_outs=0x%p\n", new_outs);
|
||||
ntfs_log_error(" new_outs_len=%d\n", new_outs_len);
|
||||
}
|
||||
}
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
||||
ret = t - *outs;
|
||||
out:
|
||||
return ret;
|
||||
|
@ -662,6 +717,19 @@ fail:
|
|||
*/
|
||||
static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
char *new_ins = NULL;
|
||||
if(nfconvert_utf8) {
|
||||
int new_ins_len;
|
||||
new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
|
||||
if(new_ins_len >= 0)
|
||||
ins = new_ins;
|
||||
else
|
||||
ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
|
||||
}
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
const char *t = ins;
|
||||
u32 wc;
|
||||
ntfschar *outpos;
|
||||
|
@ -697,6 +765,12 @@ static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
|
|||
|
||||
ret = --outpos - *outs;
|
||||
fail:
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
if(new_ins != NULL)
|
||||
free(new_ins);
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1089,3 +1163,81 @@ int ntfs_set_char_encoding(const char *locale)
|
|||
return 0; /* always successful */
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
|
||||
/*
 * Switch automatic filename normalization form conversion on (1) or off (0).
 *
 * Returns 0 when the new setting was stored. Returns -1 when the argument is
 * neither 0 nor 1, or when the build does not define ENABLE_NFCONV.
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	switch (normalize) {
	case 0:
	case 1:
		/* Only the two documented values are accepted. */
		nfconvert_utf8 = normalize;
		return 0;
	default:
		return -1;
	}
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
|
||||
|
||||
/*
 * Normalize the NUL-terminated UTF-8 string "utf8_string" to NFC
 * (composed != 0) or NFD (composed == 0) and store a newly ntfs_calloc'ed,
 * NUL-terminated UTF-8 copy in "*target". The caller must free() it.
 *
 * Returns the length in bytes of the normalized string (excluding the
 * terminating NUL) on success. Returns -1 on any failure, including NULL
 * arguments and builds without ENABLE_NFCONV; "*target" is left untouched
 * in that case.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
		int composed) {
#ifdef ENABLE_NFCONV
	/* For this code to link, the CoreFoundation framework must be fed to the linker. */
	CFStringRef cfSourceString;
	CFMutableStringRef cfMutableString;
	CFRange rangeToProcess;
	CFIndex requiredBufferLength = 0;
	char *result = NULL;
	int resultLength = -1;

	/* Never hand a NULL C string to CoreFoundation or write through a NULL target. */
	if (utf8_string == NULL || target == NULL) {
		ntfs_log_error("ntfs_macosx_normalize_utf8 called with a NULL argument!\n");
		return -1;
	}

	/* Convert the UTF-8 string to a CFString. */
	cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault, utf8_string, kCFStringEncodingUTF8);
	if (cfSourceString == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -1;
	}

	/* Create a mutable string from cfSourceString that we are free to modify. */
	cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, cfSourceString);
	CFRelease(cfSourceString); /* End-of-life. */
	if (cfMutableString == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -1;
	}

	/* Normalize the mutable string to the desired normalization form. */
	CFStringNormalize(cfMutableString, (composed != 0 ? kCFStringNormalizationFormC : kCFStringNormalizationFormD));

	rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString));
	if (rangeToProcess.length == 0) {
		/*
		 * Empty input: there is nothing to convert, so the result is
		 * simply an empty string rather than a conversion failure.
		 */
		resultLength = 1;
		result = ntfs_calloc(resultLength);
		if (result == NULL)
			ntfs_log_error("Could not perform a ntfs_calloc of %d bytes for char *result.\n", resultLength);
		goto done;
	}

	/* First pass: ask how many bytes the UTF-8 representation needs. */
	if (CFStringGetBytes(cfMutableString, rangeToProcess, kCFStringEncodingUTF8, 0, false, NULL, 0, &requiredBufferLength) <= 0) {
		ntfs_log_error("Could not perform check for required length of UTF-8 conversion of normalized CFMutableString.\n");
		goto done;
	}

	/* The length is returned as an int; refuse sizes that do not fit. */
	resultLength = (int)(requiredBufferLength + 1);
	if (resultLength <= 0 || (CFIndex)resultLength != requiredBufferLength + 1) {
		ntfs_log_error("Normalized string is too long to be returned.\n");
		goto done;
	}

	result = ntfs_calloc(resultLength);
	if (result == NULL) {
		ntfs_log_error("Could not perform a ntfs_calloc of %d bytes for char *result.\n", resultLength);
		goto done;
	}

	/* Second pass: store the bytes; the zeroed buffer supplies the '\0' terminator. */
	if (CFStringGetBytes(cfMutableString, rangeToProcess, kCFStringEncodingUTF8,
			0, false, (UInt8*)result, resultLength - 1, &requiredBufferLength) <= 0) {
		ntfs_log_error("Could not perform UTF-8 conversion of normalized CFMutableString.\n");
		free(result);
		result = NULL;
	}

done:
	CFRelease(cfMutableString);

	if (result == NULL)
		return -1;

	*target = result;
	return resultLength - 1;
#else
	(void)utf8_string;
	(void)target;
	(void)composed;
	return -1;
#endif /* ENABLE_NFCONV */
}
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
|
|
|
@ -1933,6 +1933,24 @@ static char *parse_mount_options(const char *orig_opts)
|
|||
if (missing_option_value(val, "locale"))
|
||||
goto err_exit;
|
||||
setlocale(LC_ALL, val);
|
||||
#if defined(__APPLE__) || defined(__DARWIN__)
|
||||
#ifdef ENABLE_NFCONV
|
||||
} else if (!strcmp(opt, "nfconv")) {
|
||||
if (bogus_option_value(val, "nfconv"))
|
||||
goto err_exit;
|
||||
if (ntfs_macosx_normalize_filenames(1)) {
|
||||
ntfs_log_error("ntfs_macosx_normalize_filenames(1) failed!\n");
|
||||
goto err_exit;
|
||||
}
|
||||
} else if (!strcmp(opt, "nonfconv")) {
|
||||
if (bogus_option_value(val, "nonfconv"))
|
||||
goto err_exit;
|
||||
if (ntfs_macosx_normalize_filenames(0)) {
|
||||
ntfs_log_error("ntfs_macosx_normalize_filenames(0) failed!\n");
|
||||
goto err_exit;
|
||||
}
|
||||
#endif /* ENABLE_NFCONV */
|
||||
#endif /* defined(__APPLE__) || defined(__DARWIN__) */
|
||||
} else if (!strcmp(opt, "streams_interface")) {
|
||||
if (missing_option_value(val, "streams_interface"))
|
||||
goto err_exit;
|
||||
|
|
Loading…
Reference in New Issue