From 195945cdc07e4a53ece3934e93e843b338d10778 Mon Sep 17 00:00:00 2001 From: jpandre Date: Wed, 16 Dec 2009 09:45:28 +0000 Subject: [PATCH] Evaluated file names collations in a single parsing --- include/ntfs-3g/unistr.h | 4 +- libntfs-3g/attrib.c | 88 ++++++++++-------------------- libntfs-3g/collate.c | 16 +++--- libntfs-3g/dir.c | 39 +++----------- libntfs-3g/reparse.c | 4 +- libntfs-3g/unistr.c | 114 +++++++++++++++++++-------------------- 6 files changed, 106 insertions(+), 159 deletions(-) diff --git a/include/ntfs-3g/unistr.h b/include/ntfs-3g/unistr.h index 115a8650..5f0b467b 100644 --- a/include/ntfs-3g/unistr.h +++ b/include/ntfs-3g/unistr.h @@ -30,9 +30,9 @@ extern BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len, const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_size); -extern int ntfs_names_collate(const ntfschar *name1, const u32 name1_len, +extern int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, const ntfschar *name2, const u32 name2_len, - const int err_val, const IGNORE_CASE_BOOL ic, + const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_len); extern int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n); diff --git a/libntfs-3g/attrib.c b/libntfs-3g/attrib.c index 9742e208..7b20c08b 100644 --- a/libntfs-3g/attrib.c +++ b/libntfs-3g/attrib.c @@ -2184,38 +2184,24 @@ static int ntfs_attr_find(const ATTR_TYPES type, const ntfschar *name, errno = ENOENT; return -1; } - } else if (name && !ntfs_names_are_equal(name, name_len, - (ntfschar*)((char*)a + le16_to_cpu(a->name_offset)), - a->name_length, ic, upcase, upcase_len)) { + } else { register int rc; - - rc = ntfs_names_collate(name, name_len, - (ntfschar*)((char*)a + - le16_to_cpu(a->name_offset)), - a->name_length, 1, IGNORE_CASE, - upcase, upcase_len); - /* - * If @name collates before a->name, there is no - * matching attribute. - */ - if (rc == -1) { - errno = ENOENT; - return -1; - } + if (name && ((rc = ntfs_names_full_collate(name, + name_len, (ntfschar*)((char*)a + + le16_to_cpu(a->name_offset)), + a->name_length, ic, + upcase, upcase_len)))) { + /* + * If @name collates before a->name, + * there is no matching attribute. + */ + if (rc < 0) { + errno = ENOENT; + return -1; + } /* If the strings are not equal, continue search. */ - if (rc) - continue; - rc = ntfs_names_collate(name, name_len, - (ntfschar*)((char*)a + - le16_to_cpu(a->name_offset)), - a->name_length, 1, CASE_SENSITIVE, - upcase, upcase_len); - if (rc == -1) { - errno = ENOENT; - return -1; + continue; } - if (rc) - continue; } /* * The names match or @name not present and attribute is @@ -2495,38 +2481,22 @@ find_attr_list_attr: if (name == AT_UNNAMED) { if (al_name_len) goto not_found; - } else if (name && !ntfs_names_are_equal(al_name, al_name_len, - name, name_len, ic, vol->upcase, - vol->upcase_len)) { - register int rc; + } else { + int rc; - rc = ntfs_names_collate(name, name_len, al_name, - al_name_len, 1, IGNORE_CASE, - vol->upcase, vol->upcase_len); - /* - * If @name collates before al_name, there is no - * matching attribute. - */ - if (rc == -1) - goto not_found; - /* If the strings are not equal, continue search. */ - if (rc) - continue; - /* - * FIXME: Reverse engineering showed 0, IGNORE_CASE but - * that is inconsistent with ntfs_attr_find(). The - * subsequent rc checks were also different. Perhaps I - * made a mistake in one of the two. Need to recheck - * which is correct or at least see what is going - * on... (AIA) - */ - rc = ntfs_names_collate(name, name_len, al_name, - al_name_len, 1, CASE_SENSITIVE, - vol->upcase, vol->upcase_len); - if (rc == -1) - goto not_found; - if (rc) + if (name && ((rc = ntfs_names_full_collate(name, + name_len, al_name, al_name_len, ic, + vol->upcase, vol->upcase_len)))) { + + /* + * If @name collates before al_name, + * there is no matching attribute. + */ + if (rc < 0) + goto not_found; + /* If the strings are not equal, continue search. */ continue; + } } /* * The names match or @name not present and attribute is diff --git a/libntfs-3g/collate.c b/libntfs-3g/collate.c index 856a07b8..95c21417 100644 --- a/libntfs-3g/collate.c +++ b/libntfs-3g/collate.c @@ -228,15 +228,19 @@ static int ntfs_collate_file_name(ntfs_volume *vol, const void *data1, const int data1_len __attribute__((unused)), const void *data2, const int data2_len __attribute__((unused))) { + const FILE_NAME_ATTR *file_name_attr1; + const FILE_NAME_ATTR *file_name_attr2; int rc; ntfs_log_trace("Entering.\n"); - rc = ntfs_file_values_compare(data1, data2, NTFS_COLLATION_ERROR, - IGNORE_CASE, vol->upcase, vol->upcase_len); - if (!rc) - rc = ntfs_file_values_compare(data1, data2, - NTFS_COLLATION_ERROR, CASE_SENSITIVE, - vol->upcase, vol->upcase_len); + file_name_attr1 = (const FILE_NAME_ATTR*)data1; + file_name_attr2 = (const FILE_NAME_ATTR*)data2; + rc = ntfs_names_full_collate( + (ntfschar*)&file_name_attr1->file_name, + file_name_attr1->file_name_length, + (ntfschar*)&file_name_attr2->file_name, + file_name_attr2->file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); ntfs_log_trace("Done, returning %i.\n", rc); return rc; } diff --git a/libntfs-3g/dir.c b/libntfs-3g/dir.c index 47ced720..dde14fce 100644 --- a/libntfs-3g/dir.c +++ b/libntfs-3g/dir.c @@ -226,10 +226,10 @@ u64 ntfs_inode_lookup_by_name(ntfs_inode *dir_ni, const ntfschar *uname, * Not a perfect match, need to do full blown collation so we * know which way in the B+tree we have to go. */ - rc = ntfs_names_collate(uname, uname_len, + rc = ntfs_names_full_collate(uname, uname_len, (ntfschar*)&ie->key.file_name.file_name, - ie->key.file_name.file_name_length, 1, - IGNORE_CASE, vol->upcase, vol->upcase_len); + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); /* * If uname collates before the name of the current entry, there * is definitely no such name in this index but we might need to @@ -238,19 +238,6 @@ u64 ntfs_inode_lookup_by_name(ntfs_inode *dir_ni, const ntfschar *uname, if (rc == -1) break; /* The names are not equal, continue the search. */ - if (rc) - continue; - /* - * Names match with case insensitive comparison, now try the - * case sensitive comparison, which is required for proper - * collation. - */ - rc = ntfs_names_collate(uname, uname_len, - (ntfschar*)&ie->key.file_name.file_name, - ie->key.file_name.file_name_length, 1, - CASE_SENSITIVE, vol->upcase, vol->upcase_len); - if (rc == -1) - break; if (rc) continue; /* @@ -381,10 +368,10 @@ descend_into_child_node: * Not a perfect match, need to do full blown collation so we * know which way in the B+tree we have to go. */ - rc = ntfs_names_collate(uname, uname_len, + rc = ntfs_names_full_collate(uname, uname_len, (ntfschar*)&ie->key.file_name.file_name, - ie->key.file_name.file_name_length, 1, - IGNORE_CASE, vol->upcase, vol->upcase_len); + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); /* * If uname collates before the name of the current entry, there * is definitely no such name in this index but we might need to @@ -395,20 +382,6 @@ descend_into_child_node: /* The names are not equal, continue the search. */ if (rc) continue; - /* - * Names match with case insensitive comparison, now try the - * case sensitive comparison, which is required for proper - * collation. - */ - rc = ntfs_names_collate(uname, uname_len, - (ntfschar*)&ie->key.file_name.file_name, - ie->key.file_name.file_name_length, 1, - CASE_SENSITIVE, vol->upcase, vol->upcase_len); - if (rc == -1) - break; - if (rc) - continue; - mref = le64_to_cpu(ie->indexed_file); free(ia); ntfs_attr_close(ia_na); diff --git a/libntfs-3g/reparse.c b/libntfs-3g/reparse.c index 81b4cfce..7364021d 100644 --- a/libntfs-3g/reparse.c +++ b/libntfs-3g/reparse.c @@ -188,10 +188,10 @@ static u64 ntfs_fix_file_name(ntfs_inode *dir_ni, ntfschar *uname, if (entry) { found = &entry->key.file_name; if (lkup - && !ntfs_names_collate(find.attr.file_name, + && ntfs_names_are_equal(find.attr.file_name, find.attr.file_name_length, found->file_name, found->file_name_length, - 1, IGNORE_CASE, + IGNORE_CASE, vol->upcase, vol->upcase_len)) lkup = 0; if (!lkup) { diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c index 16f25bb0..e65df992 100644 --- a/libntfs-3g/unistr.c +++ b/libntfs-3g/unistr.c @@ -132,37 +132,30 @@ BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len, TRUE; } -/** - * ntfs_names_collate - collate two Unicode names +/* + * ntfs_names_full_collate() fully collate two Unicode names + * * @name1: first Unicode name to compare * @name1_len: length of first Unicode name to compare * @name2: second Unicode name to compare * @name2_len: length of second Unicode name to compare - * @err_val: if @name1 contains an invalid character return this value * @ic: either CASE_SENSITIVE or IGNORE_CASE * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) * - * ntfs_names_collate() collates two Unicode names and returns: - * * -1 if the first name collates before the second one, * 0 if the names match, * 1 if the second name collates before the first one, or - * @err_val if an invalid character is found in @name1 during the comparison. * - * The following characters are considered invalid: '"', '*', '<', '>' and '?'. - * - * A few optimizations made by JPA */ - -int ntfs_names_collate(const ntfschar *name1, const u32 name1_len, +int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, const ntfschar *name2, const u32 name2_len, - const int err_val __attribute__((unused)), const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_len) { u32 cnt; u16 c1, c2; + u16 u1, u2; #ifdef DEBUG if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) { @@ -171,38 +164,70 @@ int ntfs_names_collate(const ntfschar *name1, const u32 name1_len, } #endif cnt = min(name1_len, name2_len); - /* JPA average loop count is 8 */ if (cnt > 0) { - if (ic) - /* JPA this loop in 76% cases */ + if (ic == CASE_SENSITIVE) { do { c1 = le16_to_cpu(*name1); name1++; c2 = le16_to_cpu(*name2); name2++; - if (c1 < upcase_len) - c1 = le16_to_cpu(upcase[c1]); - if (c2 < upcase_len) - c2 = le16_to_cpu(upcase[c2]); - } while ((c1 == c2) && --cnt); - else + } while (--cnt && (c1 == c2)); + u1 = c1; + u2 = c2; + if (u1 < upcase_len) + u1 = le16_to_cpu(upcase[u1]); + if (u2 < upcase_len) + u2 = le16_to_cpu(upcase[u2]); + if ((u1 == u2) && cnt) + do { + u1 = le16_to_cpu(*name1); + name1++; + u2 = le16_to_cpu(*name2); + name2++; + if (u1 < upcase_len) + u1 = le16_to_cpu(upcase[u1]); + if (u2 < upcase_len) + u2 = le16_to_cpu(upcase[u2]); + } while ((u1 == u2) && --cnt); + if (u1 < u2) + return -1; + if (u1 > u2) + return 1; + if (name1_len < name2_len) + return -1; + if (name1_len > name2_len) + return 1; + if (c1 < c2) + return -1; + if (c1 > c2) + return 1; + } else { do { - /* JPA this loop in 24% cases */ - c1 = le16_to_cpu(*name1); + u1 = c1 = le16_to_cpu(*name1); name1++; - c2 = le16_to_cpu(*name2); + u2 = c2 = le16_to_cpu(*name2); name2++; - } while ((c1 == c2) && --cnt); - if (c1 < c2) + if (u1 < upcase_len) + u1 = le16_to_cpu(upcase[u1]); + if (u2 < upcase_len) + u2 = le16_to_cpu(upcase[u2]); + } while ((u1 == u2) && --cnt); + if (u1 < u2) + return -1; + if (u1 > u2) + return 1; + if (name1_len < name2_len) + return -1; + if (name1_len > name2_len) + return 1; + } + } else { + if (name1_len < name2_len) return -1; - if (c1 > c2) + if (name1_len > name2_len) return 1; } - if (name1_len < name2_len) - return -1; - if (name1_len == name2_len) - return 0; - return 1; + return 0; } /** @@ -381,31 +406,6 @@ void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr, file_name_attr->file_name_length, upcase, upcase_len); } -/** - * ntfs_file_values_compare - Which of two filenames should be listed first - * @file_name_attr1: - * @file_name_attr2: - * @err_val: - * @ic: - * @upcase: - * @upcase_len: - * - * Description... - * - * Returns: - */ -int ntfs_file_values_compare(const FILE_NAME_ATTR *file_name_attr1, - const FILE_NAME_ATTR *file_name_attr2, - const int err_val, const IGNORE_CASE_BOOL ic, - const ntfschar *upcase, const u32 upcase_len) -{ - return ntfs_names_collate((ntfschar*)&file_name_attr1->file_name, - file_name_attr1->file_name_length, - (ntfschar*)&file_name_attr2->file_name, - file_name_attr2->file_name_length, - err_val, ic, upcase, upcase_len); -} - /* NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough for now]) for path names, but the Unicode code points need to be