Evaluated file name collations in a single pass

2009-12-16 09:45:28 +00:00 · 2009-12-16 09:45:28 +00:00 · 195945cdc0
parent 2db43fc118
commit 195945cdc0
6 changed files with 106 additions and 159 deletions
--- a/include/ntfs-3g/unistr.h
+++ b/include/ntfs-3g/unistr.h
@ -30,9 +30,9 @@ extern BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
 		const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic,
 		const ntfschar *upcase, const u32 upcase_size);

-extern int ntfs_names_collate(const ntfschar *name1, const u32 name1_len,
+extern int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
 		const ntfschar *name2, const u32 name2_len,
-		const int err_val, const IGNORE_CASE_BOOL ic,
+		const IGNORE_CASE_BOOL ic,
 		const ntfschar *upcase, const u32 upcase_len);

 extern int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n);
--- a/libntfs-3g/attrib.c
+++ b/libntfs-3g/attrib.c
@ -2184,38 +2184,24 @@ static int ntfs_attr_find(const ATTR_TYPES type, const ntfschar *name,
 				errno = ENOENT;
 				return -1;
 			}
-		} else if (name && !ntfs_names_are_equal(name, name_len,
-			    (ntfschar*)((char*)a + le16_to_cpu(a->name_offset)),
-			    a->name_length, ic, upcase, upcase_len)) {
+		} else {
 			register int rc;
-
-			rc = ntfs_names_collate(name, name_len,
-					(ntfschar*)((char*)a +
-					le16_to_cpu(a->name_offset)),
-					a->name_length, 1, IGNORE_CASE,
-					upcase, upcase_len);
-			/*
-			 * If @name collates before a->name, there is no
-			 * matching attribute.
-			 */
-			if (rc == -1) {
-				errno = ENOENT;
-				return -1;
-			}
+			if (name && ((rc = ntfs_names_full_collate(name,
+					name_len, (ntfschar*)((char*)a +
+						le16_to_cpu(a->name_offset)),
+					a->name_length, ic,
+					upcase, upcase_len)))) {
+				/*
+				 * If @name collates before a->name,
+				 * there is no matching attribute.
+				 */
+				if (rc < 0) {
+					errno = ENOENT;
+					return -1;
+				}
 			/* If the strings are not equal, continue search. */
-			if (rc)
-				continue;
-			rc = ntfs_names_collate(name, name_len,
-					(ntfschar*)((char*)a +
-					le16_to_cpu(a->name_offset)),
-					a->name_length, 1, CASE_SENSITIVE,
-					upcase, upcase_len);
-			if (rc == -1) {
-				errno = ENOENT;
-				return -1;
+			continue;
 			}
-			if (rc)
-				continue;
 		}
 		/*
 		 * The names match or @name not present and attribute is
@ -2495,38 +2481,22 @@ find_attr_list_attr:
 		if (name == AT_UNNAMED) {
 			if (al_name_len)
 				goto not_found;
-		} else if (name && !ntfs_names_are_equal(al_name, al_name_len,
-				name, name_len, ic, vol->upcase,
-				vol->upcase_len)) {
-			register int rc;
+		} else {
+			int rc;

-			rc = ntfs_names_collate(name, name_len, al_name,
-					al_name_len, 1, IGNORE_CASE,
-					vol->upcase, vol->upcase_len);
-			/*
-			 * If @name collates before al_name, there is no
-			 * matching attribute.
-			 */
-			if (rc == -1)
-				goto not_found;
-			/* If the strings are not equal, continue search. */
-			if (rc)
-				continue;
-			/*
-			 * FIXME: Reverse engineering showed 0, IGNORE_CASE but
-			 * that is inconsistent with ntfs_attr_find(). The
-			 * subsequent rc checks were also different. Perhaps I
-			 * made a mistake in one of the two. Need to recheck
-			 * which is correct or at least see what is going
-			 * on... (AIA)
-			 */
-			rc = ntfs_names_collate(name, name_len, al_name,
-					al_name_len, 1, CASE_SENSITIVE,
-					vol->upcase, vol->upcase_len);
-			if (rc == -1)
-				goto not_found;
-			if (rc)
+			if (name && ((rc = ntfs_names_full_collate(name,
+					name_len, al_name, al_name_len, ic,
+					vol->upcase, vol->upcase_len)))) {
+
+				/*
+				 * If @name collates before al_name,
+				 * there is no matching attribute.
+				 */
+				if (rc < 0)
+					goto not_found;
+				/* If the strings are not equal, continue search. */
 				continue;
+			}
 		}
 		/*
 		 * The names match or @name not present and attribute is
--- a/libntfs-3g/collate.c
+++ b/libntfs-3g/collate.c
@ -228,15 +228,19 @@ static int ntfs_collate_file_name(ntfs_volume *vol,
 		const void *data1, const int data1_len __attribute__((unused)),
 		const void *data2, const int data2_len __attribute__((unused)))
 {
+	const FILE_NAME_ATTR *file_name_attr1;
+	const FILE_NAME_ATTR *file_name_attr2;
 	int rc;

 	ntfs_log_trace("Entering.\n");
-	rc = ntfs_file_values_compare(data1, data2, NTFS_COLLATION_ERROR,
-			IGNORE_CASE, vol->upcase, vol->upcase_len);
-	if (!rc)
-		rc = ntfs_file_values_compare(data1, data2,
-				NTFS_COLLATION_ERROR, CASE_SENSITIVE,
-				vol->upcase, vol->upcase_len);
+	file_name_attr1 = (const FILE_NAME_ATTR*)data1;
+	file_name_attr2 = (const FILE_NAME_ATTR*)data2;
+	rc = ntfs_names_full_collate(
+			(ntfschar*)&file_name_attr1->file_name,
+			file_name_attr1->file_name_length,
+			(ntfschar*)&file_name_attr2->file_name,
+			file_name_attr2->file_name_length,
+			CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 	ntfs_log_trace("Done, returning %i.\n", rc);
 	return rc;
 }
--- a/libntfs-3g/dir.c
+++ b/libntfs-3g/dir.c
@ -226,10 +226,10 @@ u64 ntfs_inode_lookup_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * Not a perfect match, need to do full blown collation so we
 		 * know which way in the B+tree we have to go.
 		 */
-		rc = ntfs_names_collate(uname, uname_len,
+		rc = ntfs_names_full_collate(uname, uname_len,
 				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				IGNORE_CASE, vol->upcase, vol->upcase_len);
+				ie->key.file_name.file_name_length,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 		/*
 		 * If uname collates before the name of the current entry, there
 		 * is definitely no such name in this index but we might need to
@ -238,19 +238,6 @@ u64 ntfs_inode_lookup_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		if (rc == -1)
 			break;
 		/* The names are not equal, continue the search. */
-		if (rc)
-			continue;
-		/*
-		 * Names match with case insensitive comparison, now try the
-		 * case sensitive comparison, which is required for proper
-		 * collation.
-		 */
-		rc = ntfs_names_collate(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
-		if (rc == -1)
-			break;
 		if (rc)
 			continue;
 		/*
@ -381,10 +368,10 @@ descend_into_child_node:
 		 * Not a perfect match, need to do full blown collation so we
 		 * know which way in the B+tree we have to go.
 		 */
-		rc = ntfs_names_collate(uname, uname_len,
+		rc = ntfs_names_full_collate(uname, uname_len,
 				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				IGNORE_CASE, vol->upcase, vol->upcase_len);
+				ie->key.file_name.file_name_length,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 		/*
 		 * If uname collates before the name of the current entry, there
 		 * is definitely no such name in this index but we might need to
@ -395,20 +382,6 @@ descend_into_child_node:
 		/* The names are not equal, continue the search. */
 		if (rc)
 			continue;
-		/*
-		 * Names match with case insensitive comparison, now try the
-		 * case sensitive comparison, which is required for proper
-		 * collation.
-		 */
-		rc = ntfs_names_collate(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
-		if (rc == -1)
-			break;
-		if (rc)
-			continue;
-		
 		mref = le64_to_cpu(ie->indexed_file);
 		free(ia);
 		ntfs_attr_close(ia_na);
--- a/libntfs-3g/reparse.c
+++ b/libntfs-3g/reparse.c
@ -188,10 +188,10 @@ static u64 ntfs_fix_file_name(ntfs_inode *dir_ni, ntfschar *uname,
 		if (entry) {
 			found = &entry->key.file_name;
 			if (lkup
-			   && !ntfs_names_collate(find.attr.file_name,
+			   && ntfs_names_are_equal(find.attr.file_name,
 				find.attr.file_name_length,
 				found->file_name, found->file_name_length,
-				1, IGNORE_CASE,
+				IGNORE_CASE,
 				vol->upcase, vol->upcase_len))
 					lkup = 0;
 			if (!lkup) {
--- a/libntfs-3g/unistr.c
+++ b/libntfs-3g/unistr.c
@ -132,37 +132,30 @@ BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
 								       TRUE;
 }

-/**
- * ntfs_names_collate - collate two Unicode names
+/*
+ * ntfs_names_full_collate() fully collates two Unicode names
+ *
 * @name1:	first Unicode name to compare
 * @name1_len:	length of first Unicode name to compare
 * @name2:	second Unicode name to compare
 * @name2_len:	length of second Unicode name to compare
- * @err_val:	if @name1 contains an invalid character return this value
 * @ic:		either CASE_SENSITIVE or IGNORE_CASE
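- * @upcase:	upcase table (ignored if @ic is CASE_SENSITIVE)
- * @upcase_len:	upcase table size (ignored if @ic is CASE_SENSITIVE)
+ * @upcase:	upcase table (consulted for both values of @ic)
+ * @upcase_len:	upcase table size (consulted for both values of @ic)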
 *
- * ntfs_names_collate() collates two Unicode names and returns:
- *
 *  -1 if the first name collates before the second one,
 *   0 if the names match,
- *   1 if the second name collates before the first one, or
+ *   1 if the second name collates before the first one.
- * @err_val if an invalid character is found in @name1 during the comparison.
 *
- * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
- *
- * A few optimizations made by JPA
 */
-
-int ntfs_names_collate(const ntfschar *name1, const u32 name1_len,
+int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
 		const ntfschar *name2, const u32 name2_len,
-		const int err_val __attribute__((unused)),
 		const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
 		const u32 upcase_len)
 {
 	u32 cnt;
 	u16 c1, c2;
+	u16 u1, u2;

 #ifdef DEBUG
 	if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) {
@ -171,38 +164,70 @@ int ntfs_names_collate(const ntfschar *name1, const u32 name1_len,
 	}
 #endif
 	cnt = min(name1_len, name2_len);
-		/* JPA average loop count is 8 */
 	if (cnt > 0) {
-		if (ic)
-				/* JPA this loop in 76% cases */
+		if (ic == CASE_SENSITIVE) {
 			do {
 				c1 = le16_to_cpu(*name1);
 				name1++;
 				c2 = le16_to_cpu(*name2);
 				name2++;
-				if (c1 < upcase_len)
-					c1 = le16_to_cpu(upcase[c1]);
-				if (c2 < upcase_len)
-					c2 = le16_to_cpu(upcase[c2]);
-			} while ((c1 == c2) && --cnt);
-		else
+			} while (--cnt && (c1 == c2));
+			u1 = c1;
+			u2 = c2;
+			if (u1 < upcase_len)
+				u1 = le16_to_cpu(upcase[u1]);
+			if (u2 < upcase_len)
+				u2 = le16_to_cpu(upcase[u2]);
+			if ((u1 == u2) && cnt)
+				do {
+					u1 = le16_to_cpu(*name1);
+					name1++;
+					u2 = le16_to_cpu(*name2);
+					name2++;
+					if (u1 < upcase_len)
+						u1 = le16_to_cpu(upcase[u1]);
+					if (u2 < upcase_len)
+						u2 = le16_to_cpu(upcase[u2]);
+				} while ((u1 == u2) && --cnt);
+			if (u1 < u2)
+				return -1;
+			if (u1 > u2)
+				return 1;
+			if (name1_len < name2_len)
+				return -1;
+			if (name1_len > name2_len)
+				return 1;
+			if (c1 < c2)
+				return -1;
+			if (c1 > c2)
+				return 1;
+		} else {
 			do {
-				/* JPA this loop in 24% cases */
-				c1 = le16_to_cpu(*name1);
+				u1 = c1 = le16_to_cpu(*name1);
 				name1++;
-				c2 = le16_to_cpu(*name2);
+				u2 = c2 = le16_to_cpu(*name2);
 				name2++;
-			} while ((c1 == c2) && --cnt);
-		if (c1 < c2)
+				if (u1 < upcase_len)
+					u1 = le16_to_cpu(upcase[u1]);
+				if (u2 < upcase_len)
+					u2 = le16_to_cpu(upcase[u2]);
+			} while ((u1 == u2) && --cnt);
+			if (u1 < u2)
+				return -1;
+			if (u1 > u2)
+				return 1;
+			if (name1_len < name2_len)
+				return -1;
+			if (name1_len > name2_len)
+				return 1;
+		}
+	} else {
+		if (name1_len < name2_len)
 			return -1;
-		if (c1 > c2)
+		if (name1_len > name2_len)
 			return 1;
 	}
-	if (name1_len < name2_len)
-		return -1;
-	if (name1_len == name2_len)
-		return 0;
-	return 1;
+	return 0;
 }

 /**
@ -381,31 +406,6 @@ void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
 			file_name_attr->file_name_length, upcase, upcase_len);
 }

-/**
- * ntfs_file_values_compare - Which of two filenames should be listed first
- * @file_name_attr1:
- * @file_name_attr2:
- * @err_val:
- * @ic:
- * @upcase:
- * @upcase_len:
- *
- * Description...
- *
- * Returns:
- */
-int ntfs_file_values_compare(const FILE_NAME_ATTR *file_name_attr1,
-		const FILE_NAME_ATTR *file_name_attr2,
-		const int err_val, const IGNORE_CASE_BOOL ic,
-		const ntfschar *upcase, const u32 upcase_len)
-{
-	return ntfs_names_collate((ntfschar*)&file_name_attr1->file_name,
-			file_name_attr1->file_name_length,
-			(ntfschar*)&file_name_attr2->file_name,
-			file_name_attr2->file_name_length,
-			err_val, ic, upcase, upcase_len);
-}
-
 /*
   NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
   for now]) for path names, but the Unicode code points need to be