firebase · milaGGL · Mar 3, 2025 · Feb 18, 2025 · Feb 19, 2025 · Feb 19, 2025
diff --git a/firebase-firestore/CHANGELOG.md b/firebase-firestore/CHANGELOG.md
@@ -1,4 +1,5 @@
 # Unreleased
+* [fixed] Use lazy encoding in UTF-8 encoded byte comparison for strings to solve performance issues. [#6706](//github.com/firebase/firebase-android-sdk/pull/6706)
 * [changed] Updated `protolite-well-known-types` dependency to `18.0.1`. [#6716]
 
 

diff --git a/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java b/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java
@@ -1658,17 +1658,33 @@ public void sdkOrdersQueryByDocumentIdTheSameWayOnlineAndOffline() {
   public void snapshotListenerSortsUnicodeStringsAsServer() {
     Map<String, Map<String, Object>> testDocs =
         map(
-            "a", map("value", "Łukasiewicz"),
-            "b", map("value", "Sierpiński"),
-            "c", map("value", "岩澤"),
-            "d", map("value", "🄟"),
-            "e", map("value", "Ｐ"),
-            "f", map("value", "︒"),
-            "g", map("value", "🐵"));
+            "a",
+            map("value", "Łukasiewicz"),
+            "b",
+            map("value", "Sierpiński"),
+            "c",
+            map("value", "岩澤"),
+            "d",
+            map("value", "🄟"),
+            "e",
+            map("value", "Ｐ"),
+            "f",
+            map("value", "︒"),
+            "g",
+            map("value", "🐵"),
+            "h",
+            map("value", "你好"),
+            "i",
+            map("value", "你顥"),
+            "j",
+            map("value", "😁"),
+            "k",
+            map("value", "😀"));
 
     CollectionReference colRef = testCollectionWithDocs(testDocs);
     Query orderedQuery = colRef.orderBy("value");
-    List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
+    List<String> expectedDocIds =
+        Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
 
     QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
     List<String> getSnapshotDocIds =
@@ -1699,17 +1715,33 @@ public void snapshotListenerSortsUnicodeStringsAsServer() {
   public void snapshotListenerSortsUnicodeStringsInArrayAsServer() {
     Map<String, Map<String, Object>> testDocs =
         map(
-            "a", map("value", Arrays.asList("Łukasiewicz")),
-            "b", map("value", Arrays.asList("Sierpiński")),
-            "c", map("value", Arrays.asList("岩澤")),
-            "d", map("value", Arrays.asList("🄟")),
-            "e", map("value", Arrays.asList("Ｐ")),
-            "f", map("value", Arrays.asList("︒")),
-            "g", map("value", Arrays.asList("🐵")));
+            "a",
+            map("value", Arrays.asList("Łukasiewicz")),
+            "b",
+            map("value", Arrays.asList("Sierpiński")),
+            "c",
+            map("value", Arrays.asList("岩澤")),
+            "d",
+            map("value", Arrays.asList("🄟")),
+            "e",
+            map("value", Arrays.asList("Ｐ")),
+            "f",
+            map("value", Arrays.asList("︒")),
+            "g",
+            map("value", Arrays.asList("🐵")),
+            "h",
+            map("value", Arrays.asList("你好")),
+            "i",
+            map("value", Arrays.asList("你顥")),
+            "j",
+            map("value", Arrays.asList("😁")),
+            "k",
+            map("value", Arrays.asList("😀")));
 
     CollectionReference colRef = testCollectionWithDocs(testDocs);
     Query orderedQuery = colRef.orderBy("value");
-    List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
+    List<String> expectedDocIds =
+        Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
 
     QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
     List<String> getSnapshotDocIds =
@@ -1740,17 +1772,33 @@ public void snapshotListenerSortsUnicodeStringsInArrayAsServer() {
   public void snapshotListenerSortsUnicodeStringsInMapAsServer() {
     Map<String, Map<String, Object>> testDocs =
         map(
-            "a", map("value", map("foo", "Łukasiewicz")),
-            "b", map("value", map("foo", "Sierpiński")),
-            "c", map("value", map("foo", "岩澤")),
-            "d", map("value", map("foo", "🄟")),
-            "e", map("value", map("foo", "Ｐ")),
-            "f", map("value", map("foo", "︒")),
-            "g", map("value", map("foo", "🐵")));
+            "a",
+            map("value", map("foo", "Łukasiewicz")),
+            "b",
+            map("value", map("foo", "Sierpiński")),
+            "c",
+            map("value", map("foo", "岩澤")),
+            "d",
+            map("value", map("foo", "🄟")),
+            "e",
+            map("value", map("foo", "Ｐ")),
+            "f",
+            map("value", map("foo", "︒")),
+            "g",
+            map("value", map("foo", "🐵")),
+            "h",
+            map("value", map("foo", "你好")),
+            "i",
+            map("value", map("foo", "你顥")),
+            "j",
+            map("value", map("foo", "😁")),
+            "k",
+            map("value", map("foo", "😀")));
 
     CollectionReference colRef = testCollectionWithDocs(testDocs);
     Query orderedQuery = colRef.orderBy("value");
-    List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
+    List<String> expectedDocIds =
+        Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
 
     QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
     List<String> getSnapshotDocIds =
@@ -1781,17 +1829,33 @@ public void snapshotListenerSortsUnicodeStringsInMapAsServer() {
   public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() {
     Map<String, Map<String, Object>> testDocs =
         map(
-            "a", map("value", map("Łukasiewicz", "foo")),
-            "b", map("value", map("Sierpiński", "foo")),
-            "c", map("value", map("岩澤", "foo")),
-            "d", map("value", map("🄟", "foo")),
-            "e", map("value", map("Ｐ", "foo")),
-            "f", map("value", map("︒", "foo")),
-            "g", map("value", map("🐵", "foo")));
+            "a",
+            map("value", map("Łukasiewicz", "foo")),
+            "b",
+            map("value", map("Sierpiński", "foo")),
+            "c",
+            map("value", map("岩澤", "foo")),
+            "d",
+            map("value", map("🄟", "foo")),
+            "e",
+            map("value", map("Ｐ", "foo")),
+            "f",
+            map("value", map("︒", "foo")),
+            "g",
+            map("value", map("🐵", "foo")),
+            "h",
+            map("value", map("你好", "foo")),
+            "i",
+            map("value", map("你顥", "foo")),
+            "j",
+            map("value", map("😁", "foo")),
+            "k",
+            map("value", map("😀", "foo")));
 
     CollectionReference colRef = testCollectionWithDocs(testDocs);
     Query orderedQuery = colRef.orderBy("value");
-    List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
+    List<String> expectedDocIds =
+        Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
 
     QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
     List<String> getSnapshotDocIds =
@@ -1822,18 +1886,83 @@ public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() {
   public void snapshotListenerSortsUnicodeStringsInDocumentKeyAsServer() {
     Map<String, Map<String, Object>> testDocs =
         map(
-            "Łukasiewicz", map("value", "foo"),
-            "Sierpiński", map("value", "foo"),
-            "岩澤", map("value", "foo"),
-            "🄟", map("value", "foo"),
-            "Ｐ", map("value", "foo"),
-            "︒", map("value", "foo"),
-            "🐵", map("value", "foo"));
+            "Łukasiewicz",
+            map("value", "foo"),
+            "Sierpiński",
+            map("value", "foo"),
+            "岩澤",
+            map("value", "foo"),
+            "🄟",
+            map("value", "foo"),
+            "Ｐ",
+            map("value", "foo"),
+            "︒",
+            map("value", "foo"),
+            "🐵",
+            map("value", "foo"),
+            "你好",
+            map("value", "foo"),
+            "你顥",
+            map("value", "foo"),
+            "😁",
+            map("value", "foo"),
+            "😀",
+            map("value", "foo"));
 
     CollectionReference colRef = testCollectionWithDocs(testDocs);
     Query orderedQuery = colRef.orderBy(FieldPath.documentId());
     List<String> expectedDocIds =
-        Arrays.asList("Sierpiński", "Łukasiewicz", "岩澤", "︒", "Ｐ", "🄟", "🐵");
+        Arrays.asList(
+            "Sierpiński", "Łukasiewicz", "你好", "你顥", "岩澤", "︒", "Ｐ", "🄟", "🐵", "😀", "😁");
+
+    QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
+    List<String> getSnapshotDocIds =
+        getSnapshot.getDocuments().stream().map(ds -> ds.getId()).collect(Collectors.toList());
+
+    EventAccumulator<QuerySnapshot> eventAccumulator = new EventAccumulator<QuerySnapshot>();
+    ListenerRegistration registration =
+        orderedQuery.addSnapshotListener(eventAccumulator.listener());
+
+    List<String> watchSnapshotDocIds = new ArrayList<>();
+    try {
+      QuerySnapshot watchSnapshot = eventAccumulator.await();
+      watchSnapshotDocIds =
+          watchSnapshot.getDocuments().stream()
+              .map(documentSnapshot -> documentSnapshot.getId())
+              .collect(Collectors.toList());
+    } finally {
+      registration.remove();
+    }
+
+    assertTrue(getSnapshotDocIds.equals(expectedDocIds));
+    assertTrue(watchSnapshotDocIds.equals(expectedDocIds));
+
+    checkOnlineAndOfflineResultsMatch(orderedQuery, expectedDocIds.toArray(new String[0]));
+  }
+
+  @Test
+  public void snapshotListenerSortsInvalidUnicodeStringsAsServer() {
+    // Note: Protocol Buffer converts any invalid surrogates to "?".
+    Map<String, Map<String, Object>> testDocs =
+        map(
+            "a",
+            map("value", "Z"),
+            "b",
+            map("value", "你好"),
+            "c",
+            map("value", "😀"),
+            "d",
+            map("value", "ab\uD800"), // Lone high surrogate
+            "e",
+            map("value", "ab\uDC00"), // Lone low surrogate
+            "f",
+            map("value", "ab\uD800\uD800"), // Unpaired high surrogate
+            "g",
+            map("value", "ab\uDC00\uDC00")); // Unpaired low surrogate
+
+    CollectionReference colRef = testCollectionWithDocs(testDocs);
+    Query orderedQuery = colRef.orderBy("value");
+    List<String> expectedDocIds = Arrays.asList("a", "d", "e", "f", "g", "b", "c");
 
     QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
     List<String> getSnapshotDocIds =

diff --git a/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java b/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java
@@ -87,9 +87,46 @@ public static int compareIntegers(int i1, int i2) {
 
   /** Compare strings in UTF-8 encoded byte order */
   public static int compareUtf8Strings(String left, String right) {
-    ByteString leftBytes = ByteString.copyFromUtf8(left);
-    ByteString rightBytes = ByteString.copyFromUtf8(right);
-    return compareByteStrings(leftBytes, rightBytes);
+    // Walk both strings one code point at a time and decide the order at the first code point
+    // that differs, so neither string has to be encoded to UTF-8.
+    int i = 0;
+    while (i < left.length() && i < right.length()) {
+      int leftCodePoint = left.codePointAt(i);
+      int rightCodePoint = right.codePointAt(i);
+
+      if (leftCodePoint != rightCodePoint) {
+        // The UTF-8 byte order of two well-formed code points equals their numeric order, so
+        // the order is decided from the code points without allocating any encoded bytes.
+        int comp = Integer.compare(utf8SortKey(leftCodePoint), utf8SortKey(rightCodePoint));
+        if (comp != 0) {
+          return comp;
+        }
+        // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte representations are
+        // identical. This can happen with malformed input (invalid surrogate pairs), where an
+        // unpaired surrogate is encoded as "?". Meanwhile, any invalid surrogate inputs get
+        // converted to "?" by protocol buffer while round tripping, so we almost never receive
+        // invalid strings from backend.
+        // Fallback to code point comparison for graceful handling.
+        return Integer.compare(leftCodePoint, rightCodePoint);
+      }
+      // Increment by 2 for surrogate pairs, 1 otherwise.
+      i += Character.charCount(leftCodePoint);
+    }
+
+    // Compare lengths if all characters are equal
+    return Integer.compare(left.length(), right.length());
+  }
+
+  /**
+   * Maps a code point to the value that orders it the way its UTF-8 encoding is ordered.
+   *
+   * <p>Well-formed code points are returned unchanged. An unpaired surrogate has no UTF-8
+   * encoding and is replaced by "?" when a Java string is encoded, so it is ordered as "?".
+   */
+  private static int utf8SortKey(int codePoint) {
+    boolean unpairedSurrogate =
+        codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE;
+    return unpairedSurrogate ? '?' : codePoint;
   }
 
   /**