Skip to content

Commit ec26a52

Browse files
authored
Use lazy encoding in utf-8 encoded string comparison (#6706)
1 parent ad7618b commit ec26a52

File tree

4 files changed

+390
-43
lines changed

4 files changed

+390
-43
lines changed

firebase-firestore/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Unreleased
2+
* [fixed] Use lazy encoding in UTF-8 encoded byte comparison for strings to solve performance issues. [#6706](//github.com/firebase/firebase-android-sdk/pull/6706)
23
* [changed] Updated `protolite-well-known-types` dependency to `18.0.1`. [#6716]
34

45

firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java

Lines changed: 169 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1658,17 +1658,33 @@ public void sdkOrdersQueryByDocumentIdTheSameWayOnlineAndOffline() {
16581658
public void snapshotListenerSortsUnicodeStringsAsServer() {
16591659
Map<String, Map<String, Object>> testDocs =
16601660
map(
1661-
"a", map("value", "Łukasiewicz"),
1662-
"b", map("value", "Sierpiński"),
1663-
"c", map("value", "岩澤"),
1664-
"d", map("value", "🄟"),
1665-
"e", map("value", "P"),
1666-
"f", map("value", "︒"),
1667-
"g", map("value", "🐵"));
1661+
"a",
1662+
map("value", "Łukasiewicz"),
1663+
"b",
1664+
map("value", "Sierpiński"),
1665+
"c",
1666+
map("value", "岩澤"),
1667+
"d",
1668+
map("value", "🄟"),
1669+
"e",
1670+
map("value", "P"),
1671+
"f",
1672+
map("value", "︒"),
1673+
"g",
1674+
map("value", "🐵"),
1675+
"h",
1676+
map("value", "你好"),
1677+
"i",
1678+
map("value", "你顥"),
1679+
"j",
1680+
map("value", "😁"),
1681+
"k",
1682+
map("value", "😀"));
16681683

16691684
CollectionReference colRef = testCollectionWithDocs(testDocs);
16701685
Query orderedQuery = colRef.orderBy("value");
1671-
List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
1686+
List<String> expectedDocIds =
1687+
Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
16721688

16731689
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
16741690
List<String> getSnapshotDocIds =
@@ -1699,17 +1715,33 @@ public void snapshotListenerSortsUnicodeStringsAsServer() {
16991715
public void snapshotListenerSortsUnicodeStringsInArrayAsServer() {
17001716
Map<String, Map<String, Object>> testDocs =
17011717
map(
1702-
"a", map("value", Arrays.asList("Łukasiewicz")),
1703-
"b", map("value", Arrays.asList("Sierpiński")),
1704-
"c", map("value", Arrays.asList("岩澤")),
1705-
"d", map("value", Arrays.asList("🄟")),
1706-
"e", map("value", Arrays.asList("P")),
1707-
"f", map("value", Arrays.asList("︒")),
1708-
"g", map("value", Arrays.asList("🐵")));
1718+
"a",
1719+
map("value", Arrays.asList("Łukasiewicz")),
1720+
"b",
1721+
map("value", Arrays.asList("Sierpiński")),
1722+
"c",
1723+
map("value", Arrays.asList("岩澤")),
1724+
"d",
1725+
map("value", Arrays.asList("🄟")),
1726+
"e",
1727+
map("value", Arrays.asList("P")),
1728+
"f",
1729+
map("value", Arrays.asList("︒")),
1730+
"g",
1731+
map("value", Arrays.asList("🐵")),
1732+
"h",
1733+
map("value", Arrays.asList("你好")),
1734+
"i",
1735+
map("value", Arrays.asList("你顥")),
1736+
"j",
1737+
map("value", Arrays.asList("😁")),
1738+
"k",
1739+
map("value", Arrays.asList("😀")));
17091740

17101741
CollectionReference colRef = testCollectionWithDocs(testDocs);
17111742
Query orderedQuery = colRef.orderBy("value");
1712-
List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
1743+
List<String> expectedDocIds =
1744+
Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
17131745

17141746
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
17151747
List<String> getSnapshotDocIds =
@@ -1740,17 +1772,33 @@ public void snapshotListenerSortsUnicodeStringsInArrayAsServer() {
17401772
public void snapshotListenerSortsUnicodeStringsInMapAsServer() {
17411773
Map<String, Map<String, Object>> testDocs =
17421774
map(
1743-
"a", map("value", map("foo", "Łukasiewicz")),
1744-
"b", map("value", map("foo", "Sierpiński")),
1745-
"c", map("value", map("foo", "岩澤")),
1746-
"d", map("value", map("foo", "🄟")),
1747-
"e", map("value", map("foo", "P")),
1748-
"f", map("value", map("foo", "︒")),
1749-
"g", map("value", map("foo", "🐵")));
1775+
"a",
1776+
map("value", map("foo", "Łukasiewicz")),
1777+
"b",
1778+
map("value", map("foo", "Sierpiński")),
1779+
"c",
1780+
map("value", map("foo", "岩澤")),
1781+
"d",
1782+
map("value", map("foo", "🄟")),
1783+
"e",
1784+
map("value", map("foo", "P")),
1785+
"f",
1786+
map("value", map("foo", "︒")),
1787+
"g",
1788+
map("value", map("foo", "🐵")),
1789+
"h",
1790+
map("value", map("foo", "你好")),
1791+
"i",
1792+
map("value", map("foo", "你顥")),
1793+
"j",
1794+
map("value", map("foo", "😁")),
1795+
"k",
1796+
map("value", map("foo", "😀")));
17501797

17511798
CollectionReference colRef = testCollectionWithDocs(testDocs);
17521799
Query orderedQuery = colRef.orderBy("value");
1753-
List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
1800+
List<String> expectedDocIds =
1801+
Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
17541802

17551803
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
17561804
List<String> getSnapshotDocIds =
@@ -1781,17 +1829,33 @@ public void snapshotListenerSortsUnicodeStringsInMapAsServer() {
17811829
public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() {
17821830
Map<String, Map<String, Object>> testDocs =
17831831
map(
1784-
"a", map("value", map("Łukasiewicz", "foo")),
1785-
"b", map("value", map("Sierpiński", "foo")),
1786-
"c", map("value", map("岩澤", "foo")),
1787-
"d", map("value", map("🄟", "foo")),
1788-
"e", map("value", map("P", "foo")),
1789-
"f", map("value", map("︒", "foo")),
1790-
"g", map("value", map("🐵", "foo")));
1832+
"a",
1833+
map("value", map("Łukasiewicz", "foo")),
1834+
"b",
1835+
map("value", map("Sierpiński", "foo")),
1836+
"c",
1837+
map("value", map("岩澤", "foo")),
1838+
"d",
1839+
map("value", map("🄟", "foo")),
1840+
"e",
1841+
map("value", map("P", "foo")),
1842+
"f",
1843+
map("value", map("︒", "foo")),
1844+
"g",
1845+
map("value", map("🐵", "foo")),
1846+
"h",
1847+
map("value", map("你好", "foo")),
1848+
"i",
1849+
map("value", map("你顥", "foo")),
1850+
"j",
1851+
map("value", map("😁", "foo")),
1852+
"k",
1853+
map("value", map("😀", "foo")));
17911854

17921855
CollectionReference colRef = testCollectionWithDocs(testDocs);
17931856
Query orderedQuery = colRef.orderBy("value");
1794-
List<String> expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g");
1857+
List<String> expectedDocIds =
1858+
Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j");
17951859

17961860
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
17971861
List<String> getSnapshotDocIds =
@@ -1822,18 +1886,83 @@ public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() {
18221886
public void snapshotListenerSortsUnicodeStringsInDocumentKeyAsServer() {
18231887
Map<String, Map<String, Object>> testDocs =
18241888
map(
1825-
"Łukasiewicz", map("value", "foo"),
1826-
"Sierpiński", map("value", "foo"),
1827-
"岩澤", map("value", "foo"),
1828-
"🄟", map("value", "foo"),
1829-
"P", map("value", "foo"),
1830-
"︒", map("value", "foo"),
1831-
"🐵", map("value", "foo"));
1889+
"Łukasiewicz",
1890+
map("value", "foo"),
1891+
"Sierpiński",
1892+
map("value", "foo"),
1893+
"岩澤",
1894+
map("value", "foo"),
1895+
"🄟",
1896+
map("value", "foo"),
1897+
"P",
1898+
map("value", "foo"),
1899+
"︒",
1900+
map("value", "foo"),
1901+
"🐵",
1902+
map("value", "foo"),
1903+
"你好",
1904+
map("value", "foo"),
1905+
"你顥",
1906+
map("value", "foo"),
1907+
"😁",
1908+
map("value", "foo"),
1909+
"😀",
1910+
map("value", "foo"));
18321911

18331912
CollectionReference colRef = testCollectionWithDocs(testDocs);
18341913
Query orderedQuery = colRef.orderBy(FieldPath.documentId());
18351914
List<String> expectedDocIds =
1836-
Arrays.asList("Sierpiński", "Łukasiewicz", "岩澤", "︒", "P", "🄟", "🐵");
1915+
Arrays.asList(
1916+
"Sierpiński", "Łukasiewicz", "你好", "你顥", "岩澤", "︒", "P", "🄟", "🐵", "😀", "😁");
1917+
1918+
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
1919+
List<String> getSnapshotDocIds =
1920+
getSnapshot.getDocuments().stream().map(ds -> ds.getId()).collect(Collectors.toList());
1921+
1922+
EventAccumulator<QuerySnapshot> eventAccumulator = new EventAccumulator<QuerySnapshot>();
1923+
ListenerRegistration registration =
1924+
orderedQuery.addSnapshotListener(eventAccumulator.listener());
1925+
1926+
List<String> watchSnapshotDocIds = new ArrayList<>();
1927+
try {
1928+
QuerySnapshot watchSnapshot = eventAccumulator.await();
1929+
watchSnapshotDocIds =
1930+
watchSnapshot.getDocuments().stream()
1931+
.map(documentSnapshot -> documentSnapshot.getId())
1932+
.collect(Collectors.toList());
1933+
} finally {
1934+
registration.remove();
1935+
}
1936+
1937+
assertTrue(getSnapshotDocIds.equals(expectedDocIds));
1938+
assertTrue(watchSnapshotDocIds.equals(expectedDocIds));
1939+
1940+
checkOnlineAndOfflineResultsMatch(orderedQuery, expectedDocIds.toArray(new String[0]));
1941+
}
1942+
1943+
@Test
1944+
public void snapshotListenerSortsInvalidUnicodeStringsAsServer() {
1945+
// Note: Protocol Buffer converts any invalid surrogates to "?".
1946+
Map<String, Map<String, Object>> testDocs =
1947+
map(
1948+
"a",
1949+
map("value", "Z"),
1950+
"b",
1951+
map("value", "你好"),
1952+
"c",
1953+
map("value", "😀"),
1954+
"d",
1955+
map("value", "ab\uD800"), // Lone high surrogate
1956+
"e",
1957+
map("value", "ab\uDC00"), // Lone low surrogate
1958+
"f",
1959+
map("value", "ab\uD800\uD800"), // Unpaired high surrogate
1960+
"g",
1961+
map("value", "ab\uDC00\uDC00")); // Unpaired low surrogate
1962+
1963+
CollectionReference colRef = testCollectionWithDocs(testDocs);
1964+
Query orderedQuery = colRef.orderBy("value");
1965+
List<String> expectedDocIds = Arrays.asList("a", "d", "e", "f", "g", "b", "c");
18371966

18381967
QuerySnapshot getSnapshot = waitFor(orderedQuery.get());
18391968
List<String> getSnapshotDocIds =

firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java

Lines changed: 38 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -87,9 +87,44 @@ public static int compareIntegers(int i1, int i2) {
8787

8888
/** Compare strings in UTF-8 encoded byte order */
8989
public static int compareUtf8Strings(String left, String right) {
90-
ByteString leftBytes = ByteString.copyFromUtf8(left);
91-
ByteString rightBytes = ByteString.copyFromUtf8(right);
92-
return compareByteStrings(leftBytes, rightBytes);
90+
int i = 0;
91+
while (i < left.length() && i < right.length()) {
92+
int leftCodePoint = left.codePointAt(i);
93+
int rightCodePoint = right.codePointAt(i);
94+
95+
if (leftCodePoint != rightCodePoint) {
96+
if (leftCodePoint < 128 && rightCodePoint < 128) {
97+
// ASCII comparison
98+
return Integer.compare(leftCodePoint, rightCodePoint);
99+
} else {
100+
// substring and do UTF-8 encoded byte comparison
101+
ByteString leftBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(left, i));
102+
ByteString rightBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(right, i));
103+
int comp = compareByteStrings(leftBytes, rightBytes);
104+
if (comp != 0) {
105+
return comp;
106+
} else {
107+
// EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte representations are
108+
// identical. This can happen with malformed input (invalid surrogate pairs), where
109+
// Java's encoding leads to unexpected byte sequences. Meanwhile, any invalid surrogate
110+
// inputs get converted to "?" by protocol buffer while round tripping, so we almost
111+
// never receive invalid strings from backend.
112+
// Fallback to code point comparison for graceful handling.
113+
return Integer.compare(leftCodePoint, rightCodePoint);
114+
}
115+
}
116+
}
117+
// Increment by 2 for surrogate pairs, 1 otherwise.
118+
i += Character.charCount(leftCodePoint);
119+
}
120+
121+
// Compare lengths if all characters are equal
122+
return Integer.compare(left.length(), right.length());
123+
}
124+
125+
private static String getUtf8SafeBytes(String str, int index) {
126+
int firstCodePoint = str.codePointAt(index);
127+
return str.substring(index, index + Character.charCount(firstCodePoint));
93128
}
94129

95130
/**

0 commit comments

Comments
 (0)