28
28
class StringExpressionsFunctionalTest extends AbstractExpressionsFunctionalTest {
29
29
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/#string-expression-operators
30
30
31
// Shared Unicode fixtures used by the string-expression tests in this class.
// NOTE(review): the single spaces shown inside the literals below look like extraction
// artifacts -- the lengths asserted in the "comparison" section of strLenBytesTest only
// add up without them. TODO confirm against the real source file.
// sushi: two BMP characters; asserted later as 2 code points and 6 UTF-8 bytes.
// In this change the declaration only moves below jalapeno (removed here, re-added below).
- private final String sushi = "\u5BFF \u53F8 " ;
32
31
// jalapeno: contains the non-ASCII character U+00F1; asserted later as 8 code points and 9 UTF-8 bytes.
private final String jalapeno = "jalape\u00F1 o" ;
32
+ private final String sushi = "\u5BFF \u53F8 " ;
33
// fish: written as a UTF-16 surrogate pair; asserted later as 1 code point and 4 UTF-8 bytes,
// so it is the fixture where char count, code-point count and byte count all differ.
+ private final String fish = "\uD83D \uDC1F " ;
33
34
34
35
// Checks string literals created through of(String).
// NOTE(review): assertExpression is defined outside this view; from the call sites its
// arguments appear to be (expected value, expression under test, expected rendering) -- confirm.
@ Test
35
36
public void literalsTest () {
36
37
// Empty string literal.
assertExpression ("" , of ("" ), "''" );
37
38
// Plain ASCII literal.
assertExpression ("abc" , of ("abc" ), "'abc'" );
38
39
// A null String must be rejected with IllegalArgumentException.
assertThrows (IllegalArgumentException .class , () -> of ((String ) null ));
39
// Non-ASCII literal: this change swaps the sushi fixture (removed line) for the
// surrogate-pair fish fixture (added line).
- assertExpression (sushi , of (sushi ), "'" + sushi + "'" );
40
+ assertExpression (fish , of (fish ), "'" + fish + "'" );
40
41
}
41
42
42
43
@ Test
@@ -69,21 +70,24 @@ public void toUpperTest() {
69
70
// Checks strLen(), which the expected strings below show being rendered as '$strLenCP'.
// This change computes the expected values with String.codePointCount(...) instead of
// String.length(): length() counts UTF-16 chars, which differs from the code-point count
// for strings containing surrogate pairs (such as the fish fixture).
@ Test
70
71
public void strLenTest () {
71
72
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/strLenCP/ (?)
72
- // TODO naming: just strLen, lengthCP, lengthCodePoints, stringLengthInCodePoints?...
73
73
// ASCII baseline.
assertExpression (
74
- "abc" .length ( ),
74
+ "abc" .codePointCount ( 0 , 3 ),
75
75
of ("abc" ).strLen (),
76
76
"{'$strLenCP': 'abc'}" );
77
77
78
78
// unicode
79
79
assertExpression (
80
- jalapeno .length (),
80
+ jalapeno .codePointCount ( 0 , jalapeno . length () ),
81
81
of (jalapeno ).strLen (),
82
82
"{'$strLenCP': '" + jalapeno + "'}" );
83
83
assertExpression (
84
- sushi .length (),
84
+ sushi .codePointCount ( 0 , sushi . length () ),
85
85
of (sushi ).strLen (),
86
86
"{'$strLenCP': '" + sushi + "'}" );
87
// New case added by this change: the surrogate-pair fixture, where length() and
// codePointCount(...) disagree.
+ assertExpression (
88
+ fish .codePointCount (0 , fish .length ()),
89
+ of (fish ).strLen (),
90
+ "{'$strLenCP': '" + fish + "'}" );
87
91
}
88
92
89
93
@ Test
@@ -103,35 +107,40 @@ public void strLenBytesTest() {
103
107
sushi .getBytes (StandardCharsets .UTF_8 ).length ,
104
108
of (sushi ).strLenBytes (),
105
109
"{'$strLenBytes': '" + sushi + "'}" );
110
+ assertExpression (
111
+ fish .getBytes (StandardCharsets .UTF_8 ).length ,
112
+ of (fish ).strLenBytes (),
113
+ "{'$strLenBytes': '" + fish + "'}" );
106
114
107
115
// comparison
108
- assertExpression (2 , of (sushi ).strLen ());
109
- assertExpression (6 , of (sushi ).strLenBytes ());
110
116
assertExpression (8 , of (jalapeno ).strLen ());
111
117
assertExpression (9 , of (jalapeno ).strLenBytes ());
118
+ assertExpression (2 , of (sushi ).strLen ());
119
+ assertExpression (6 , of (sushi ).strLenBytes ());
120
+ assertExpression (1 , of (fish ).strLen ());
121
+ assertExpression (4 , of (fish ).strLenBytes ());
112
122
}
113
123
114
124
@ Test
115
125
public void substrTest () {
116
126
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/substr/
117
- // https://www.mongodb.com/docs/manual/reference/operator/aggregation/substrBytes/ (?)
118
127
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/substrCP/ (?)
119
128
// substr is deprecated, an alias for bytes
120
- // TODO here, it is an alias for code-points
121
129
assertExpression (
122
130
"abc" .substring (1 , 1 + 1 ),
123
- of ("abc" ).substr (1 , 1 ),
131
+ of ("abc" ).substr (of ( 1 ), of ( 1 ) ),
124
132
"{'$substrCP': ['abc', 1, 1]}" );
125
133
126
134
// unicode
127
135
assertExpression (
128
136
jalapeno .substring (5 , 5 + 3 ),
129
- of (jalapeno ).substr (5 , 3 ),
137
+ of (jalapeno ).substr (of ( 5 ), of ( 3 ) ),
130
138
"{'$substrCP': ['" + jalapeno + "', 5, 3]}" );
131
139
assertExpression (
132
140
"e\u00F1 o" ,
133
- of (jalapeno ).substr (5 , 3 ));
141
+ of (jalapeno ).substr (of ( 5 ), of ( 3 ) ));
134
142
143
+ // bounds; convenience
135
144
assertExpression ("abc" , of ("abc" ).substr (0 , 99 ));
136
145
assertExpression ("ab" , of ("abc" ).substr (0 , 2 ));
137
146
assertExpression ("b" , of ("abc" ).substr (1 , 1 ));
@@ -143,14 +152,17 @@ public void substrBytesTest() {
143
152
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/substrBytes/ (?)
144
153
assertExpression (
145
154
"b" ,
146
- of ("abc" ).substrBytes (1 , 1 ),
155
+ of ("abc" ).substrBytes (of ( 1 ), of ( 1 ) ),
147
156
"{'$substrBytes': ['abc', 1, 1]}" );
148
157
149
158
// unicode
150
- byte [] bytes = Arrays .copyOfRange (sushi .getBytes (), 0 , 3 );
159
+ byte [] bytes = Arrays .copyOfRange (sushi .getBytes (StandardCharsets . UTF_8 ), 0 , 3 );
151
160
String expected = new String (bytes , StandardCharsets .UTF_8 );
152
161
assertExpression (expected ,
153
- of (sushi ).substrBytes (0 , 3 ));
154
- // "starting index is a UTF-8 continuation byte" if from 1 length 1
162
+ of (sushi ).substrBytes (of (0 ), of (3 )));
163
+ // server returns "starting index is a UTF-8 continuation byte" error when substrBytes(1, 1)
164
+
165
+ // convenience
166
+ assertExpression ("b" , of ("abc" ).substrBytes (1 , 1 ));
155
167
}
156
168
}
0 commit comments