Skip to content

Implement string expressions #1036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -84,6 +83,7 @@ public static DateExpression of(final Instant of) {
* @return the string expression
*/
public static StringExpression of(final String of) {
    // Validate first so a null argument is rejected when the expression is created,
    // before any BSON value is built.
    Assertions.notNull("String", of);
    // The literal does not depend on the codec registry, so it is built once and captured.
    final BsonString literal = new BsonString(of);
    return new MqlExpression<>((codecRegistry) -> literal);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,41 @@ public T reduce(final T initialValue, final BinaryOperator<T> in) {
.append("in", extractBsonValue(cr, in.apply(varThis, varValue)))).apply(cr));
}


/** @see StringExpression */

/**
 * Builds the lowercase form of this string expression using the {@code $toLower} operator.
 *
 * @return a new string expression wrapping the {@code $toLower} node produced by {@code ast}
 */
@Override
public StringExpression toLower() {
return new MqlExpression<>(ast("$toLower"));
}

/**
 * Builds the uppercase form of this string expression using the {@code $toUpper} operator.
 *
 * @return a new string expression wrapping the {@code $toUpper} node produced by {@code ast}
 */
@Override
public StringExpression toUpper() {
return new MqlExpression<>(ast("$toUpper"));
}

/**
 * Builds a {@code $concat} expression whose operands are this expression followed by
 * {@code concat}, i.e. the argument is appended to this string.
 *
 * @param concat the string expression to append
 * @return a new string expression wrapping the {@code $concat} node produced by {@code ast}
 */
@Override
public StringExpression concat(final StringExpression concat) {
return new MqlExpression<>(ast("$concat", concat));
}

/**
 * Builds the length of this string expression using the {@code $strLenCP} operator,
 * which counts code points rather than bytes.
 *
 * @return a new integer expression wrapping the {@code $strLenCP} node produced by {@code ast}
 */
@Override
public IntegerExpression strLen() {
return new MqlExpression<>(ast("$strLenCP"));
}

/**
 * Builds the byte length of this string expression using the {@code $strLenBytes} operator.
 *
 * @return a new integer expression wrapping the {@code $strLenBytes} node produced by {@code ast}
 */
@Override
public IntegerExpression strLenBytes() {
return new MqlExpression<>(ast("$strLenBytes"));
}

/**
 * Builds a substring of this string expression using the {@code $substrCP} operator,
 * so {@code start} and {@code length} are passed on as code point based operands.
 *
 * @param start the start position operand
 * @param length the length operand
 * @return a new string expression wrapping the {@code $substrCP} node produced by {@code ast}
 */
@Override
public StringExpression substr(final IntegerExpression start, final IntegerExpression length) {
return new MqlExpression<>(ast("$substrCP", start, length));
}

/**
 * Builds a substring of this string expression using the {@code $substrBytes} operator,
 * so {@code start} and {@code length} are passed on as byte based operands.
 *
 * @param start the start position operand
 * @param length the length operand
 * @return a new string expression wrapping the {@code $substrBytes} node produced by {@code ast}
 */
@Override
public StringExpression substrBytes(final IntegerExpression start, final IntegerExpression length) {
return new MqlExpression<>(ast("$substrBytes", start, length));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,32 @@

package com.mongodb.client.model.expressions;

import static com.mongodb.client.model.expressions.Expressions.of;

/**
* Expresses a string value.
*/
public interface StringExpression extends Expression {

/**
 * Converts this string to lowercase.
 *
 * @return a string expression for the lowercased value
 */
StringExpression toLower();

/**
 * Converts this string to uppercase.
 *
 * @return a string expression for the uppercased value
 */
StringExpression toUpper();

/**
 * Appends {@code concat} to the end of this string.
 *
 * @param concat the string expression to append
 * @return a string expression for this string followed by {@code concat}
 */
StringExpression concat(StringExpression concat);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appends the string to the current string. Should we have a prepend method? If so, should this method be renamed?

Copy link
Member

@stIncMale stIncMale Nov 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a question is posed this way, it forces others to think about it from scratch, then describe pros/cons, potential usages, and suggest a decision. I think that all of this work should be done by the one who asks the question so that others could analyze the proposed options and express their opinion.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an example where prepend (first line) is much easier to naturally write, compared to the equivalent append:

[a, b, c, d].map(v -> v.toString().trim().first(3).prepend(">"))
[a, b, c, d].map(v -> of(">").append(v.toString().trim().first(3)))

We discussed this and are leaning towards using prepend/append here, though the reductive operation would be concat (see #1032 (comment)). Additional input welcome.


/**
 * Computes the length of this string in code points (not bytes).
 *
 * @return an integer expression for the code point count
 * @see #strLenBytes()
 */
IntegerExpression strLen();

/**
 * Computes the length of this string in bytes; for non-ASCII text this can exceed
 * the code point count reported by {@link #strLen()}.
 *
 * @return an integer expression for the byte count
 */
IntegerExpression strLenBytes();

/**
 * Extracts a substring of this string, addressed in code points.
 *
 * @param start the zero-based code point index at which the substring begins
 * @param length the maximum number of code points to include
 * @return a string expression for the extracted substring
 */
StringExpression substr(IntegerExpression start, IntegerExpression length);

/**
 * Convenience overload of {@link #substr(IntegerExpression, IntegerExpression)} that accepts
 * plain {@code int} values.
 *
 * @param start the start position, wrapped into an expression via {@code of}
 * @param length the length, wrapped into an expression via {@code of}
 * @return the result of the expression-based overload
 */
default StringExpression substr(int start, int length) {
    // Wrap both ints as expressions and delegate to the expression-based overload.
    return substr(
            of(start),
            of(length));
}

/**
 * Extracts a substring of this string, addressed in bytes.
 *
 * @param start the zero-based byte index at which the substring begins
 * @param length the number of bytes to include
 * @return a string expression for the extracted substring
 */
StringExpression substrBytes(IntegerExpression start, IntegerExpression length);

/**
 * Convenience overload of {@link #substrBytes(IntegerExpression, IntegerExpression)} that
 * accepts plain {@code int} values.
 *
 * @param start the start position, wrapped into an expression via {@code of}
 * @param length the length, wrapped into an expression via {@code of}
 * @return the result of the expression-based overload
 */
default StringExpression substrBytes(int start, int length) {
    // Wrap both ints as expressions and delegate to the expression-based overload.
    return substrBytes(
            of(start),
            of(length));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import com.mongodb.client.model.Field;
import com.mongodb.client.model.OperationTest;
import com.mongodb.lang.Nullable;
import org.bson.BsonArray;
import org.bson.BsonDocument;
import org.bson.BsonReader;
Expand Down Expand Up @@ -53,8 +54,16 @@ public void tearDown() {
getCollectionHelper().drop();
}

protected void assertExpression(final Object expectedResult, final Expression expression, final String expectedMql) {
assertEval(expectedResult, expression);
/**
 * Asserts the evaluated result of {@code expression} without checking its MQL rendering.
 *
 * @param expected the value the expression is expected to evaluate to
 * @param expression the expression under test
 */
protected void assertExpression(final Object expected, final Expression expression) {
    // A null expected-MQL string makes the three-argument overload skip the rendering check.
    final String noExpectedMql = null;
    assertExpression(expected, expression, noExpectedMql);
}

protected void assertExpression(@Nullable final Object expected, final Expression expression, @Nullable final String expectedMql) {
assertEval(expected, expression);

if (expectedMql == null) {
return;
}

BsonValue expressionValue = ((MqlExpression<?>) expression).toBsonValue(fromProviders(new BsonValueCodecProvider()));
BsonValue bsonValue = new BsonDocumentFragmentCodec().readValue(
Expand All @@ -63,9 +72,17 @@ protected void assertExpression(final Object expectedResult, final Expression ex
assertEquals(bsonValue, expressionValue, expressionValue.toString().replace("\"", "'"));
}

private void assertEval(final Object expected, final Expression toEvaluate) {
private void assertEval(@Nullable final Object expected, final Expression toEvaluate) {
BsonValue evaluated = evaluate(toEvaluate);
assertEquals(new Document("val", expected).toBsonDocument().get("val"), evaluated);
BsonValue expected1 = toBsonValue(expected);
assertEquals(expected1, evaluated);
}

/**
 * Converts an arbitrary expected value into a {@code BsonValue} for comparison.
 *
 * @param value the value to convert; values that are already {@code BsonValue}s are returned as-is
 * @return {@code value} itself when it is a {@code BsonValue}, otherwise the BSON form obtained
 *         by round-tripping it through a single-field {@code Document}
 */
protected BsonValue toBsonValue(@Nullable final Object value) {
    return value instanceof BsonValue
            ? (BsonValue) value
            : new Document("val", value).toBsonDocument().get("val");
}

protected BsonValue evaluate(final Expression toEvaluate) {
Expand All @@ -87,8 +104,7 @@ protected BsonValue evaluate(final Expression toEvaluate) {
} else {
results = getCollectionHelper().aggregate(stages);
}
BsonValue evaluated = results.get(0).get("val");
return evaluated;
return results.get(0).get("val");
}

private static class BsonDocumentFragmentCodec extends BsonDocumentCodec {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import java.time.Instant;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import static com.mongodb.client.model.expressions.Expressions.of;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.mongodb.client.model.expressions;

import org.junit.jupiter.api.Test;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import static com.mongodb.client.model.expressions.Expressions.of;
import static org.junit.jupiter.api.Assertions.assertThrows;

/**
 * Functional tests for the string expression operators: each case checks the evaluated result
 * and, where an MQL string is supplied, the rendered MQL as well.
 */
@SuppressWarnings({"ConstantConditions"})
class StringExpressionsFunctionalTest extends AbstractExpressionsFunctionalTest {
    // Operator reference:
    // https://www.mongodb.com/docs/manual/reference/operator/aggregation/#string-expression-operators

    // Non-ASCII samples; their code point counts and UTF-8 byte counts differ (see strLenBytesTest).
    private static final String JALAPENO = "jalape\u00F1o";
    private static final String SUSHI = "\u5BFF\u53F8";
    private static final String FISH = "\uD83D\uDC1F";

    @Test
    public void literalsTest() {
        assertLiteral("");
        assertLiteral("abc");
        assertThrows(IllegalArgumentException.class, () -> of((String) null));
        assertLiteral(FISH);
    }

    @Test
    public void concatTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/concat/
        final String expected = "abc".concat("de");
        assertExpression(expected, of("abc").concat(of("de")), "{'$concat': ['abc', 'de']}");
    }

    @Test
    public void toLowerTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/toLower/
        final String expected = "ABC".toLowerCase();
        assertExpression(expected, of("ABC").toLower(), "{'$toLower': 'ABC'}");
    }

    @Test
    public void toUpperTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/toUpper/
        final String expected = "abc".toUpperCase();
        assertExpression(expected, of("abc").toUpper(), "{'$toUpper': 'abc'}");
    }

    @Test
    public void strLenTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/strLenCP/
        // Plain ASCII first, then the unicode samples.
        for (final String sample : Arrays.asList("abc", JALAPENO, SUSHI, FISH)) {
            assertExpression(
                    codePoints(sample),
                    of(sample).strLen(),
                    "{'$strLenCP': '" + sample + "'}");
        }
    }

    @Test
    public void strLenBytesTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/strLenBytes/
        // Plain ASCII first, then the unicode samples.
        for (final String sample : Arrays.asList("abc", JALAPENO, SUSHI, FISH)) {
            assertExpression(
                    utf8Bytes(sample),
                    of(sample).strLenBytes(),
                    "{'$strLenBytes': '" + sample + "'}");
        }

        // comparison: code point length next to byte length for each unicode sample
        assertExpression(8, of(JALAPENO).strLen());
        assertExpression(9, of(JALAPENO).strLenBytes());
        assertExpression(2, of(SUSHI).strLen());
        assertExpression(6, of(SUSHI).strLenBytes());
        assertExpression(1, of(FISH).strLen());
        assertExpression(4, of(FISH).strLenBytes());
    }

    @Test
    public void substrTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/substr/
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/substrCP/
        // substr is deprecated, an alias for bytes
        final String asciiExpected = "abc".substring(1, 1 + 1);
        assertExpression(asciiExpected, of("abc").substr(of(1), of(1)), "{'$substrCP': ['abc', 1, 1]}");

        // unicode
        final String unicodeExpected = JALAPENO.substring(5, 5 + 3);
        assertExpression(
                unicodeExpected,
                of(JALAPENO).substr(of(5), of(3)),
                "{'$substrCP': ['" + JALAPENO + "', 5, 3]}");
        assertExpression("e\u00F1o", of(JALAPENO).substr(of(5), of(3)));

        // bounds; convenience
        assertExpression("abc", of("abc").substr(0, 99));
        assertExpression("ab", of("abc").substr(0, 2));
        assertExpression("b", of("abc").substr(1, 1));
        assertExpression("", of("abc").substr(1, 0));
    }

    @Test
    public void substrBytesTest() {
        // https://www.mongodb.com/docs/manual/reference/operator/aggregation/substrBytes/
        assertExpression("b", of("abc").substrBytes(of(1), of(1)), "{'$substrBytes': ['abc', 1, 1]}");

        // unicode: take the first three UTF-8 bytes of the sample and decode them again
        final byte[] leadingBytes = Arrays.copyOfRange(SUSHI.getBytes(StandardCharsets.UTF_8), 0, 3);
        final String firstThreeBytes = new String(leadingBytes, StandardCharsets.UTF_8);
        assertExpression(firstThreeBytes, of(SUSHI).substrBytes(of(0), of(3)));
        // server returns "starting index is a UTF-8 continuation byte" error when substrBytes(1, 1)

        // convenience
        assertExpression("b", of("abc").substrBytes(1, 1));
    }

    /** Asserts that a string literal evaluates to itself and renders as a single-quoted string. */
    private void assertLiteral(final String value) {
        assertExpression(value, of(value), "'" + value + "'");
    }

    /** Returns the number of code points in {@code value}. */
    private static int codePoints(final String value) {
        return value.codePointCount(0, value.length());
    }

    /** Returns the number of bytes in the UTF-8 encoding of {@code value}. */
    private static int utf8Bytes(final String value) {
        return value.getBytes(StandardCharsets.UTF_8).length;
    }
}