Skip to content

Commit 332c92a

Browse files
tzolovmarkpollack
authored andcommitted
Support portable filter expressions with AzureVectorStore
1 parent eadcae7 commit 332c92a

File tree

8 files changed

+679
-43
lines changed

8 files changed

+679
-43
lines changed

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/azure/AzureVectorStoreAutoConfiguration.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
2222

2323
import org.springframework.ai.embedding.EmbeddingClient;
24-
import org.springframework.ai.vectorstore.AzureVectorStore;
24+
import org.springframework.ai.vectorstore.azure.AzureVectorStore;
2525
import org.springframework.ai.vectorstore.VectorStore;
2626
import org.springframework.boot.autoconfigure.AutoConfiguration;
2727
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/azure/AzureVectorStoreProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
package org.springframework.ai.autoconfigure.vectorstore.azure;
1818

19-
import org.springframework.ai.vectorstore.AzureVectorStore;
19+
import org.springframework.ai.vectorstore.azure.AzureVectorStore;
2020
import org.springframework.boot.context.properties.ConfigurationProperties;
2121

2222
/**

spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/azure/AzureVectorStoreAutoConfigurationIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import org.springframework.ai.document.Document;
3232
import org.springframework.ai.embedding.EmbeddingClient;
3333
import org.springframework.ai.embedding.TransformersEmbeddingClient;
34-
import org.springframework.ai.vectorstore.AzureVectorStore;
34+
import org.springframework.ai.vectorstore.azure.AzureVectorStore;
3535
import org.springframework.ai.vectorstore.SearchRequest;
3636
import org.springframework.ai.vectorstore.VectorStore;
3737
import org.springframework.boot.autoconfigure.AutoConfigurations;

vector-stores/spring-ai-azure/README.md

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Add these dependencies to your project:
4343
1. Select an Embeddings interface implementation.
4444
You can choose between:
4545

46-
* OpenAI Embedding:
46+
* or OpenAI Embedding:
4747

4848
```xml
4949
<dependency>
@@ -102,10 +102,21 @@ To create a vector store, you can use the following code by injecting the `Searc
102102
```java
103103
@Bean
104104
public VectorStore vectorStore(SearchIndexClient searchIndexClient, EmbeddingClient embeddingClient) {
105-
return new AzureVectorStore(searchIndexClient, embeddingClient);
105+
return new AzureVectorStore(searchIndexClient, embeddingClient,
106+
// Define the metadata fields to be used
107+
// in the similarity search filters.
108+
List.of(MetadataField.text("country"),
109+
MetadataField.int64("year"),
110+
MetadataField.bool("active")));
106111
}
107112
```
108113

114+
> [!NOTE]
115+
> You must explicitly list all metadata field names and types for any metadata key used in a filter expression.
116+
> The list above registers filterable metadata fields: `country` of type `TEXT`, `year` of type `INT64` and `active` of type `BOOLEAN`.
117+
>
118+
> If the list of filterable metadata fields is expanded with new entries, you have to (re)upload/update the documents with this metadata.
119+
109120
In your main code, create some documents
110121

111122
```java
@@ -124,18 +135,64 @@ vectorStore.add(List.of(document));
124135
And finally, retrieve documents similar to a query:
125136

126137
```java
127-
List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5));
138+
List<Document> results = vectorStore.similaritySearch(
139+
SearchRequest
140+
.query("Spring")
141+
.withTopK(5));
128142
```
129143

130144
If all goes well, you should retrieve the document containing the text "Spring AI rocks!!".
131145

146+
### Metadata filtering
147+
148+
You can leverage the generic, portable [metadata filters](https://docs.spring.io/spring-ai/reference/api/vectordbs.html#_metadata_filters) with AzureVectorStore as well.
149+
150+
For example, you can use either the text expression language:
151+
152+
```java
153+
vectorStore.similaritySearch(
154+
SearchRequest
155+
.query("The World")
156+
.withTopK(TOP_K)
157+
.withSimilarityThreshold(SIMILARITY_THRESHOLD)
158+
.withFilterExpression("country in ['UK', 'NL'] && year >= 2020"));
159+
```
160+
161+
or programmatically using the expression DSL:
162+
163+
```java
164+
FilterExpressionBuilder b = Filter.builder();
165+
166+
vectorStore.similaritySearch(
167+
SearchRequest
168+
.query("The World")
169+
.withTopK(TOP_K)
170+
.withSimilarityThreshold(SIMILARITY_THRESHOLD)
171+
.withFilterExpression(b.and(
172+
b.in("country", "UK", "NL"),
173+
b.gte("year", 2020)).build()));
174+
```
175+
176+
The portable filter expressions are automatically converted into the proprietary Azure Search [OData filters](https://learn.microsoft.com/en-us/azure/search/search-query-odata-filter).
177+
For example, the following portable filter expression
178+
179+
```sql
180+
country in ['UK', 'NL'] && year >= 2020
181+
```
182+
183+
is converted into the following Azure OData [filter expression](https://learn.microsoft.com/en-us/azure/search/search-query-odata-filter):
184+
185+
```graphQL
186+
$filter search.in(meta_country, 'UK,NL', ',') and meta_year ge 2020
187+
```
188+
132189
## Integration With Azure OpenAI Studio Data Ingestion
133190

134191
Azure Open AI services provides a convenient method to upload documents into an Index as described in this Microsoft
135192
[learning document](https://learn.microsoft.com/en-us/azure/ai-services/openai/use-your-data-quickstart?tabs=command-line&pivots=programming-language-csharp).
136193
The `AzureVectorStore` implementation is compatible with indexes that use this methodology facilitating an *easier* way to integrate with your existing documents for the purpose of searching and integrating with the AI system.
137194

138-
## <a name="appendix_a" /> Appendix A: Create Vector Store Search Index
195+
## <a name="appendix_a" /> Appendix A: Create Vector Store Search Index </a>
139196

140197
The easiest way to create a search index manually is to create one from a JSON document.
141198
This can be done by clicking on the `Indexes` link under the `Search management` section.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Copyright 2023-2023 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.vectorstore.azure;
18+
19+
import java.text.ParseException;
20+
import java.text.SimpleDateFormat;
21+
import java.util.Date;
22+
import java.util.List;
23+
import java.util.TimeZone;
24+
import java.util.regex.Pattern;
25+
26+
import org.springframework.ai.vectorstore.azure.AzureVectorStore.MetadataField;
27+
import org.springframework.ai.vectorstore.filter.Filter;
28+
import org.springframework.ai.vectorstore.filter.Filter.Expression;
29+
import org.springframework.ai.vectorstore.filter.Filter.ExpressionType;
30+
import org.springframework.ai.vectorstore.filter.Filter.Group;
31+
import org.springframework.ai.vectorstore.filter.Filter.Key;
32+
import org.springframework.ai.vectorstore.filter.converter.AbstractFilterExpressionConverter;
33+
import org.springframework.util.Assert;
34+
35+
/**
36+
* Converts {@link Expression} into Azure Search OData filter syntax.
37+
* https://learn.microsoft.com/en-us/azure/search/search-query-odata-filter
38+
*
39+
* @author Christian Tzolov
40+
*/
41+
public class AzureAiSearchFilterExpressionConverter extends AbstractFilterExpressionConverter {
42+
43+
private static Pattern DATE_FORMAT_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z");
44+
45+
private final List<MetadataField> filterMetadataFields;
46+
47+
private final SimpleDateFormat dateFormat;
48+
49+
private List<String> allowedIdentifierNames;
50+
51+
public AzureAiSearchFilterExpressionConverter(List<MetadataField> filterMetadataFields) {
52+
Assert.notNull(filterMetadataFields, "The filterMetadataFields can not null.");
53+
54+
this.allowedIdentifierNames = filterMetadataFields.stream().map(MetadataField::name).toList();
55+
this.filterMetadataFields = filterMetadataFields;
56+
this.dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
57+
// this.dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZZZZ");
58+
this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
59+
}
60+
61+
@Override
62+
protected void doExpression(Expression expression, StringBuilder context) {
63+
if (expression.type() == ExpressionType.IN || expression.type() == ExpressionType.NIN) {
64+
context.append(getOperationSymbol(expression));
65+
context.append("(");
66+
this.convertOperand(expression.left(), context);
67+
context.append(", ");
68+
this.convertOperand(expression.right(), context);
69+
context.append(", ',')");
70+
}
71+
else {
72+
this.convertOperand(expression.left(), context);
73+
context.append(getOperationSymbol(expression));
74+
this.convertOperand(expression.right(), context);
75+
}
76+
}
77+
78+
protected void doStartValueRange(Filter.Value listValue, StringBuilder context) {
79+
context.append("'");
80+
}
81+
82+
protected void doEndValueRange(Filter.Value listValue, StringBuilder context) {
83+
context.append("'");
84+
}
85+
86+
private String getOperationSymbol(Expression exp) {
87+
switch (exp.type()) {
88+
case AND:
89+
return " and ";
90+
case OR:
91+
return " or ";
92+
case EQ:
93+
return " eq ";
94+
case NE:
95+
return " ne ";
96+
case LT:
97+
return " lt ";
98+
case LTE:
99+
return " le ";
100+
case GT:
101+
return " gt ";
102+
case GTE:
103+
return " ge ";
104+
case IN:
105+
return " search.in";
106+
case NIN:
107+
return " not search.in";
108+
default:
109+
throw new RuntimeException("Not supported expression type: " + exp.type());
110+
}
111+
}
112+
113+
@Override
114+
public void doKey(Key key, StringBuilder context) {
115+
var hasOuterQuotes = hasOuterQuotes(key.key());
116+
var identifier = (hasOuterQuotes) ? removeOuterQuotes(key.key()) : key.key();
117+
var prefixedIdentifier = withMetaPrefix(identifier);
118+
if (hasOuterQuotes) {
119+
prefixedIdentifier = "'" + prefixedIdentifier.trim() + "'";
120+
}
121+
context.append(prefixedIdentifier);
122+
}
123+
124+
public String withMetaPrefix(String identifier) {
125+
126+
if (this.allowedIdentifierNames.contains(identifier)) {
127+
return "meta_" + identifier;
128+
}
129+
130+
throw new IllegalArgumentException("Not allowed filter identifier name: " + identifier);
131+
}
132+
133+
@Override
134+
protected void doValue(Filter.Value filterValue, StringBuilder context) {
135+
if (filterValue.value() instanceof List list) {
136+
doStartValueRange(filterValue, context);
137+
int c = 0;
138+
for (Object v : list) {
139+
// this.doSingleValue(v, context);
140+
context.append(v);
141+
if (c++ < list.size() - 1) {
142+
this.doAddValueRangeSpitter(filterValue, context);
143+
}
144+
}
145+
this.doEndValueRange(filterValue, context);
146+
}
147+
else {
148+
this.doSingleValue(filterValue.value(), context);
149+
}
150+
}
151+
152+
@Override
153+
protected void doSingleValue(Object value, StringBuilder context) {
154+
if (value instanceof Date date) {
155+
context.append(this.dateFormat.format(date));
156+
}
157+
else if (value instanceof String text) {
158+
if (DATE_FORMAT_PATTERN.matcher(text).matches()) {
159+
try {
160+
Date date = this.dateFormat.parse(text);
161+
context.append(this.dateFormat.format(date));
162+
}
163+
catch (ParseException e) {
164+
throw new IllegalArgumentException("Invalid date type:" + text, e);
165+
}
166+
}
167+
else {
168+
context.append(String.format("'%s'", text));
169+
}
170+
}
171+
else {
172+
context.append(value);
173+
}
174+
}
175+
176+
@Override
177+
public void doStartGroup(Group group, StringBuilder context) {
178+
context.append("(");
179+
}
180+
181+
@Override
182+
public void doEndGroup(Group group, StringBuilder context) {
183+
context.append(")");
184+
}
185+
186+
}

0 commit comments

Comments
 (0)