Skip to content

Commit 22a363d

Browse files
Add index empty and missing support (#341)
This PR adds support for indexing missing fields (`INDEXMISSING`) as well as empty fields (`INDEXEMPTY`).
1 parent 01fd8b3 commit 22a363d

19 files changed

+1109
-173
lines changed

redisvl/query/filter.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,24 @@ def _set_value(
7373
self._value = val
7474
self._operator = operator
7575

76+
def is_missing(self) -> "FilterExpression":
77+
"""Create a filter expression for documents missing this field.
78+
79+
Returns:
80+
FilterExpression: A filter expression that matches documents where the field is missing.
81+
82+
.. code-block:: python
83+
84+
from redisvl.query.filter import Tag, Text, Num, Geo, Timestamp
85+
86+
f = Tag("brand").is_missing()
87+
f = Text("title").is_missing()
88+
f = Num("price").is_missing()
89+
f = Geo("location").is_missing()
90+
f = Timestamp("created_at").is_missing()
91+
"""
92+
return FilterExpression(f"ismissing(@{self._field})")
93+
7694

7795
def check_operator_misuse(func: Callable) -> Callable:
7896
@wraps(func)

redisvl/redis/connection.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from redisvl.version import __version__
2121

2222

23-
def compare_versions(version1, version2):
23+
def compare_versions(version1: str, version2: str):
2424
"""
2525
Compare two Redis version strings numerically.
2626
@@ -105,19 +105,27 @@ def parse_attrs(attrs):
105105
# TODO 'WITHSUFFIXTRIE' is another boolean attr, but is not returned by ft.info
106106
original = attrs.copy()
107107
parsed_attrs = {}
108-
if "NOSTEM" in attrs:
109-
parsed_attrs["no_stem"] = True
110-
attrs.remove("NOSTEM")
111-
if "CASESENSITIVE" in attrs:
112-
parsed_attrs["case_sensitive"] = True
113-
attrs.remove("CASESENSITIVE")
114-
if "SORTABLE" in attrs:
115-
parsed_attrs["sortable"] = True
116-
attrs.remove("SORTABLE")
117-
if "UNF" in attrs:
118-
attrs.remove("UNF") # UNF present on sortable numeric fields only
108+
109+
# Handle all boolean attributes first, regardless of position
110+
boolean_attrs = {
111+
"NOSTEM": "no_stem",
112+
"CASESENSITIVE": "case_sensitive",
113+
"SORTABLE": "sortable",
114+
"INDEXMISSING": "index_missing",
115+
"INDEXEMPTY": "index_empty",
116+
}
117+
118+
for redis_attr, python_attr in boolean_attrs.items():
119+
if redis_attr in attrs:
120+
parsed_attrs[python_attr] = True
121+
attrs.remove(redis_attr)
122+
123+
# Handle UNF which is associated with SORTABLE
124+
if "UNF" in attrs:
125+
attrs.remove("UNF") # UNF present on sortable numeric fields only
119126

120127
try:
128+
# Parse remaining attributes as key-value pairs starting from index 6
121129
parsed_attrs.update(
122130
{attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)}
123131
)

redisvl/schema/fields.py

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class BaseFieldAttributes(BaseModel):
6161

6262
sortable: bool = Field(default=False)
6363
"""Enable faster result sorting on the field at runtime"""
64+
index_missing: bool = Field(default=False)
65+
"""Allow indexing and searching for missing values (documents without the field)"""
6466

6567

6668
class TextFieldAttributes(BaseFieldAttributes):
@@ -74,6 +76,8 @@ class TextFieldAttributes(BaseFieldAttributes):
7476
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
7577
phonetic_matcher: Optional[str] = None
7678
"""Used to perform phonetic matching during search"""
79+
index_empty: bool = Field(default=False)
80+
"""Allow indexing and searching for empty strings"""
7781

7882

7983
class TagFieldAttributes(BaseFieldAttributes):
@@ -85,6 +89,8 @@ class TagFieldAttributes(BaseFieldAttributes):
8589
"""Treat text as case sensitive or not. By default, tag characters are converted to lowercase"""
8690
withsuffixtrie: bool = Field(default=False)
8791
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
92+
index_empty: bool = Field(default=False)
93+
"""Allow indexing and searching for empty strings"""
8894

8995

9096
class NumericFieldAttributes(BaseFieldAttributes):
@@ -112,6 +118,8 @@ class BaseVectorFieldAttributes(BaseModel):
112118
"""The distance metric used to measure query relevance"""
113119
initial_cap: Optional[int] = None
114120
"""Initial vector capacity in the index affecting memory allocation size of the index"""
121+
index_missing: bool = Field(default=False)
122+
"""Allow indexing and searching for missing values (documents without the field)"""
115123

116124
@field_validator("algorithm", "datatype", "distance_metric", mode="before")
117125
@classmethod
@@ -129,6 +137,8 @@ def field_data(self) -> Dict[str, Any]:
129137
}
130138
if self.initial_cap is not None: # Only include it if it's set
131139
field_data["INITIAL_CAP"] = self.initial_cap
140+
if self.index_missing: # Only include it if it's set
141+
field_data["INDEXMISSING"] = True
132142
return field_data
133143

134144

@@ -190,14 +200,30 @@ class TextField(BaseField):
190200

191201
def as_redis_field(self) -> RedisField:
192202
name, as_name = self._handle_names()
193-
return RedisTextField(
194-
name,
195-
as_name=as_name,
196-
weight=self.attrs.weight, # type: ignore
197-
no_stem=self.attrs.no_stem, # type: ignore
198-
phonetic_matcher=self.attrs.phonetic_matcher, # type: ignore
199-
sortable=self.attrs.sortable,
200-
)
203+
# Build arguments for RedisTextField
204+
kwargs: Dict[str, Any] = {
205+
"weight": self.attrs.weight, # type: ignore
206+
"no_stem": self.attrs.no_stem, # type: ignore
207+
"sortable": self.attrs.sortable,
208+
}
209+
210+
# Only add as_name if it's not None
211+
if as_name is not None:
212+
kwargs["as_name"] = as_name
213+
214+
# Only add phonetic_matcher if it's not None
215+
if self.attrs.phonetic_matcher is not None: # type: ignore
216+
kwargs["phonetic_matcher"] = self.attrs.phonetic_matcher # type: ignore
217+
218+
# Add INDEXMISSING if enabled
219+
if self.attrs.index_missing: # type: ignore
220+
kwargs["index_missing"] = True
221+
222+
# Add INDEXEMPTY if enabled
223+
if self.attrs.index_empty: # type: ignore
224+
kwargs["index_empty"] = True
225+
226+
return RedisTextField(name, **kwargs)
201227

202228

203229
class TagField(BaseField):
@@ -208,13 +234,26 @@ class TagField(BaseField):
208234

209235
def as_redis_field(self) -> RedisField:
210236
name, as_name = self._handle_names()
211-
return RedisTagField(
212-
name,
213-
as_name=as_name,
214-
separator=self.attrs.separator, # type: ignore
215-
case_sensitive=self.attrs.case_sensitive, # type: ignore
216-
sortable=self.attrs.sortable,
217-
)
237+
# Build arguments for RedisTagField
238+
kwargs: Dict[str, Any] = {
239+
"separator": self.attrs.separator, # type: ignore
240+
"case_sensitive": self.attrs.case_sensitive, # type: ignore
241+
"sortable": self.attrs.sortable,
242+
}
243+
244+
# Only add as_name if it's not None
245+
if as_name is not None:
246+
kwargs["as_name"] = as_name
247+
248+
# Add INDEXMISSING if enabled
249+
if self.attrs.index_missing: # type: ignore
250+
kwargs["index_missing"] = True
251+
252+
# Add INDEXEMPTY if enabled
253+
if self.attrs.index_empty: # type: ignore
254+
kwargs["index_empty"] = True
255+
256+
return RedisTagField(name, **kwargs)
218257

219258

220259
class NumericField(BaseField):
@@ -225,11 +264,20 @@ class NumericField(BaseField):
225264

226265
def as_redis_field(self) -> RedisField:
227266
name, as_name = self._handle_names()
228-
return RedisNumericField(
229-
name,
230-
as_name=as_name,
231-
sortable=self.attrs.sortable,
232-
)
267+
# Build arguments for RedisNumericField
268+
kwargs: Dict[str, Any] = {
269+
"sortable": self.attrs.sortable,
270+
}
271+
272+
# Only add as_name if it's not None
273+
if as_name is not None:
274+
kwargs["as_name"] = as_name
275+
276+
# Add INDEXMISSING if enabled
277+
if self.attrs.index_missing: # type: ignore
278+
kwargs["index_missing"] = True
279+
280+
return RedisNumericField(name, **kwargs)
233281

234282

235283
class GeoField(BaseField):
@@ -240,11 +288,20 @@ class GeoField(BaseField):
240288

241289
def as_redis_field(self) -> RedisField:
242290
name, as_name = self._handle_names()
243-
return RedisGeoField(
244-
name,
245-
as_name=as_name,
246-
sortable=self.attrs.sortable,
247-
)
291+
# Build arguments for RedisGeoField
292+
kwargs: Dict[str, Any] = {
293+
"sortable": self.attrs.sortable,
294+
}
295+
296+
# Only add as_name if it's not None
297+
if as_name is not None:
298+
kwargs["as_name"] = as_name
299+
300+
# Add INDEXMISSING if enabled
301+
if self.attrs.index_missing: # type: ignore
302+
kwargs["index_missing"] = True
303+
304+
return RedisGeoField(name, **kwargs)
248305

249306

250307
class FlatVectorField(BaseField):

redisvl/schema/schema.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -432,11 +432,14 @@ def to_dict(self) -> Dict[str, Any]:
432432
Returns:
433433
Dict[str, Any]: The index schema as a dictionary.
434434
"""
435-
dict_schema = model_to_dict(self)
436-
# cast fields back to a pure list
437-
dict_schema["fields"] = [
438-
field for field_name, field in dict_schema["fields"].items()
439-
]
435+
# Manually serialize to ensure all field attributes are preserved
436+
dict_schema = {
437+
"index": model_to_dict(self.index),
438+
"fields": [
439+
model_to_dict(field) for field_name, field in self.fields.items()
440+
],
441+
"version": self.version,
442+
}
440443
return dict_schema
441444

442445
def to_yaml(self, file_path: str, overwrite: bool = True) -> None:

redisvl/utils/utils.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,19 @@ def model_to_dict(model: BaseModel) -> Dict[str, Any]:
3838
def serialize_item(item):
3939
if isinstance(item, Enum):
4040
return item.value.lower()
41+
elif isinstance(item, BaseModel):
42+
# Recursively serialize nested BaseModel instances with exclude_defaults=False
43+
nested_data = item.model_dump(exclude_none=True, exclude_defaults=False)
44+
return {key: serialize_item(value) for key, value in nested_data.items()}
4145
elif isinstance(item, dict):
4246
return {key: serialize_item(value) for key, value in item.items()}
4347
elif isinstance(item, list):
4448
return [serialize_item(element) for element in item]
4549
else:
4650
return item
4751

48-
serialized_data = model.model_dump(exclude_none=True)
52+
# Use exclude_defaults=False to preserve all field attributes including new ones
53+
serialized_data = model.model_dump(exclude_none=True, exclude_defaults=False)
4954
for key, value in serialized_data.items():
5055
serialized_data[key] = serialize_item(value)
5156
return serialized_data
@@ -170,29 +175,51 @@ def wrapper(*args, **kwargs):
170175

171176
def sync_wrapper(fn: Callable[[], Coroutine[Any, Any, Any]]) -> Callable[[], None]:
172177
def wrapper():
178+
# Check if the interpreter is shutting down
179+
if sys is None or getattr(sys, "_getframe", None) is None:
180+
# Interpreter is shutting down, skip cleanup
181+
return
182+
173183
try:
174184
loop = asyncio.get_running_loop()
175185
except RuntimeError:
176186
loop = None
187+
except Exception:
188+
# Any other exception during loop detection means we should skip cleanup
189+
return
190+
177191
try:
178192
if loop is None or not loop.is_running():
193+
# Check if asyncio module is still available
194+
if asyncio is None:
195+
return
196+
179197
loop = asyncio.new_event_loop()
180198
asyncio.set_event_loop(loop)
181199
task = loop.create_task(fn())
182200
loop.run_until_complete(task)
183-
except RuntimeError:
201+
except (RuntimeError, AttributeError, TypeError) as e:
184202
# This could happen if an object stored an event loop and now
185-
# that event loop is closed. There's nothing we can do other than
186-
# advise the user to use explicit cleanup methods.
203+
# that event loop is closed, or if asyncio modules are being
204+
# torn down during interpreter shutdown.
187205
#
188206
# Uses logging module instead of get_logger() to avoid I/O errors
189207
# if the wrapped function is called as a finalizer.
190-
logging.info(
191-
f"Could not run the async function {fn.__name__} because the event loop is closed. "
192-
"This usually means the object was not properly cleaned up. Please use explicit "
193-
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
194-
"context manager.",
195-
)
208+
if logging is not None:
209+
try:
210+
logging.info(
211+
f"Could not run the async function {fn.__name__} because the event loop is closed "
212+
"or the interpreter is shutting down. "
213+
"This usually means the object was not properly cleaned up. Please use explicit "
214+
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
215+
"context manager.",
216+
)
217+
except Exception:
218+
# Even logging failed, interpreter is really shutting down
219+
pass
220+
return
221+
except Exception:
222+
# Any other unexpected exception should be silently ignored during shutdown
196223
return
197224

198225
return wrapper

0 commit comments

Comments
 (0)