
Add index empty and missing support #341


Merged · 10 commits · May 29, 2025
18 changes: 18 additions & 0 deletions redisvl/query/filter.py
@@ -73,6 +73,24 @@ def _set_value(
self._value = val
self._operator = operator

def is_missing(self) -> "FilterExpression":
"""Create a filter expression for documents missing this field.

Returns:
FilterExpression: A filter expression that matches documents where the field is missing.

.. code-block:: python

from redisvl.query.filter import Tag, Text, Num, Geo, Timestamp

f = Tag("brand").is_missing()
f = Text("title").is_missing()
f = Num("price").is_missing()
f = Geo("location").is_missing()
f = Timestamp("created_at").is_missing()
"""
return FilterExpression(f"ismissing(@{self._field})")


def check_operator_misuse(func: Callable) -> Callable:
@wraps(func)
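For reference, a rough sketch of how the new expression composes with existing filters in a query; the field names and the index object here are hypothetical:

from redisvl.query import FilterQuery
from redisvl.query.filter import Num, Tag

# "brand" and "price" are hypothetical field names used for illustration.
# Match documents that have no "brand" field at all, or whose price is low.
missing_or_cheap = Tag("brand").is_missing() | (Num("price") < 20)

query = FilterQuery(
    filter_expression=missing_or_cheap,
    return_fields=["brand", "price"],
    num_results=10,
)
# results = index.query(query)  # assuming an existing SearchIndex `index`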
32 changes: 20 additions & 12 deletions redisvl/redis/connection.py
@@ -20,7 +20,7 @@
from redisvl.version import __version__


def compare_versions(version1, version2):
def compare_versions(version1: str, version2: str):
"""
Compare two Redis version strings numerically.

@@ -105,19 +105,27 @@ def parse_attrs(attrs):
# TODO 'WITHSUFFIXTRIE' is another boolean attr, but is not returned by ft.info
original = attrs.copy()
parsed_attrs = {}
if "NOSTEM" in attrs:
parsed_attrs["no_stem"] = True
attrs.remove("NOSTEM")
if "CASESENSITIVE" in attrs:
parsed_attrs["case_sensitive"] = True
attrs.remove("CASESENSITIVE")
if "SORTABLE" in attrs:
parsed_attrs["sortable"] = True
attrs.remove("SORTABLE")
if "UNF" in attrs:
attrs.remove("UNF") # UNF present on sortable numeric fields only

# Handle all boolean attributes first, regardless of position
boolean_attrs = {
"NOSTEM": "no_stem",
"CASESENSITIVE": "case_sensitive",
"SORTABLE": "sortable",
"INDEXMISSING": "index_missing",
"INDEXEMPTY": "index_empty",
}

for redis_attr, python_attr in boolean_attrs.items():
if redis_attr in attrs:
parsed_attrs[python_attr] = True
attrs.remove(redis_attr)

# Handle UNF which is associated with SORTABLE
if "UNF" in attrs:
attrs.remove("UNF") # UNF present on sortable numeric fields only

try:
# Parse remaining attributes as key-value pairs starting from index 6
parsed_attrs.update(
{attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)}
)
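A simplified, standalone sketch of the table-driven flag parsing above; the attrs list is a made-up stand-in for an FT.INFO attribute entry, not a verbatim server response:

# Hypothetical FT.INFO-style attribute entry (simplified); the first six
# positions hold identifier/attribute/type metadata, the rest are boolean
# flags and key-value pairs.
attrs = [
    "identifier", "brand", "attribute", "brand", "type", "TAG",
    "SEPARATOR", ",", "SORTABLE", "INDEXMISSING", "INDEXEMPTY",
]

boolean_attrs = {
    "NOSTEM": "no_stem",
    "CASESENSITIVE": "case_sensitive",
    "SORTABLE": "sortable",
    "INDEXMISSING": "index_missing",
    "INDEXEMPTY": "index_empty",
}

parsed_attrs = {}
for redis_attr, python_attr in boolean_attrs.items():
    if redis_attr in attrs:
        parsed_attrs[python_attr] = True
        attrs.remove(redis_attr)

# Remaining entries from index 6 onward are treated as key/value pairs.
parsed_attrs.update({attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)})
print(parsed_attrs)
# expected: {'sortable': True, 'index_missing': True, 'index_empty': True, 'separator': ','}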
107 changes: 82 additions & 25 deletions redisvl/schema/fields.py
@@ -61,6 +61,8 @@ class BaseFieldAttributes(BaseModel):

sortable: bool = Field(default=False)
"""Enable faster result sorting on the field at runtime"""
index_missing: bool = Field(default=False)
"""Allow indexing and searching for missing values (documents without the field)"""


class TextFieldAttributes(BaseFieldAttributes):
@@ -74,6 +76,8 @@ class TextFieldAttributes(BaseFieldAttributes):
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
phonetic_matcher: Optional[str] = None
"""Used to perform phonetic matching during search"""
index_empty: bool = Field(default=False)
"""Allow indexing and searching for empty strings"""


class TagFieldAttributes(BaseFieldAttributes):
@@ -85,6 +89,8 @@ class TagFieldAttributes(BaseFieldAttributes):
"""Treat text as case sensitive or not. By default, tag characters are converted to lowercase"""
withsuffixtrie: bool = Field(default=False)
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
index_empty: bool = Field(default=False)
"""Allow indexing and searching for empty strings"""


class NumericFieldAttributes(BaseFieldAttributes):
@@ -112,6 +118,8 @@ class BaseVectorFieldAttributes(BaseModel):
"""The distance metric used to measure query relevance"""
initial_cap: Optional[int] = None
"""Initial vector capacity in the index affecting memory allocation size of the index"""
index_missing: bool = Field(default=False)
"""Allow indexing and searching for missing values (documents without the field)"""

@field_validator("algorithm", "datatype", "distance_metric", mode="before")
@classmethod
@@ -129,6 +137,8 @@ def field_data(self) -> Dict[str, Any]:
}
if self.initial_cap is not None: # Only include it if it's set
field_data["INITIAL_CAP"] = self.initial_cap
if self.index_missing: # Only include it if it's set
field_data["INDEXMISSING"] = True
return field_data


@@ -190,14 +200,30 @@ class TextField(BaseField):

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisTextField(
name,
as_name=as_name,
weight=self.attrs.weight, # type: ignore
no_stem=self.attrs.no_stem, # type: ignore
phonetic_matcher=self.attrs.phonetic_matcher, # type: ignore
sortable=self.attrs.sortable,
)
# Build arguments for RedisTextField
kwargs: Dict[str, Any] = {
"weight": self.attrs.weight, # type: ignore
"no_stem": self.attrs.no_stem, # type: ignore
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Only add phonetic_matcher if it's not None
if self.attrs.phonetic_matcher is not None: # type: ignore
kwargs["phonetic_matcher"] = self.attrs.phonetic_matcher # type: ignore

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

# Add INDEXEMPTY if enabled
if self.attrs.index_empty: # type: ignore
kwargs["index_empty"] = True

return RedisTextField(name, **kwargs)


class TagField(BaseField):
@@ -208,13 +234,26 @@ def as_redis_field(self) -> RedisField:

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisTagField(
name,
as_name=as_name,
separator=self.attrs.separator, # type: ignore
case_sensitive=self.attrs.case_sensitive, # type: ignore
sortable=self.attrs.sortable,
)
# Build arguments for RedisTagField
kwargs: Dict[str, Any] = {
"separator": self.attrs.separator, # type: ignore
"case_sensitive": self.attrs.case_sensitive, # type: ignore
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

# Add INDEXEMPTY if enabled
if self.attrs.index_empty: # type: ignore
kwargs["index_empty"] = True

return RedisTagField(name, **kwargs)


class NumericField(BaseField):
@@ -225,11 +264,20 @@ def as_redis_field(self) -> RedisField:

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisNumericField(
name,
as_name=as_name,
sortable=self.attrs.sortable,
)
# Build arguments for RedisNumericField
kwargs: Dict[str, Any] = {
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

return RedisNumericField(name, **kwargs)


class GeoField(BaseField):
@@ -240,11 +288,20 @@ def as_redis_field(self) -> RedisField:

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisGeoField(
name,
as_name=as_name,
sortable=self.attrs.sortable,
)
# Build arguments for RedisGeoField
kwargs: Dict[str, Any] = {
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

return RedisGeoField(name, **kwargs)


class FlatVectorField(BaseField):
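A sketch of how the new attributes might be declared on a schema; the index and field names are illustrative, and the attrs keys mirror the Pydantic attributes added above:

from redisvl.schema import IndexSchema

# Hypothetical index and field names, shown only to illustrate the new attrs.
schema = IndexSchema.from_dict({
    "index": {"name": "products", "prefix": "product"},
    "fields": [
        {"name": "brand", "type": "tag",
         "attrs": {"index_missing": True, "index_empty": True}},
        {"name": "title", "type": "text",
         "attrs": {"index_missing": True, "index_empty": True}},
        {"name": "price", "type": "numeric",
         "attrs": {"index_missing": True, "sortable": True}},
        {"name": "embedding", "type": "vector",
         "attrs": {"algorithm": "flat", "dims": 4,
                   "distance_metric": "cosine", "index_missing": True}},
    ],
})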
13 changes: 8 additions & 5 deletions redisvl/schema/schema.py
@@ -432,11 +432,14 @@ def to_dict(self) -> Dict[str, Any]:
Returns:
Dict[str, Any]: The index schema as a dictionary.
"""
dict_schema = model_to_dict(self)
# cast fields back to a pure list
dict_schema["fields"] = [
field for field_name, field in dict_schema["fields"].items()
]
# Manually serialize to ensure all field attributes are preserved
dict_schema = {
"index": model_to_dict(self.index),
"fields": [
model_to_dict(field) for field_name, field in self.fields.items()
],
"version": self.version,
}
return dict_schema

def to_yaml(self, file_path: str, overwrite: bool = True) -> None:
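Continuing the hypothetical schema sketched above, a quick round trip illustrates what the manual serialization is meant to guarantee: non-default attributes such as index_missing surviving to_dict/from_dict.

from redisvl.schema import IndexSchema

# `schema` is the hypothetical schema from the earlier sketch.
as_dict = schema.to_dict()
brand = next(f for f in as_dict["fields"] if f["name"] == "brand")
assert brand["attrs"]["index_missing"] is True  # non-default attrs now survive

restored = IndexSchema.from_dict(as_dict)
assert restored.fields["brand"].attrs.index_missing is True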
47 changes: 37 additions & 10 deletions redisvl/utils/utils.py
@@ -38,14 +38,19 @@ def model_to_dict(model: BaseModel) -> Dict[str, Any]:
def serialize_item(item):
if isinstance(item, Enum):
return item.value.lower()
elif isinstance(item, BaseModel):
# Recursively serialize nested BaseModel instances with exclude_defaults=False
nested_data = item.model_dump(exclude_none=True, exclude_defaults=False)
return {key: serialize_item(value) for key, value in nested_data.items()}
elif isinstance(item, dict):
return {key: serialize_item(value) for key, value in item.items()}
elif isinstance(item, list):
return [serialize_item(element) for element in item]
else:
return item

serialized_data = model.model_dump(exclude_none=True)
# Use exclude_defaults=False to preserve all field attributes including new ones
serialized_data = model.model_dump(exclude_none=True, exclude_defaults=False)
for key, value in serialized_data.items():
serialized_data[key] = serialize_item(value)
return serialized_data
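
A minimal illustration of the serialization behavior, using made-up Pydantic models rather than the real schema classes:

from enum import Enum
from pydantic import BaseModel

from redisvl.utils.utils import model_to_dict

class Color(Enum):
    RED = "RED"

class Inner(BaseModel):
    flag: bool = False

class Outer(BaseModel):
    color: Color = Color.RED
    inner: Inner = Inner()

# Enum values are lowercased and default values are kept, so boolean
# attributes like `flag` are not silently dropped.
print(model_to_dict(Outer(inner=Inner(flag=True))))
# expected: {'color': 'red', 'inner': {'flag': True}}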
@@ -170,29 +175,51 @@ def wrapper(*args, **kwargs):

def sync_wrapper(fn: Callable[[], Coroutine[Any, Any, Any]]) -> Callable[[], None]:
def wrapper():
# Check if the interpreter is shutting down
if sys is None or getattr(sys, "_getframe", None) is None:
# Interpreter is shutting down, skip cleanup
return

try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
except Exception:
# Any other exception during loop detection means we should skip cleanup
return

try:
if loop is None or not loop.is_running():
# Check if asyncio module is still available
if asyncio is None:
return

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
task = loop.create_task(fn())
loop.run_until_complete(task)
except RuntimeError:
except (RuntimeError, AttributeError, TypeError) as e:
# This could happen if an object stored an event loop and now
# that event loop is closed. There's nothing we can do other than
# advise the user to use explicit cleanup methods.
# that event loop is closed, or if asyncio modules are being
# torn down during interpreter shutdown.
#
# Uses logging module instead of get_logger() to avoid I/O errors
# if the wrapped function is called as a finalizer.
logging.info(
f"Could not run the async function {fn.__name__} because the event loop is closed. "
"This usually means the object was not properly cleaned up. Please use explicit "
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
"context manager.",
)
if logging is not None:
try:
logging.info(
f"Could not run the async function {fn.__name__} because the event loop is closed "
"or the interpreter is shutting down. "
"This usually means the object was not properly cleaned up. Please use explicit "
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
"context manager.",
)
except Exception:
# Even logging failed, interpreter is really shutting down
pass
return
except Exception:
# Any other unexpected exception should be silently ignored during shutdown
return

return wrapper
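For context, sync_wrapper is the sort of helper that gets invoked from finalizers; a hypothetical sketch of that pattern, where the hardened wrapper simply returns instead of raising during interpreter shutdown:

import asyncio

from redisvl.utils.utils import sync_wrapper

class Client:
    """Hypothetical object that owns async resources."""

    async def aclose(self):
        await asyncio.sleep(0)  # stand-in for real async cleanup

    def __del__(self):
        # Best-effort cleanup at garbage collection; with the hardened
        # wrapper this becomes a no-op if the interpreter is shutting down.
        sync_wrapper(self.aclose)()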