Skip to content

Memory usage optimization via reuse of SchemaValidator and SchemaSerializer #1616

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/serializers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ mod fields;
mod filter;
mod infer;
mod ob_type;
mod prebuilt;
pub mod ser;
mod shared;
mod type_serializers;
Expand Down
94 changes: 94 additions & 0 deletions src/serializers/prebuilt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use std::borrow::Cow;

use pyo3::exceptions::PyValueError;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyType};

use crate::definitions::DefinitionsBuilder;
use crate::tools::SchemaDict;
use crate::SchemaSerializer;

use super::extra::Extra;
use super::shared::{BuildSerializer, CombinedSerializer, TypeSerializer};

/// Serializer that reuses a `SchemaSerializer` that already exists on a class
/// instead of building a new one from the schema.
///
/// Every `TypeSerializer` method on this type forwards to the inner serializer
/// of the held `SchemaSerializer`.
#[derive(Debug)]
pub struct PrebuiltSerializer {
/// Reference to the `SchemaSerializer` taken from the class's own
/// `__pydantic_serializer__` entry.
serializer: Py<SchemaSerializer>,
}

impl BuildSerializer for PrebuiltSerializer {
    const EXPECTED_TYPE: &'static str = "prebuilt";

    /// Wraps the `SchemaSerializer` already stored on the schema's `cls`.
    ///
    /// The `cls` entry of `schema` is read, and the class's own `__dict__` is
    /// inspected for `__pydantic_complete__` and `__pydantic_serializer__`.
    /// The config and definitions arguments are not used.
    ///
    /// # Errors
    ///
    /// Returns a `PyValueError` when `__pydantic_complete__` is absent from the
    /// class's own `__dict__` or does not extract to `true`. Errors from reading
    /// `cls`, `__dict__` or `__pydantic_serializer__`, and from extracting the
    /// `SchemaSerializer`, are propagated unchanged.
    fn build(
        schema: &Bound<'_, PyDict>,
        _config: Option<&Bound<'_, PyDict>>,
        _definitions: &mut DefinitionsBuilder<CombinedSerializer>,
    ) -> PyResult<CombinedSerializer> {
        let py = schema.py();
        let cls: Bound<'_, PyType> = schema.get_as_req(intern!(py, "cls"))?;

        // Item lookups on the class's own `__dict__` (rather than `getattr` on the
        // class) are REQUIRED: a prebuilt serializer inherited from a parent class
        // must not be picked up. `__dict__` on a class is a read-only mappingproxy,
        // so it is left as a plain object and only `get_item` is used on it.
        let own_namespace = cls.getattr(intern!(py, "__dict__"))?;

        // A missing key or a non-bool value both count as "not complete".
        let complete = match own_namespace.get_item(intern!(py, "__pydantic_complete__")) {
            Ok(flag) => flag.extract::<bool>().unwrap_or(false),
            Err(_) => false,
        };
        if !complete {
            return Err(PyValueError::new_err("Prebuilt serializer not found."));
        }

        // Fetch the prebuilt serializer; a missing entry or wrong type is an error.
        let serializer = own_namespace
            .get_item(intern!(py, "__pydantic_serializer__"))?
            .extract::<Py<SchemaSerializer>>()?;

        Ok(Self { serializer }.into())
    }
}

// Provides the GC-traversal implementation for `PrebuiltSerializer`, naming the
// `serializer` field. NOTE(review): presumably this makes the held Python
// reference visible to the garbage collector; confirm against the macro definition.
impl_py_gc_traverse!(PrebuiltSerializer { serializer });

/// Every method forwards to the inner serializer of the shared `SchemaSerializer`;
/// no serialization logic lives in this type itself.
impl TypeSerializer for PrebuiltSerializer {
    /// Serializes `value` to a Python object using the prebuilt serializer.
    fn to_python(
        &self,
        value: &Bound<'_, PyAny>,
        include: Option<&Bound<'_, PyAny>>,
        exclude: Option<&Bound<'_, PyAny>>,
        extra: &Extra,
    ) -> PyResult<PyObject> {
        let delegate = &self.serializer.get().serializer;
        delegate.to_python(value, include, exclude, extra)
    }

    /// Produces the JSON object key for `key` using the prebuilt serializer.
    fn json_key<'a>(&self, key: &'a Bound<'_, PyAny>, extra: &Extra) -> PyResult<Cow<'a, str>> {
        let delegate = &self.serializer.get().serializer;
        delegate.json_key(key, extra)
    }

    /// Serializes `value` through the given serde `serializer` using the prebuilt serializer.
    fn serde_serialize<S: serde::ser::Serializer>(
        &self,
        value: &Bound<'_, PyAny>,
        serializer: S,
        include: Option<&Bound<'_, PyAny>>,
        exclude: Option<&Bound<'_, PyAny>>,
        extra: &Extra,
    ) -> Result<S::Ok, S::Error> {
        let delegate = &self.serializer.get().serializer;
        delegate.serde_serialize(value, serializer, include, exclude, extra)
    }

    /// Reports the name of the wrapped serializer.
    fn get_name(&self) -> &str {
        let delegate = &self.serializer.get().serializer;
        delegate.get_name()
    }

    /// Reports whether the wrapped serializer asks for a retry with a lax check.
    fn retry_with_lax_check(&self) -> bool {
        let delegate = &self.serializer.get().serializer;
        delegate.retry_with_lax_check()
    }
}
18 changes: 17 additions & 1 deletion src/serializers/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ combined_serializer! {
Function: super::type_serializers::function::FunctionPlainSerializer;
FunctionWrap: super::type_serializers::function::FunctionWrapSerializer;
Fields: super::fields::GeneralFieldsSerializer;
// prebuilt serializers are manually constructed, and thus manually added to the `CombinedSerializer` enum
Prebuilt: super::prebuilt::PrebuiltSerializer;
}
// `find_only` is for type_serializers which are built directly via the `type` key and `find_serializer`
// but aren't actually used for serialization, e.g. their `build` method must return another serializer
Expand Down Expand Up @@ -195,7 +197,20 @@ impl CombinedSerializer {
}

let type_: Bound<'_, PyString> = schema.get_as_req(type_key)?;
Self::find_serializer(type_.to_str()?, schema, config, definitions)
let type_ = type_.to_str()?;

// if we have a SchemaSerializer on the type already, use it
// however, we don't want to use a prebuilt serializer for dataclasses if we have a generic_origin
// because __pydantic_serializer__ is cached on the unparametrized dataclass
if matches!(type_, "model" | "typed-dict")
|| matches!(type_, "dataclass") && !schema.contains(intern!(py, "generic_origin"))?
{
if let Ok(prebuilt_serializer) = super::prebuilt::PrebuiltSerializer::build(schema, config, definitions) {
return Ok(prebuilt_serializer);
}
}

Self::find_serializer(type_, schema, config, definitions)
}
}

Expand All @@ -219,6 +234,7 @@ impl PyGcTraverse for CombinedSerializer {
CombinedSerializer::Function(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::FunctionWrap(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Fields(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Prebuilt(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::None(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Nullable(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Int(inner) => inner.py_gc_traverse(visit),
Expand Down
18 changes: 17 additions & 1 deletion src/validators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ mod model;
mod model_fields;
mod none;
mod nullable;
mod prebuilt;
mod set;
mod string;
mod time;
Expand Down Expand Up @@ -515,8 +516,21 @@ pub fn build_validator(
definitions: &mut DefinitionsBuilder<CombinedValidator>,
) -> PyResult<CombinedValidator> {
let dict = schema.downcast::<PyDict>()?;
let type_: Bound<'_, PyString> = dict.get_as_req(intern!(schema.py(), "type"))?;
let py = schema.py();
let type_: Bound<'_, PyString> = dict.get_as_req(intern!(py, "type"))?;
let type_ = type_.to_str()?;

// if we have a SchemaValidator on the type already, use it
// however, we don't want to use a prebuilt validator for dataclasses if we have a generic_origin
// because __pydantic_validator__ is cached on the unparametrized dataclass
if matches!(type_, "model" | "typed-dict")
|| matches!(type_, "dataclass") && !dict.contains(intern!(py, "generic_origin"))?
{
if let Ok(prebuilt_validator) = prebuilt::PrebuiltValidator::build(dict, config, definitions) {
return Ok(prebuilt_validator);
}
}

validator_match!(
type_,
dict,
Expand Down Expand Up @@ -763,6 +777,8 @@ pub enum CombinedValidator {
// input dependent
JsonOrPython(json_or_python::JsonOrPython),
Complex(complex::ComplexValidator),
// uses a reference to an existing SchemaValidator to reduce memory usage
Prebuilt(prebuilt::PrebuiltValidator),
}

/// This trait must be implemented by all validators, it allows various validators to be accessed consistently,
Expand Down
68 changes: 68 additions & 0 deletions src/validators/prebuilt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use pyo3::exceptions::PyValueError;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyType};

use crate::errors::ValResult;
use crate::input::Input;
use crate::tools::SchemaDict;

use super::ValidationState;
use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, SchemaValidator, Validator};

/// Validator that reuses a `SchemaValidator` that already exists on a class
/// instead of building a new one from the schema.
///
/// Validation is forwarded to the inner validator of the held `SchemaValidator`.
#[derive(Debug)]
pub struct PrebuiltValidator {
/// Reference to the `SchemaValidator` taken from the class's own
/// `__pydantic_validator__` entry.
schema_validator: Py<SchemaValidator>,
/// The class's `__name__`, captured at build time and returned by `get_name`.
name: String,
}

impl BuildValidator for PrebuiltValidator {
    const EXPECTED_TYPE: &'static str = "prebuilt";

    /// Wraps the `SchemaValidator` already stored on the schema's `cls`.
    ///
    /// The `cls` entry of `schema` is read, and the class's own `__dict__` is
    /// inspected for `__pydantic_complete__` and `__pydantic_validator__`. The
    /// class's `__name__` becomes the validator name. The config and definitions
    /// arguments are not used.
    ///
    /// # Errors
    ///
    /// Returns a `PyValueError` when `__pydantic_complete__` is absent from the
    /// class's own `__dict__` or does not extract to `true`. Errors from reading
    /// `cls`, `__dict__`, `__pydantic_validator__` or `__name__`, and from the
    /// extractions, are propagated unchanged.
    fn build(
        schema: &Bound<'_, PyDict>,
        _config: Option<&Bound<'_, PyDict>>,
        _definitions: &mut DefinitionsBuilder<CombinedValidator>,
    ) -> PyResult<CombinedValidator> {
        let py = schema.py();
        let cls: Bound<'_, PyType> = schema.get_as_req(intern!(py, "cls"))?;

        // Item lookups on the class's own `__dict__` (rather than `getattr` on the
        // class) are REQUIRED: a prebuilt validator inherited from a parent class
        // must not be picked up. `__dict__` on a class is a read-only mappingproxy,
        // so it is left as a plain object and only `get_item` is used on it.
        let own_namespace = cls.getattr(intern!(py, "__dict__"))?;

        // A missing key or a non-bool value both count as "not complete".
        let complete = match own_namespace.get_item(intern!(py, "__pydantic_complete__")) {
            Ok(flag) => flag.extract::<bool>().unwrap_or(false),
            Err(_) => false,
        };
        if !complete {
            return Err(PyValueError::new_err("Prebuilt validator not found."));
        }

        // Fetch the prebuilt validator first, then the class name, so that error
        // precedence stays: missing entry, wrong type, then name lookup.
        let schema_validator = own_namespace
            .get_item(intern!(py, "__pydantic_validator__"))?
            .extract::<Py<SchemaValidator>>()?;
        let name = cls.getattr(intern!(py, "__name__"))?.extract::<String>()?;

        Ok(Self { schema_validator, name }.into())
    }
}

// Provides the GC-traversal implementation for `PrebuiltValidator`, naming the
// `schema_validator` field. NOTE(review): presumably this makes the held Python
// reference visible to the garbage collector; confirm against the macro definition.
impl_py_gc_traverse!(PrebuiltValidator { schema_validator });

impl Validator for PrebuiltValidator {
    /// Validates `input` by forwarding to the inner validator of the shared
    /// `SchemaValidator`, passing `py`, `input` and `state` through unchanged.
    fn validate<'py>(
        &self,
        py: Python<'py>,
        input: &(impl Input<'py> + ?Sized),
        state: &mut ValidationState<'_, 'py>,
    ) -> ValResult<PyObject> {
        let shared = self.schema_validator.get();
        shared.validator.validate(py, input, state)
    }

    /// Returns the class name captured when this validator was built.
    fn get_name(&self) -> &str {
        self.name.as_str()
    }
}