Skip to content

Commit bba389e

Browse files
feat(spans): track and report spans that were dropped (#4005)
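`_SpanRecorder` now keeps track of `dropped_spans`, i.e. the number of spans that were discarded because the recorder had already reached `max_spans`. When spans were dropped, the event's `"spans"` property is wrapped in an `AnnotatedValue` whose metadata reports the original number of spans (`len`).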
1 parent 2724d65 commit bba389e

File tree

7 files changed

+143
-99
lines changed

7 files changed

+143
-99
lines changed

sentry_sdk/_types.py

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,88 @@
1-
from typing import TYPE_CHECKING
1+
from typing import TYPE_CHECKING, TypeVar, Union
22

33

44
# Re-exported for compat, since code out there in the wild might use this variable.
55
MYPY = TYPE_CHECKING
66

77

8+
SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
9+
10+
11+
class AnnotatedValue:
    """
    Meta information for a data field in the event payload.
    This is to tell Relay that we have tampered with the field's value.
    See:
    https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423

    ``value`` is the payload value that is kept in the event (possibly a
    placeholder), ``metadata`` is a dict describing what was done to it,
    e.g. ``{"rem": [[rule, action]]}`` or ``{"len": original_length}``.

    Instances compare equal when both ``value`` and ``metadata`` are equal.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable.
    """

    __slots__ = ("value", "metadata")

    def __init__(self, value, metadata):
        # type: (Optional[Any], Dict[str, Any]) -> None
        self.value = value
        self.metadata = metadata

    def __eq__(self, other):
        # type: (Any) -> bool
        if not isinstance(other, AnnotatedValue):
            # Let Python try the reflected comparison; for ordinary objects
            # the final result is still ``False``.
            return NotImplemented

        return self.value == other.value and self.metadata == other.metadata

    def __repr__(self):
        # type: () -> str
        return "%s(value=%r, metadata=%r)" % (
            type(self).__name__,
            self.value,
            self.metadata,
        )

    @classmethod
    def removed_because_raw_data(cls):
        # type: () -> AnnotatedValue
        """The value was removed because it could not be parsed. This is done for request body values that are neither JSON nor a form."""
        # Build via ``cls`` so that subclasses get instances of their own type.
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!raw",  # Unparsable raw data
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def removed_because_over_size_limit(cls):
        # type: () -> AnnotatedValue
        """The actual value was removed because the size of the field exceeded the configured maximum size (specified with the max_request_body_size sdk option)"""
        # Build via ``cls`` so that subclasses get instances of their own type.
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of configured maximum size
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def substituted_because_contains_sensitive_data(cls):
        # type: () -> AnnotatedValue
        """The actual value was substituted because it contained sensitive information."""
        # Build via ``cls`` so that subclasses get instances of their own type.
        return cls(
            value=SENSITIVE_DATA_SUBSTITUTE,
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of SDK configuration (in this case the config is the hard coded removal of certain django cookies)
                        "s",  # The field's original value was substituted
                    ]
                ]
            },
        )
80+
81+
82+
T = TypeVar("T")
83+
Annotated = Union[AnnotatedValue, T]
84+
85+
886
if TYPE_CHECKING:
987
from collections.abc import Container, MutableMapping, Sequence
1088

@@ -19,7 +97,6 @@
1997
from typing import Optional
2098
from typing import Tuple
2199
from typing import Type
22-
from typing import Union
23100
from typing_extensions import Literal, TypedDict
24101

25102
class SDKInfo(TypedDict):
@@ -101,7 +178,7 @@ class SDKInfo(TypedDict):
101178
"request": dict[str, object],
102179
"sdk": Mapping[str, object],
103180
"server_name": str,
104-
"spans": list[dict[str, object]],
181+
"spans": Annotated[list[dict[str, object]]],
105182
"stacktrace": dict[
106183
str, object
107184
], # We access this key in the code, but I am unsure whether we ever set it
@@ -118,6 +195,7 @@ class SDKInfo(TypedDict):
118195
"transaction_info": Mapping[str, Any], # TODO: We can expand on this type
119196
"type": Literal["check_in", "transaction"],
120197
"user": dict[str, object],
198+
"_dropped_spans": int,
121199
"_metrics_summary": dict[str, object],
122200
},
123201
total=False,

sentry_sdk/client.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
from collections.abc import Mapping
66
from datetime import datetime, timezone
77
from importlib import import_module
8-
from typing import cast, overload
8+
from typing import TYPE_CHECKING, List, Dict, cast, overload
99
import warnings
1010

1111
from sentry_sdk._compat import PY37, check_uwsgi_thread_support
1212
from sentry_sdk.utils import (
13+
AnnotatedValue,
1314
ContextVar,
1415
capture_internal_exceptions,
1516
current_stacktrace,
@@ -45,12 +46,9 @@
4546
from sentry_sdk.monitor import Monitor
4647
from sentry_sdk.spotlight import setup_spotlight
4748

48-
from typing import TYPE_CHECKING
49-
5049
if TYPE_CHECKING:
5150
from typing import Any
5251
from typing import Callable
53-
from typing import Dict
5452
from typing import Optional
5553
from typing import Sequence
5654
from typing import Type
@@ -483,12 +481,14 @@ def _prepare_event(
483481
):
484482
# type: (...) -> Optional[Event]
485483

484+
previous_total_spans = None # type: Optional[int]
485+
486486
if event.get("timestamp") is None:
487487
event["timestamp"] = datetime.now(timezone.utc)
488488

489489
if scope is not None:
490490
is_transaction = event.get("type") == "transaction"
491-
spans_before = len(event.get("spans", []))
491+
spans_before = len(cast(List[Dict[str, object]], event.get("spans", [])))
492492
event_ = scope.apply_to_event(event, hint, self.options)
493493

494494
# one of the event/error processors returned None
@@ -507,13 +507,18 @@ def _prepare_event(
507507
return None
508508

509509
event = event_
510-
511-
spans_delta = spans_before - len(event.get("spans", []))
510+
spans_delta = spans_before - len(
511+
cast(List[Dict[str, object]], event.get("spans", []))
512+
)
512513
if is_transaction and spans_delta > 0 and self.transport is not None:
513514
self.transport.record_lost_event(
514515
"event_processor", data_category="span", quantity=spans_delta
515516
)
516517

518+
dropped_spans = event.pop("_dropped_spans", 0) + spans_delta # type: int
519+
if dropped_spans > 0:
520+
previous_total_spans = spans_before + dropped_spans
521+
517522
if (
518523
self.options["attach_stacktrace"]
519524
and "exception" not in event
@@ -561,6 +566,11 @@ def _prepare_event(
561566
if event_scrubber:
562567
event_scrubber.scrub_event(event)
563568

569+
if previous_total_spans is not None:
570+
event["spans"] = AnnotatedValue(
571+
event.get("spans", []), {"len": previous_total_spans}
572+
)
573+
564574
# Postprocess the event here so that annotated types do
565575
# generally not surface in before_send
566576
if event is not None:
@@ -598,7 +608,7 @@ def _prepare_event(
598608
and event.get("type") == "transaction"
599609
):
600610
new_event = None
601-
spans_before = len(event.get("spans", []))
611+
spans_before = len(cast(List[Dict[str, object]], event.get("spans", [])))
602612
with capture_internal_exceptions():
603613
new_event = before_send_transaction(event, hint or {})
604614
if new_event is None:

sentry_sdk/scrubber.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
iter_event_frames,
55
)
66

7-
from typing import TYPE_CHECKING
7+
from typing import TYPE_CHECKING, cast, List, Dict
88

99
if TYPE_CHECKING:
1010
from sentry_sdk._types import Event
11-
from typing import List
1211
from typing import Optional
1312

1413

@@ -161,7 +160,7 @@ def scrub_spans(self, event):
161160
# type: (Event) -> None
162161
with capture_internal_exceptions():
163162
if "spans" in event:
164-
for span in event["spans"]:
163+
for span in cast(List[Dict[str, object]], event["spans"]):
165164
if "data" in span:
166165
self.scrub_dict(span["data"])
167166

sentry_sdk/tracing.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def get_span_status_from_http_code(http_status_code):
193193
class _SpanRecorder:
194194
"""Limits the number of spans recorded in a transaction."""
195195

196-
__slots__ = ("maxlen", "spans")
196+
__slots__ = ("maxlen", "spans", "dropped_spans")
197197

198198
def __init__(self, maxlen):
199199
# type: (int) -> None
@@ -204,11 +204,13 @@ def __init__(self, maxlen):
204204
# limits: either transaction+spans or only child spans.
205205
self.maxlen = maxlen - 1
206206
self.spans = [] # type: List[Span]
207+
self.dropped_spans = 0 # type: int
207208

208209
def add(self, span):
209210
# type: (Span) -> None
210211
if len(self.spans) > self.maxlen:
211212
span._span_recorder = None
213+
self.dropped_spans += 1
212214
else:
213215
self.spans.append(span)
214216

@@ -972,6 +974,9 @@ def finish(
972974
if span.timestamp is not None
973975
]
974976

977+
len_diff = len(self._span_recorder.spans) - len(finished_spans)
978+
dropped_spans = len_diff + self._span_recorder.dropped_spans
979+
975980
# we do this to break the circular reference of transaction -> span
976981
# recorder -> span -> containing transaction (which is where we started)
977982
# before either the spans or the transaction goes out of scope and has
@@ -996,6 +1001,9 @@ def finish(
9961001
"spans": finished_spans,
9971002
} # type: Event
9981003

1004+
if dropped_spans > 0:
1005+
event["_dropped_spans"] = dropped_spans
1006+
9991007
if self._profile is not None and self._profile.valid():
10001008
event["profile"] = self._profile
10011009
self._profile = None

sentry_sdk/transport.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,13 @@
2424
from sentry_sdk.worker import BackgroundWorker
2525
from sentry_sdk.envelope import Envelope, Item, PayloadRef
2626

27-
from typing import TYPE_CHECKING
27+
from typing import TYPE_CHECKING, cast, List, Dict
2828

2929
if TYPE_CHECKING:
3030
from typing import Any
3131
from typing import Callable
32-
from typing import Dict
3332
from typing import DefaultDict
3433
from typing import Iterable
35-
from typing import List
3634
from typing import Mapping
3735
from typing import Optional
3836
from typing import Self
@@ -280,7 +278,9 @@ def record_lost_event(
280278
event = item.get_transaction_event() or {}
281279

282280
# +1 for the transaction itself
283-
span_count = len(event.get("spans") or []) + 1
281+
span_count = (
282+
len(cast(List[Dict[str, object]], event.get("spans") or [])) + 1
283+
)
284284
self.record_lost_event(reason, "span", quantity=span_count)
285285

286286
elif data_category == "attachment":

sentry_sdk/utils.py

Lines changed: 1 addition & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
DEFAULT_MAX_VALUE_LENGTH,
3333
EndpointType,
3434
)
35+
from sentry_sdk._types import Annotated, AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
3536

3637
from typing import TYPE_CHECKING
3738

@@ -73,8 +74,6 @@
7374

7475
BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$")
7576

76-
SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
77-
7877
FALSY_ENV_VALUES = frozenset(("false", "f", "n", "no", "off", "0"))
7978
TRUTHY_ENV_VALUES = frozenset(("true", "t", "y", "yes", "on", "1"))
8079

@@ -404,84 +403,6 @@ def to_header(self):
404403
return "Sentry " + ", ".join("%s=%s" % (key, value) for key, value in rv)
405404

406405

407-
class AnnotatedValue:
408-
"""
409-
Meta information for a data field in the event payload.
410-
This is to tell Relay that we have tampered with the fields value.
411-
See:
412-
https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423
413-
"""
414-
415-
__slots__ = ("value", "metadata")
416-
417-
def __init__(self, value, metadata):
418-
# type: (Optional[Any], Dict[str, Any]) -> None
419-
self.value = value
420-
self.metadata = metadata
421-
422-
def __eq__(self, other):
423-
# type: (Any) -> bool
424-
if not isinstance(other, AnnotatedValue):
425-
return False
426-
427-
return self.value == other.value and self.metadata == other.metadata
428-
429-
@classmethod
430-
def removed_because_raw_data(cls):
431-
# type: () -> AnnotatedValue
432-
"""The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
433-
return AnnotatedValue(
434-
value="",
435-
metadata={
436-
"rem": [ # Remark
437-
[
438-
"!raw", # Unparsable raw data
439-
"x", # The fields original value was removed
440-
]
441-
]
442-
},
443-
)
444-
445-
@classmethod
446-
def removed_because_over_size_limit(cls):
447-
# type: () -> AnnotatedValue
448-
"""The actual value was removed because the size of the field exceeded the configured maximum size (specified with the max_request_body_size sdk option)"""
449-
return AnnotatedValue(
450-
value="",
451-
metadata={
452-
"rem": [ # Remark
453-
[
454-
"!config", # Because of configured maximum size
455-
"x", # The fields original value was removed
456-
]
457-
]
458-
},
459-
)
460-
461-
@classmethod
462-
def substituted_because_contains_sensitive_data(cls):
463-
# type: () -> AnnotatedValue
464-
"""The actual value was removed because it contained sensitive information."""
465-
return AnnotatedValue(
466-
value=SENSITIVE_DATA_SUBSTITUTE,
467-
metadata={
468-
"rem": [ # Remark
469-
[
470-
"!config", # Because of SDK configuration (in this case the config is the hard coded removal of certain django cookies)
471-
"s", # The fields original value was substituted
472-
]
473-
]
474-
},
475-
)
476-
477-
478-
if TYPE_CHECKING:
479-
from typing import TypeVar
480-
481-
T = TypeVar("T")
482-
Annotated = Union[AnnotatedValue, T]
483-
484-
485406
def get_type_name(cls):
486407
# type: (Optional[type]) -> Optional[str]
487408
return getattr(cls, "__qualname__", None) or getattr(cls, "__name__", None)

0 commit comments

Comments
 (0)