Skip to content

Commit 2b3f4f7

Browse files
feat(tracing): Backfill missing sample_rand on PropagationContext
Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions. Ref #3998
1 parent 189e4a9 commit 2b3f4f7

File tree

4 files changed

+224
-2
lines changed

4 files changed

+224
-2
lines changed

sentry_sdk/tracing_utils.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import contextlib
2+
from decimal import ROUND_DOWN, Decimal
23
import inspect
34
import os
45
import re
56
import sys
67
from collections.abc import Mapping
78
from datetime import timedelta
89
from functools import wraps
10+
from random import Random
911
from urllib.parse import quote, unquote
1012
import uuid
1113

@@ -19,6 +21,7 @@
1921
match_regex_list,
2022
qualname_from_function,
2123
to_string,
24+
try_convert,
2225
is_sentry_url,
2326
_is_external_source,
2427
_is_in_project_root,
@@ -45,6 +48,7 @@
4548
"[ \t]*$" # whitespace
4649
)
4750

51+
4852
# This is a normal base64 regex, modified to reflect the fact that we strip the
4953
# trailing = or == off
5054
base64_stripped = (
@@ -418,13 +422,17 @@ def from_incoming_data(cls, incoming_data):
418422
propagation_context = PropagationContext()
419423
propagation_context.update(sentrytrace_data)
420424

425+
if propagation_context is not None:
426+
propagation_context._fill_sample_rand()
427+
421428
return propagation_context
422429

423430
@property
def trace_id(self):
    # type: () -> str
    """Return the trace id of the Sentry trace, generating one lazily if unset."""
    if self._trace_id:
        return self._trace_id

    # New trace, don't fill in sample_rand
    self._trace_id = uuid.uuid4().hex
    return self._trace_id
@@ -469,6 +477,60 @@ def __repr__(self):
469477
self.dynamic_sampling_context,
470478
)
471479

480+
def _fill_sample_rand(self):
    # type: () -> None
    """
    Ensure that there is a valid sample_rand value in the dynamic_sampling_context.

    If there is a valid sample_rand value in the dynamic_sampling_context, we keep it.
    A value is valid if it parses as a finite decimal number in the range [0, 1);
    "NaN" and "Infinity" are treated as invalid and get replaced.

    Otherwise, we generate a sample_rand value according to the following:

    - If we have a parent_sampled value and a sample_rate in the DSC, we compute
      a sample_rand value randomly in the range:
        - [0, sample_rate) if parent_sampled is True,
        - or, in the range [sample_rate, 1) if parent_sampled is False.

    - If either parent_sampled or sample_rate is missing, we generate a random
      value in the range [0, 1).

    The sample_rand is deterministically generated from the trace_id, if present.

    This function does nothing if there is no dynamic_sampling_context.
    """
    if self.dynamic_sampling_context is None:
        return

    sample_rand = try_convert(
        Decimal, self.dynamic_sampling_context.get("sample_rand")
    )
    # Decimal() happily parses "NaN", "sNaN" and "Infinity". Ordering comparisons
    # against a NaN raise decimal.InvalidOperation, so non-finite values must be
    # rejected *before* the range check, otherwise a malformed incoming baggage
    # header would crash trace continuation.
    if (
        sample_rand is not None
        and sample_rand.is_finite()
        and 0 <= sample_rand < 1
    ):
        # sample_rand is present and valid, so don't overwrite it
        return

    # Get the sample rate and compute the transformation that will map the random value
    # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1).
    sample_rate = try_convert(
        float, self.dynamic_sampling_context.get("sample_rate")
    )
    lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)

    try:
        sample_rand = _generate_sample_rand(self.trace_id, interval=(lower, upper))
    except ValueError:
        # ValueError is raised if the interval is invalid, i.e. lower >= upper.
        # lower >= upper might happen if the incoming trace's sampled flag
        # and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True.
        # We cannot generate a sensible sample_rand value in this case.
        logger.debug(
            f"Could not backfill sample_rand, since parent_sampled={self.parent_sampled} "
            f"and sample_rate={sample_rate}."
        )
        return

    self.dynamic_sampling_context["sample_rand"] = (
        f"{sample_rand:.6f}"  # noqa: E231
    )
533+
472534

473535
class Baggage:
474536
"""
@@ -748,6 +810,49 @@ def get_current_span(scope=None):
748810
return current_span
749811

750812

813+
def _generate_sample_rand(
    trace_id,  # type: Optional[str]
    *,
    interval=(0.0, 1.0),  # type: tuple[float, float]
):
    # type: (...) -> Decimal
    """Generate a sample_rand value from a trace ID.

    The generated value will be pseudorandomly chosen from the provided
    interval. Specifically, given (lower, upper) = interval, the generated
    value will be in the range [lower, upper). The value has 6-digit precision,
    so when printing with .6f, the value will never be rounded up.

    The pseudorandom number generator is seeded with the trace ID, so the same
    trace ID and interval always produce the same value.

    The result does not depend on the caller's ambient ``decimal`` context
    (e.g. a low precision configured by the application).

    :param trace_id: Seed for the pseudorandom number generator.
    :param interval: ``(lower, upper)`` bounds; both must be finite floats with
        ``lower < upper`` and a finite width.
    :raises ValueError: If the interval is empty, inverted, contains NaN, or is
        not finite.
    """
    # Local imports: only this helper needs these names.
    from decimal import Context
    from math import isfinite

    lower, upper = interval
    # `lower < upper` (rather than `not lower >= upper`) also rejects NaN bounds.
    # Non-finite bounds or a non-finite width must be rejected as well: uniform()
    # would then only ever return +/-inf or NaN, and for upper=inf the rejection
    # loop below would never terminate.
    if not (
        isfinite(lower)
        and isfinite(upper)
        and isfinite(upper - lower)
        and lower < upper
    ):
        raise ValueError("Invalid interval: lower must be less than upper")

    rng = Random(trace_id)
    sample_rand = upper
    # uniform() may return the upper bound due to float rounding; redraw until
    # the value is strictly below it so the interval stays half-open.
    while sample_rand >= upper:
        sample_rand = rng.uniform(lower, upper)

    # Round down to exactly six decimal-digit precision. An explicit context is
    # used because quantize() raises InvalidOperation when the result has more
    # digits than the active context's precision; 320 digits is enough for any
    # finite float (at most 309 integer digits plus 6 fractional digits).
    return Decimal(sample_rand).quantize(
        Decimal("0.000001"),
        rounding=ROUND_DOWN,
        context=Context(prec=320),
    )
840+
841+
def _sample_rand_range(parent_sampled, sample_rate):
    # type: (Optional[bool], Optional[float]) -> tuple[float, float]
    """
    Return the (inclusive lower, exclusive upper) bounds a backfilled sample_rand
    must fall into for the given parent_sampled flag and sample_rate.

    - Either input missing: the full range (0.0, 1.0).
    - parent_sampled is True: (0.0, sample_rate).
    - Otherwise (parent_sampled is False): (sample_rate, 1.0).
    """
    # Without both pieces of information there is no constraint to honour.
    if parent_sampled is None or sample_rate is None:
        return 0.0, 1.0

    # A sampled parent implies its sample_rand was below the sample rate.
    if parent_sampled is True:
        return 0.0, sample_rate

    # parent_sampled is False: the parent's sample_rand was at or above the rate.
    return sample_rate, 1.0
854+
855+
751856
# Circular imports
752857
from sentry_sdk.tracing import (
753858
BAGGAGE_HEADER_NAME,

sentry_sdk/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1888,3 +1888,20 @@ def should_be_treated_as_error(ty, value):
18881888
return False
18891889

18901890
return True
1891+
1892+
1893+
if TYPE_CHECKING:
1894+
T = TypeVar("T")
1895+
1896+
1897+
def try_convert(convert_func, value):
    # type: (Callable[[Any], T], Any) -> Optional[T]
    """
    Convert a value of unknown type by calling ``convert_func(value)``.

    Returns the converted value, or None if ``convert_func`` raises any
    exception (i.e. the conversion is treated as failed).
    """
    try:
        converted = convert_func(value)
    except Exception:
        # Any failure means "not convertible"; callers handle None.
        return None
    else:
        return converted

tests/test_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_continue_trace(sentry_init):
111111
transaction = continue_trace(
112112
{
113113
"sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled),
114-
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19",
114+
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.123456",
115115
},
116116
name="some name",
117117
)
@@ -123,7 +123,8 @@ def test_continue_trace(sentry_init):
123123
assert propagation_context.parent_span_id == parent_span_id
124124
assert propagation_context.parent_sampled == parent_sampled
125125
assert propagation_context.dynamic_sampling_context == {
126-
"trace_id": "566e3688a61d4bc888951642d6f14a19"
126+
"trace_id": "566e3688a61d4bc888951642d6f14a19",
127+
"sample_rand": "0.123456",
127128
}
128129

129130

tests/test_propagationcontext.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
1+
from unittest import mock
2+
from unittest.mock import Mock
3+
4+
import pytest
5+
16
from sentry_sdk.tracing_utils import PropagationContext
27

38

9+
SAMPLED_FLAG = {
10+
None: "",
11+
False: "-0",
12+
True: "-1",
13+
}
14+
"""Maps the `sampled` value to the flag appended to the sentry-trace header."""
15+
16+
417
def test_empty_context():
518
ctx = PropagationContext()
619

@@ -51,13 +64,15 @@ def test_lazy_uuids():
5164

5265
def test_property_setters():
    """Explicitly assigned ids are stored as-is and no DSC is created."""
    ctx = PropagationContext()

    ctx.trace_id = "X234567890abcdef1234567890abcdef"
    ctx.span_id = "X234567890abcdef"

    # Both the private attribute and the public property reflect the assigned value.
    for trace_value in (ctx._trace_id, ctx.trace_id):
        assert trace_value == "X234567890abcdef1234567890abcdef"
    for span_value in (ctx._span_id, ctx.span_id):
        assert span_value == "X234567890abcdef"

    assert ctx.dynamic_sampling_context is None
6176

6277

6378
def test_update():
@@ -81,3 +96,87 @@ def test_update():
8196
assert ctx.dynamic_sampling_context is None
8297

8398
assert not hasattr(ctx, "foo")
99+
100+
101+
def test_existing_sample_rand_kept():
    """A valid sample_rand already in the DSC must not be overwritten by the backfill."""
    ctx = PropagationContext(
        trace_id="00000000000000000000000000000000",
        dynamic_sampling_context={"sample_rand": "0.5"},
    )

    # Run the backfill explicitly so the test exercises it rather than only
    # checking that the constructor stored the dict it was given.
    ctx._fill_sample_rand()

    # If sample_rand was regenerated, the value would be 0.919221 based on the trace_id
    assert ctx.dynamic_sampling_context["sample_rand"] == "0.5"
109+
110+
111+
@pytest.mark.parametrize(
    ("parent_sampled", "sample_rate", "expected_interval"),
    (
        # Note that parent_sampled and sample_rate do not scale the
        # sample_rand value, only determine the range of the value.
        # Expected values are determined by parent_sampled, sample_rate,
        # and the trace_id.
        (None, None, (0.0, 1.0)),
        (None, "0.5", (0.0, 1.0)),
        (False, None, (0.0, 1.0)),
        (True, None, (0.0, 1.0)),
        (False, "0.0", (0.0, 1.0)),
        (False, "0.01", (0.01, 1.0)),
        (True, "0.01", (0.0, 0.01)),
        (False, "0.1", (0.1, 1.0)),
        (True, "0.1", (0.0, 0.1)),
        (False, "0.5", (0.5, 1.0)),
        (True, "0.5", (0.0, 0.5)),
        (True, "1.0", (0.0, 1.0)),
    ),
)
def test_sample_rand_filled(parent_sampled, sample_rate, expected_interval):
    """When continuing a trace, we want to fill in the sample_rand value if it's missing."""
    sample_rate_str = ""
    if sample_rate is not None:
        sample_rate_str = f",sentry-sample_rate={sample_rate}"  # noqa: E231

    # for convenience, the fake RNG just returns the lower bound of the interval
    mock_uniform = mock.Mock(return_value=expected_interval[0])

    def mock_random_class(seed):
        assert seed == "00000000000000000000000000000000", "seed should be the trace_id"
        return mock.Mock(uniform=mock_uniform)

    incoming_data = {
        "sentry-trace": f"00000000000000000000000000000000-0000000000000000{SAMPLED_FLAG[parent_sampled]}",
        # Placeholder is needed, since we only add sample_rand if sentry items are present in baggage
        "baggage": f"sentry-placeholder=asdf{sample_rate_str}",
    }

    with mock.patch("sentry_sdk.tracing_utils.Random", mock_random_class):
        ctx = PropagationContext.from_incoming_data(incoming_data)

    assert (
        ctx.dynamic_sampling_context["sample_rand"]
        == f"{expected_interval[0]:.6f}"  # noqa: E231
    )
    assert mock_uniform.call_count == 1
    assert mock_uniform.call_args[0] == expected_interval
163+
164+
165+
def test_sample_rand_rounds_down():
    """A generated value just below 1 is truncated to six digits, never rounded up."""
    # Mock value that should round down to 0.999_999
    mock_uniform = mock.Mock(return_value=0.999_999_9)

    def mock_random_class(_):
        return mock.Mock(uniform=mock_uniform)

    incoming_data = {
        "sentry-trace": "00000000000000000000000000000000-0000000000000000",
        "baggage": "sentry-placeholder=asdf",
    }

    with mock.patch("sentry_sdk.tracing_utils.Random", mock_random_class):
        ctx = PropagationContext.from_incoming_data(incoming_data)

    assert ctx.dynamic_sampling_context["sample_rand"] == "0.999999"

0 commit comments

Comments
 (0)