Skip to content

Commit fec92c5

Browse files
feat(tracing): Backfill missing sample_rand on PropagationContext
Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions. Ref #3998
1 parent b2fc801 commit fec92c5

File tree

3 files changed

+158
-2
lines changed

3 files changed

+158
-2
lines changed

sentry_sdk/tracing_utils.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from collections.abc import Mapping
77
from datetime import timedelta
88
from functools import wraps
9+
from random import Random
910
from urllib.parse import quote, unquote
1011
import uuid
1112

@@ -418,13 +419,17 @@ def from_incoming_data(cls, incoming_data):
418419
propagation_context = PropagationContext()
419420
propagation_context.update(sentrytrace_data)
420421

422+
if propagation_context is not None:
423+
propagation_context._fill_sample_rand()
424+
421425
return propagation_context
422426

423427
@property
424428
def trace_id(self):
425429
# type: () -> str
426430
"""The trace id of the Sentry trace."""
427431
if not self._trace_id:
432+
# New trace, don't fill in sample_rand
428433
self._trace_id = uuid.uuid4().hex
429434

430435
return self._trace_id
@@ -469,6 +474,48 @@ def __repr__(self):
469474
self.dynamic_sampling_context,
470475
)
471476

477+
def _fill_sample_rand(self):
    # type: () -> None
    """
    Ensure that there is a valid sample_rand value in the dynamic_sampling_context.

    If there is a valid sample_rand value in the dynamic_sampling_context, we keep it.
    Otherwise, we generate a sample_rand value according to the following:

    - If we have a parent_sampled value and a sample_rate in the DSC, we compute
      a sample_rand value randomly in the range:
        - [0, sample_rate) if parent_sampled is True,
        - or, in the range [sample_rate, 1) if parent_sampled is False.

    - If either parent_sampled or sample_rate is missing, we generate a random
      value in the range [0, 1).

    The sample_rand is deterministically generated from the trace_id, if present.

    Nothing is backfilled when the computed range is empty or does not lie
    within [0, 1]. That happens when the incoming sample_rate contradicts the
    sampling decision (e.g. sample_rate=0.0 but sampled=True) or is not a
    usable probability (NaN, infinite, negative, or greater than 1).
    """
    # Ensure that the dynamic_sampling_context is a dict
    self.dynamic_sampling_context = self.dynamic_sampling_context or {}

    sample_rand = _try_float(self.dynamic_sampling_context.get("sample_rand"))
    if sample_rand is not None and 0 <= sample_rand < 1:
        # sample_rand is present and valid, so don't overwrite it
        return

    # Get the sample rate and compute the range the generated value must fall
    # into: [0, 1), [0, sample_rate), or [sample_rate, 1).
    sample_rate = _try_float(self.dynamic_sampling_context.get("sample_rate"))
    lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)

    # The sample_rate comes from an incoming header, so it may be anything that
    # float() accepts, including "nan" and "inf". Written as a chained
    # comparison so that NaN (for which every comparison is False) is rejected
    # too. Without this guard a NaN would be stored as the sample_rand, and an
    # infinite bound could keep the generator retrying indefinitely.
    if not (0.0 <= lower < upper <= 1.0):
        return

    try:
        generated = _GuaranteedRangeRandom(self.trace_id).uniform(lower, upper)
    except ValueError:
        # uniform() rejects ranges it cannot sample from. We cannot backfill
        # a sensible sample_rand value in this case.
        return

    self.dynamic_sampling_context["sample_rand"] = str(generated)
518+
472519

473520
class Baggage:
474521
"""
@@ -748,6 +795,59 @@ def get_current_span(scope=None):
748795
return current_span
749796

750797

798+
def _try_float(value):
799+
# type: (Any) -> Optional[float]
800+
"""Small utility to convert a value to a float, if possible."""
801+
try:
802+
return float(value)
803+
except (ValueError, TypeError):
804+
return None
805+
806+
807+
def _sample_rand_range(parent_sampled, sample_rate):
808+
# type: (Optional[bool], Optional[float]) -> tuple[float, float]
809+
"""
810+
Compute the lower (inclusive) and upper (exclusive) bounds of the range of values
811+
that a generated sample_rand value must fall into, given the parent_sampled and
812+
sample_rate values.
813+
"""
814+
if parent_sampled is None or sample_rate is None:
815+
return 0.0, 1.0
816+
elif parent_sampled is True:
817+
return 0.0, sample_rate
818+
else: # parent_sampled is False
819+
return sample_rate, 1.0
820+
821+
822+
class _GuaranteedRangeRandom:
823+
"""
824+
A random number generator with a uniform implementation that guarantees
825+
a return value in lower <= x < upper.
826+
"""
827+
828+
def __init__(self, seed):
829+
# type: (Optional[str]) -> None
830+
self._random = Random(seed)
831+
832+
def uniform(self, lower, upper):
833+
# type: (float, float) -> float
834+
"""
835+
Return a random number in the range lower <= x < upper.
836+
Raises an error if lower >= upper, as it would be impossible to generate
837+
a value in the range lower <= x < upper in such a case.
838+
"""
839+
if lower >= upper:
840+
raise ValueError("lower must be strictly less than upper")
841+
842+
rv = upper
843+
while rv == upper:
844+
# The built-in uniform() method can, in some cases, return the
845+
# upper bound. We request a new value until we get a different
846+
# value.
847+
rv = self._random.uniform(lower, upper)
848+
return rv
849+
850+
751851
# Circular imports
752852
from sentry_sdk.tracing import (
753853
BAGGAGE_HEADER_NAME,

tests/test_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_continue_trace(sentry_init):
111111
transaction = continue_trace(
112112
{
113113
"sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled),
114-
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19",
114+
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.1234567890",
115115
},
116116
name="some name",
117117
)
@@ -123,7 +123,8 @@ def test_continue_trace(sentry_init):
123123
assert propagation_context.parent_span_id == parent_span_id
124124
assert propagation_context.parent_sampled == parent_sampled
125125
assert propagation_context.dynamic_sampling_context == {
126-
"trace_id": "566e3688a61d4bc888951642d6f14a19"
126+
"trace_id": "566e3688a61d4bc888951642d6f14a19",
127+
"sample_rand": "0.1234567890",
127128
}
128129

129130

tests/test_propagationcontext.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
1+
import pytest
2+
13
from sentry_sdk.tracing_utils import PropagationContext
24

35

6+
SAMPLED_FLAG = {
7+
None: "",
8+
False: "-0",
9+
True: "-1",
10+
}
11+
"""Maps the `sampled` value to the flag appended to the sentry-trace header."""
12+
13+
414
def test_empty_context():
515
ctx = PropagationContext()
616

@@ -51,13 +61,15 @@ def test_lazy_uuids():
5161

5262
def test_property_setters():
5363
ctx = PropagationContext()
64+
5465
ctx.trace_id = "X234567890abcdef1234567890abcdef"
5566
ctx.span_id = "X234567890abcdef"
5667

5768
assert ctx._trace_id == "X234567890abcdef1234567890abcdef"
5869
assert ctx.trace_id == "X234567890abcdef1234567890abcdef"
5970
assert ctx._span_id == "X234567890abcdef"
6071
assert ctx.span_id == "X234567890abcdef"
72+
assert ctx.dynamic_sampling_context is None
6173

6274

6375
def test_update():
@@ -81,3 +93,46 @@ def test_update():
8193
assert ctx.dynamic_sampling_context is None
8294

8395
assert not hasattr(ctx, "foo")
96+
97+
98+
def test_existing_sample_rand_kept():
    """A valid sample_rand already present in the DSC must not be overwritten."""
    ctx = PropagationContext(
        trace_id="00000000000000000000000000000000",
        dynamic_sampling_context={"sample_rand": "0.5"},
    )

    # Trigger the backfill explicitly; without this call the assertion below
    # would hold even if the backfill overwrote existing values.
    ctx._fill_sample_rand()

    # If sample_rand was regenerated, the value would be 0.8766381713144122 based on the trace_id
    assert ctx.dynamic_sampling_context["sample_rand"] == "0.5"
106+
107+
108+
@pytest.mark.parametrize(
    ("parent_sampled", "sample_rate", "expected_sample_rand"),
    (
        (None, None, "0.8766381713144122"),
        (None, "0.5", "0.8766381713144122"),
        (False, None, "0.8766381713144122"),
        (True, None, "0.8766381713144122"),
        (False, "0.0", "0.8766381713144122"),
        (False, "0.01", "0.8778717896012681"),
        (True, "0.01", "0.008766381713144122"),
        (False, "0.1", "0.888974354182971"),
        (True, "0.1", "0.08766381713144122"),
        (False, "0.5", "0.9383190856572061"),
        (True, "0.5", "0.4383190856572061"),
        (True, "1.0", "0.8766381713144122"),
    ),
)
def test_sample_rand_filled(parent_sampled, sample_rate, expected_sample_rand):
    """When continuing a trace, we want to fill in the sample_rand value if it's missing."""
    # NOTE: when sample_rate is None the baggage header carries the literal
    # text "sentry-sample_rate=None". The expected values in the table above
    # treat that the same as a missing sample_rate.
    ctx = PropagationContext.from_incoming_data(
        {
            "sentry-trace": f"00000000000000000000000000000000-0000000000000000{SAMPLED_FLAG[parent_sampled]}",
            "baggage": f"sentry-sample_rate={sample_rate}",
        }
    )

    assert ctx.dynamic_sampling_context["sample_rand"] == expected_sample_rand

0 commit comments

Comments
 (0)