Skip to content

Commit cb8e728

Browse files
feat(tracing): Backfill missing sample_rand on PropagationContext
Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions. Ref #3998
1 parent 189e4a9 commit cb8e728

File tree

4 files changed

+245
-4
lines changed

4 files changed

+245
-4
lines changed

sentry_sdk/tracing_utils.py

Lines changed: 167 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import contextlib
2+
from decimal import Decimal
23
import inspect
34
import os
45
import re
56
import sys
67
from collections.abc import Mapping
78
from datetime import timedelta
89
from functools import wraps
10+
from random import Random
911
from urllib.parse import quote, unquote
1012
import uuid
1113

@@ -19,6 +21,7 @@
1921
match_regex_list,
2022
qualname_from_function,
2123
to_string,
24+
try_decimal,
2225
is_sentry_url,
2326
_is_external_source,
2427
_is_in_project_root,
@@ -418,13 +421,17 @@ def from_incoming_data(cls, incoming_data):
418421
propagation_context = PropagationContext()
419422
propagation_context.update(sentrytrace_data)
420423

424+
if propagation_context is not None:
425+
propagation_context._fill_sample_rand()
426+
421427
return propagation_context
422428

423429
@property
424430
def trace_id(self):
425431
# type: () -> str
426432
"""The trace id of the Sentry trace."""
427433
if not self._trace_id:
434+
# New trace, don't fill in sample_rand
428435
self._trace_id = uuid.uuid4().hex
429436

430437
return self._trace_id
@@ -469,6 +476,54 @@ def __repr__(self):
469476
self.dynamic_sampling_context,
470477
)
471478

479+
def _fill_sample_rand(self):
    # type: () -> None
    """
    Ensure that there is a valid sample_rand value in the dynamic_sampling_context.

    If there is a valid sample_rand value in the dynamic_sampling_context, we keep it.
    Otherwise, we generate a sample_rand value according to the following:

    - If we have a parent_sampled value and a sample_rate in the DSC, we compute
      a sample_rand value randomly in the range:
        - [0, sample_rate) if parent_sampled is True,
        - or, in the range [sample_rate, 1) if parent_sampled is False.

    - If either parent_sampled or sample_rate is missing, we generate a random
      value in the range [0, 1).

    The sample_rand is deterministically generated from the trace_id (the
    pseudorandom generator is seeded with it).

    This function does nothing if there is no dynamic_sampling_context. It also
    leaves the dynamic_sampling_context untouched, rather than raising, when no
    sensible value can be produced from the incoming data, e.g. when the
    sampled flag and sample_rate contradict each other, or when the incoming
    sample_rate is NaN, infinite, or outside of [0, 1].
    """
    if self.dynamic_sampling_context is None:
        return

    try:
        sample_rand = SampleRandValue.try_from_incoming(
            self.dynamic_sampling_context.get("sample_rand")
        )
    except ArithmeticError:
        # The value comes from an incoming header. Something like "NaN" parses
        # as a Decimal, but ordering comparisons on a Decimal NaN raise
        # decimal.InvalidOperation (an ArithmeticError). Treat it as missing.
        sample_rand = None

    if sample_rand is not None and 0 <= sample_rand.inner() < 1:
        # sample_rand is present and valid, so don't overwrite it
        return

    # Get the sample rate and compute the transformation that will map the random value
    # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1).
    sample_rate = try_decimal(self.dynamic_sampling_context.get("sample_rate"))
    lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)

    try:
        sample_rand = SampleRandValue.generate(
            self.trace_id, interval=(lower, upper)
        )
    except (ValueError, ArithmeticError):
        # ValueError is raised if the interval is invalid, i.e. lower >= upper.
        # lower >= upper might happen if the incoming trace's sampled flag
        # and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True.
        # ArithmeticError covers a non-finite incoming sample_rate:
        # decimal.InvalidOperation for NaN bounds, OverflowError for infinite ones.
        # We cannot generate a sensible sample_rand value in these cases.
        return

    if not 0 <= sample_rand.inner() < 1:
        # An incoming sample_rate outside of [0, 1] can stretch the interval
        # beyond [0, 1). Never backfill a value that is itself invalid.
        return

    self.dynamic_sampling_context["sample_rand"] = str(sample_rand)
526+
472527

473528
class Baggage:
474529
"""
@@ -643,9 +698,105 @@ def __repr__(self):
643698
return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'
644699

645700

701+
class SampleRandValue:
    """
    Lightweight wrapper around a Decimal value, with utilities for
    generating a sample rand value from a trace ID, parsing incoming
    sample_rand values, and for consistent serialization to a string.

    SampleRandValue instances are immutable.
    """

    DECIMAL_0 = Decimal(0)
    DECIMAL_1 = Decimal(1)

    PRECISION = 6
    """We use this many decimal places for the sample_rand value.

    Both the value generation and the string formatting are derived from
    this constant.
    """

    def __init__(self, value):
        # type: (Decimal) -> None
        """
        Initialize SampleRandValue from a Decimal value. This constructor
        should only be called internally by the SampleRandValue class.
        """
        self._value = value

    @classmethod
    def try_from_incoming(cls, incoming_value):
        # type: (Optional[str]) -> Optional[SampleRandValue]
        """
        Attempt to parse an incoming sample_rand value from a string.

        Returns None if the incoming value is None, cannot be parsed as a
        Decimal, is not finite (NaN or infinity), or lies outside of the
        range [0, 1).
        """
        value = try_decimal(incoming_value)
        if value is None or not value.is_finite():
            # The finiteness check must come before the range check:
            # ordering comparisons on a Decimal NaN raise InvalidOperation
            # instead of evaluating to False.
            return None

        if cls.DECIMAL_0 <= value < cls.DECIMAL_1:
            return cls(value)

        return None

    @classmethod
    def generate(
        cls,
        trace_id,  # type: Optional[str]
        *,
        interval=(DECIMAL_0, DECIMAL_1),  # type: tuple[Decimal, Decimal]
    ):
        # type: (...) -> SampleRandValue
        """Generate a sample_rand value from a trace ID.

        The generated value will be pseudorandomly chosen from the provided
        interval. Specifically, given (lower, upper) = interval, the generated
        value will be in the range [lower, upper).

        The pseudorandom number generator is seeded with the trace ID, so the
        same trace ID and interval always yield the same value.

        Raises ValueError if the interval is invalid: either bound is not
        finite (NaN or infinity), or lower is not strictly less than upper.
        """
        lower_decimal, upper_decimal = interval

        # Reject NaN/infinite bounds up front. Otherwise the comparison below
        # raises decimal.InvalidOperation for NaN, and int() raises
        # OverflowError for infinity, neither of which is the documented
        # ValueError that callers handle.
        if not (lower_decimal.is_finite() and upper_decimal.is_finite()):
            raise ValueError("Invalid interval: bounds must be finite")

        if not lower_decimal < upper_decimal:
            raise ValueError("Invalid interval: lower must be less than upper")

        # Since sample_rand values have PRECISION-digit precision, we generate
        # the value as an integer in the range
        # [lower_decimal * 10**PRECISION, upper_decimal * 10**PRECISION),
        # and then scale it back to the desired range.
        lower_int = int(lower_decimal.scaleb(cls.PRECISION))
        upper_int = int(upper_decimal.scaleb(cls.PRECISION))

        if lower_int == upper_int:
            # Edge case: lower_decimal < upper_decimal, but due to truncation,
            # lower_int == upper_int. In this case, we return lower_int scaled
            # back by PRECISION digits, since calling randrange() with the
            # same lower and upper bounds will raise an error.
            return cls(Decimal(lower_int).scaleb(-cls.PRECISION))

        value = Random(trace_id).randrange(lower_int, upper_int)
        return cls(Decimal(value).scaleb(-cls.PRECISION))

    def inner(self):
        # type: () -> Decimal
        """
        Return the inner Decimal value.
        """
        return self._value

    def __str__(self):
        # type: () -> str
        """
        Return a string representation of the SampleRandValue.

        The string representation has PRECISION (6) decimal places.
        """
        # Lint E231 is a false-positive here. If we add a space after the :,
        # then the formatter puts an extra space before the decimal numbers.
        return f"{self._value:.{self.PRECISION}f}"  # noqa: E231
795+
796+
646797
def should_propagate_trace(client, url):
647798
# type: (sentry_sdk.client.BaseClient, str) -> bool
648-
"""
799+
"""u
649800
Returns True if url matches trace_propagation_targets configured in the given client. Otherwise, returns False.
650801
"""
651802
trace_propagation_targets = client.options["trace_propagation_targets"]
@@ -748,6 +899,21 @@ def get_current_span(scope=None):
748899
return current_span
749900

750901

902+
def _sample_rand_range(parent_sampled, sample_rate):
903+
# type: (Optional[bool], Optional[Decimal]) -> tuple[Decimal, Decimal]
904+
"""
905+
Compute the lower (inclusive) and upper (exclusive) bounds of the range of values
906+
that a generated sample_rand value must fall into, given the parent_sampled and
907+
sample_rate values.
908+
"""
909+
if parent_sampled is None or sample_rate is None:
910+
return Decimal(0), Decimal(1)
911+
elif parent_sampled is True:
912+
return Decimal(0), sample_rate
913+
else: # parent_sampled is False
914+
return sample_rate, Decimal(1)
915+
916+
751917
# Circular imports
752918
from sentry_sdk.tracing import (
753919
BAGGAGE_HEADER_NAME,

sentry_sdk/utils.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import time
1313
from collections import namedtuple
1414
from datetime import datetime, timezone
15-
from decimal import Decimal
15+
from decimal import Decimal, InvalidOperation
1616
from functools import partial, partialmethod, wraps
1717
from numbers import Real
1818
from urllib.parse import parse_qs, unquote, urlencode, urlsplit, urlunsplit
@@ -1888,3 +1888,18 @@ def should_be_treated_as_error(ty, value):
18881888
return False
18891889

18901890
return True
1891+
1892+
1893+
def try_decimal(value):
    # type: (Optional[str]) -> Optional[Decimal]
    """Small utility which attempts to convert an Optional[str] to a Decimal.

    Returns None if the value is None or if the value cannot be converted to a
    Decimal. This function never raises for unparseable input, including
    values of an unexpected type.

    Note that strings such as "NaN" and "Infinity" are valid Decimal literals
    and are returned as non-finite Decimal values; callers that need a finite
    number have to check for that themselves.
    """
    if value is None:
        return None

    try:
        return Decimal(value)
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation: malformed numeric string.
        # TypeError / ValueError: the Decimal constructor rejects unsupported
        # types and malformed tuples/lists with these instead.
        return None

tests/test_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_continue_trace(sentry_init):
111111
transaction = continue_trace(
112112
{
113113
"sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled),
114-
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19",
114+
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.123456",
115115
},
116116
name="some name",
117117
)
@@ -123,7 +123,8 @@ def test_continue_trace(sentry_init):
123123
assert propagation_context.parent_span_id == parent_span_id
124124
assert propagation_context.parent_sampled == parent_sampled
125125
assert propagation_context.dynamic_sampling_context == {
126-
"trace_id": "566e3688a61d4bc888951642d6f14a19"
126+
"trace_id": "566e3688a61d4bc888951642d6f14a19",
127+
"sample_rand": "0.123456",
127128
}
128129

129130

tests/test_propagationcontext.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
1+
import pytest
2+
13
from sentry_sdk.tracing_utils import PropagationContext
24

35

6+
SAMPLED_FLAG = {
7+
None: "",
8+
False: "-0",
9+
True: "-1",
10+
}
11+
"""Maps the `sampled` value to the flag appended to the sentry-trace header."""
12+
13+
414
def test_empty_context():
515
ctx = PropagationContext()
616

@@ -51,13 +61,15 @@ def test_lazy_uuids():
5161

5262
def test_property_setters():
5363
ctx = PropagationContext()
64+
5465
ctx.trace_id = "X234567890abcdef1234567890abcdef"
5566
ctx.span_id = "X234567890abcdef"
5667

5768
assert ctx._trace_id == "X234567890abcdef1234567890abcdef"
5869
assert ctx.trace_id == "X234567890abcdef1234567890abcdef"
5970
assert ctx._span_id == "X234567890abcdef"
6071
assert ctx.span_id == "X234567890abcdef"
72+
assert ctx.dynamic_sampling_context is None
6173

6274

6375
def test_update():
@@ -81,3 +93,50 @@ def test_update():
8193
assert ctx.dynamic_sampling_context is None
8294

8395
assert not hasattr(ctx, "foo")
96+
97+
98+
def test_existing_sample_rand_kept():
99+
ctx = PropagationContext(
100+
trace_id="00000000000000000000000000000000",
101+
dynamic_sampling_context={"sample_rand": "0.5"},
102+
)
103+
104+
# If sample_rand was regenerated, the value would be 0.919221 based on the trace_id
105+
assert ctx.dynamic_sampling_context["sample_rand"] == "0.5"
106+
107+
108+
@pytest.mark.parametrize(
    ("parent_sampled", "sample_rate", "expected_sample_rand"),
    (
        # Note that parent_sampled and sample_rate do not scale the
        # sample_rand value, only determine the range of the value.
        # Expected values are determined by parent_sampled, sample_rate,
        # and the trace_id.
        (None, None, "0.919221"),
        (None, "0.5", "0.919221"),
        (False, None, "0.919221"),
        (True, None, "0.919221"),
        (False, "0.0", "0.919221"),
        (False, "0.01", "0.929221"),
        (True, "0.01", "0.006073"),
        (False, "0.1", "0.762590"),
        (True, "0.1", "0.082823"),
        (False, "0.5", "0.959610"),
        (True, "0.5", "0.459610"),
        (True, "1.0", "0.919221"),
    ),
)
def test_sample_rand_filled(parent_sampled, sample_rate, expected_sample_rand):
    """When continuing a trace, we want to fill in the sample_rand value if it's missing."""
    # The baggage header always carries a sentry-sample_rate entry. For the
    # sample_rate=None cases it renders as the literal string "None", which is
    # not a parseable decimal, so the rate counts as missing while the
    # incoming data still contains a baggage header.
    ctx = PropagationContext.from_incoming_data(
        {
            "sentry-trace": f"00000000000000000000000000000000-0000000000000000{SAMPLED_FLAG[parent_sampled]}",
            "baggage": f"sentry-sample_rate={sample_rate}",
        }
    )

    assert ctx.dynamic_sampling_context["sample_rand"] == expected_sample_rand

0 commit comments

Comments
 (0)