Skip to content

Commit 5f57723

Browse files
feat(tracing): Use sample_rand for sampling decisions
Use the `sample_rand` value from an incoming trace to make sampling decisions, rather than generating a random value. When we are the head SDK starting a new trace, save our randomly-generated value as the `sample_rand`, and also change the random generation logic so that the `sample_rand` is computed deterministically based on the `trace_id`. Closes #3998
1 parent 7ec8e08 commit 5f57723

File tree

11 files changed

+154
-50
lines changed

11 files changed

+154
-50
lines changed

sentry_sdk/tracing.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import uuid
2-
import random
32
import warnings
43
from datetime import datetime, timedelta, timezone
4+
from random import Random
55

66
import sentry_sdk
77
from sentry_sdk.consts import INSTRUMENTER, SPANSTATUS, SPANDATA
@@ -774,6 +774,7 @@ class Transaction(Span):
774774
"_contexts",
775775
"_profile",
776776
"_baggage",
777+
"_sample_rand",
777778
)
778779

779780
def __init__( # type: ignore[misc]
@@ -799,6 +800,14 @@ def __init__( # type: ignore[misc]
799800
) # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile]
800801
self._baggage = baggage
801802

803+
baggage_sample_rand = (
804+
None if self._baggage is None else self._baggage._sample_rand()
805+
)
806+
if baggage_sample_rand is not None:
807+
self._sample_rand = baggage_sample_rand
808+
else:
809+
self._sample_rand = Random(self.trace_id).random()
810+
802811
def __repr__(self):
803812
# type: () -> str
804813
return (
@@ -1167,10 +1176,10 @@ def _set_initial_sampling_decision(self, sampling_context):
11671176
self.sampled = False
11681177
return
11691178

1170-
# Now we roll the dice. random.random is inclusive of 0, but not of 1,
1179+
# Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
11711180
# so strict < is safe here. In case sample_rate is a boolean, cast it
11721181
# to a float (True becomes 1.0 and False becomes 0.0)
1173-
self.sampled = random.random() < self.sample_rate
1182+
self.sampled = self._sample_rand < self.sample_rate
11741183

11751184
if self.sampled:
11761185
logger.debug(

sentry_sdk/tracing_utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,7 @@ def populate_from_transaction(cls, transaction):
630630
options = client.options or {}
631631

632632
sentry_items["trace_id"] = transaction.trace_id
633+
sentry_items["sample_rand"] = str(transaction._sample_rand)
633634

634635
if options.get("environment"):
635636
sentry_items["environment"] = options["environment"]
@@ -702,6 +703,20 @@ def strip_sentry_baggage(header):
702703
)
703704
)
704705

706+
def _sample_rand(self):
707+
# type: () -> Optional[float]
708+
"""Convenience method to get the sample_rand value from the sentry_items.
709+
710+
We validate the value and parse it as a float before returning it. The value is considered
711+
valid if it is a float in the range [0, 1).
712+
"""
713+
sample_rand = _try_float(self.sentry_items.get("sample_rand"))
714+
715+
if sample_rand is not None and 0 <= sample_rand < 1:
716+
return sample_rand
717+
718+
return None
719+
705720
def __repr__(self):
706721
# type: () -> str
707722
return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'

tests/integrations/aiohttp/test_aiohttp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ async def handler(request):
636636

637637
assert (
638638
resp.request_info.headers["baggage"]
639-
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
639+
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.3015579701611357,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
640640
)
641641

642642

tests/integrations/celery/test_celery.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,8 @@ def test_baggage_propagation(init_celery):
511511
def dummy_task(self, x, y):
512512
return _get_headers(self)
513513

514-
with start_transaction() as transaction:
514+
# force trace_id for predictable sample_rand
515+
with start_transaction(trace_id="00000000000000000000000000000000"):
515516
result = dummy_task.apply_async(
516517
args=(1, 0),
517518
headers={"baggage": "custom=value"},
@@ -520,8 +521,9 @@ def dummy_task(self, x, y):
520521
assert sorted(result["baggage"].split(",")) == sorted(
521522
[
522523
"sentry-release=abcdef",
523-
"sentry-trace_id={}".format(transaction.trace_id),
524+
"sentry-trace_id=00000000000000000000000000000000",
524525
"sentry-environment=production",
526+
"sentry-sample_rand=0.8766381713144122",
525527
"sentry-sample_rate=1.0",
526528
"sentry-sampled=true",
527529
"custom=value",

tests/integrations/httpx/test_httpx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_outgoing_trace_headers_append_to_baggage(
192192
)
193193
assert (
194194
response.request.headers["baggage"]
195-
== "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
195+
== "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-sample_rand=0.07190396862619497,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
196196
)
197197

198198

tests/integrations/stdlib/test_httplib.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import random
21
from http.client import HTTPConnection, HTTPSConnection
32
from socket import SocketIO
43
from urllib.error import HTTPError
@@ -189,7 +188,7 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
189188
headers["baggage"] = (
190189
"other-vendor-value-1=foo;bar;baz, sentry-trace_id=771a43a4192642f0b136d5159a501700, "
191190
"sentry-public_key=49d0f7386ad645858ae85020e393bef3, sentry-sample_rate=0.01337, "
192-
"sentry-user_id=Am%C3%A9lie, other-vendor-value-2=foo;bar;"
191+
"sentry-user_id=Am%C3%A9lie, sentry-sample_rand=0.132521102938283, other-vendor-value-2=foo;bar;"
193192
)
194193

195194
transaction = Transaction.continue_from_headers(headers)
@@ -221,7 +220,8 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
221220
"sentry-trace_id=771a43a4192642f0b136d5159a501700,"
222221
"sentry-public_key=49d0f7386ad645858ae85020e393bef3,"
223222
"sentry-sample_rate=0.01337,"
224-
"sentry-user_id=Am%C3%A9lie"
223+
"sentry-user_id=Am%C3%A9lie,"
224+
"sentry-sample_rand=0.132521102938283"
225225
)
226226

227227
assert request_headers["baggage"] == expected_outgoing_baggage
@@ -234,11 +234,12 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
234234
mock_send = mock.Mock()
235235
monkeypatch.setattr(HTTPSConnection, "send", mock_send)
236236

237-
# make sure transaction is always sampled
238-
monkeypatch.setattr(random, "random", lambda: 0.1)
239-
240237
sentry_init(traces_sample_rate=0.5, release="foo")
241-
transaction = Transaction.continue_from_headers({})
238+
239+
# forced trace_id results in sample_rand=0.27862410307482766, so sampled=True
240+
transaction = Transaction.continue_from_headers(
241+
{}, trace_id="22222222222222222222222222222222"
242+
)
242243

243244
with start_transaction(transaction=transaction, name="Head SDK tx") as transaction:
244245
HTTPSConnection("www.squirrelchasers.com").request("GET", "/top-chasers")
@@ -259,12 +260,13 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
259260
assert request_headers["sentry-trace"] == expected_sentry_trace
260261

261262
expected_outgoing_baggage = (
262-
"sentry-trace_id=%s,"
263+
"sentry-trace_id=22222222222222222222222222222222,"
264+
"sentry-sample_rand=0.27862410307482766,"
263265
"sentry-environment=production,"
264266
"sentry-release=foo,"
265267
"sentry-sample_rate=0.5,"
266268
"sentry-sampled=%s"
267-
) % (transaction.trace_id, "true" if transaction.sampled else "false")
269+
) % ("true" if transaction.sampled else "false")
268270

269271
assert request_headers["baggage"] == expected_outgoing_baggage
270272

tests/test_api.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import pytest
2+
3+
import re
24
from unittest import mock
35

46
import sentry_sdk
@@ -95,10 +97,10 @@ def test_baggage_with_tracing_disabled(sentry_init):
9597
def test_baggage_with_tracing_enabled(sentry_init):
9698
sentry_init(traces_sample_rate=1.0, release="1.0.0", environment="dev")
9799
with start_transaction() as transaction:
98-
expected_baggage = "sentry-trace_id={},sentry-environment=dev,sentry-release=1.0.0,sentry-sample_rate=1.0,sentry-sampled={}".format(
100+
expected_baggage_re = r"^sentry-trace_id={},sentry-sample_rand=0\.\d+,sentry-environment=dev,sentry-release=1\.0\.0,sentry-sample_rate=1\.0,sentry-sampled={}$".format(
99101
transaction.trace_id, "true" if transaction.sampled else "false"
100102
)
101-
assert get_baggage() == expected_baggage
103+
assert re.match(expected_baggage_re, get_baggage())
102104

103105

104106
@pytest.mark.forked

tests/test_monitor.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import random
21
from collections import Counter
32
from unittest import mock
43

@@ -68,15 +67,15 @@ def test_transaction_uses_downsampled_rate(
6867
monitor = sentry_sdk.get_client().monitor
6968
monitor.interval = 0.1
7069

71-
# make sure rng doesn't sample
72-
monkeypatch.setattr(random, "random", lambda: 0.9)
73-
7470
assert monitor.is_healthy() is True
7571
monitor.run()
7672
assert monitor.is_healthy() is False
7773
assert monitor.downsample_factor == 1
7874

79-
with sentry_sdk.start_transaction(name="foobar") as transaction:
75+
# trace_id forces sample_rand == 0.8766381713144122 >= 0.5, so sampled is False
76+
with sentry_sdk.start_transaction(
77+
name="foobar", trace_id="00000000000000000000000000000000"
78+
) as transaction:
8079
assert transaction.sampled is False
8180
assert transaction.sample_rate == 0.5
8281

tests/tracing/test_integration_tests.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import gc
2-
import random
32
import re
43
import sys
54
import weakref
@@ -161,10 +160,11 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
161160
sentry_init(traces_sample_rate=sample_rate, release="foo")
162161
envelopes = capture_envelopes()
163162

164-
# make sure transaction is sampled for both cases
165-
monkeypatch.setattr(random, "random", lambda: 0.1)
166-
167-
transaction = Transaction.continue_from_headers({}, name="Head SDK tx")
163+
# force trace_id such that sample_rand is 0.27862410307482766, so transaction
164+
# is sampled in both cases
165+
transaction = Transaction.continue_from_headers(
166+
{}, name="Head SDK tx", trace_id="22222222222222222222222222222222"
167+
)
168168

169169
# will create empty mutable baggage
170170
baggage = transaction._baggage
@@ -184,36 +184,47 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
184184
assert baggage
185185
assert not baggage.mutable
186186
assert baggage.third_party_items == ""
187-
assert baggage.sentry_items == {
188-
"environment": "production",
189-
"release": "foo",
190-
"sample_rate": str(sample_rate),
191-
"sampled": "true" if transaction.sampled else "false",
192-
"transaction": "Head SDK tx",
193-
"trace_id": trace_id,
187+
assert baggage.sentry_items.keys() == {
188+
"environment",
189+
"release",
190+
"sample_rate",
191+
"sampled",
192+
"transaction",
193+
"trace_id",
194+
"sample_rand",
194195
}
196+
assert (
197+
baggage.sentry_items.items()
198+
>= {
199+
"environment": "production",
200+
"release": "foo",
201+
"sample_rate": str(sample_rate),
202+
"sampled": "true" if transaction.sampled else "false",
203+
"transaction": "Head SDK tx",
204+
"trace_id": trace_id,
205+
}.items()
206+
)
207+
assert 0.0 <= float(baggage.sentry_items["sample_rand"]) < 1.0
195208

196209
expected_baggage = (
197210
"sentry-trace_id=%s,"
211+
"sentry-sample_rand=%s,"
198212
"sentry-environment=production,"
199213
"sentry-release=foo,"
200214
"sentry-transaction=Head%%20SDK%%20tx,"
201215
"sentry-sample_rate=%s,"
202216
"sentry-sampled=%s"
203-
% (trace_id, sample_rate, "true" if transaction.sampled else "false")
217+
% (
218+
trace_id,
219+
baggage.sentry_items["sample_rand"],
220+
sample_rate,
221+
"true" if transaction.sampled else "false",
222+
)
204223
)
205224
assert baggage.serialize() == expected_baggage
206225

207226
(envelope,) = envelopes
208227
assert envelope.headers["trace"] == baggage.dynamic_sampling_context()
209-
assert envelope.headers["trace"] == {
210-
"environment": "production",
211-
"release": "foo",
212-
"sample_rate": str(sample_rate),
213-
"sampled": "true" if transaction.sampled else "false",
214-
"transaction": "Head SDK tx",
215-
"trace_id": trace_id,
216-
}
217228

218229

219230
@pytest.mark.parametrize(

tests/tracing/test_sample_rand.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import pytest
2+
3+
import sentry_sdk
4+
from sentry_sdk.tracing_utils import Baggage
5+
6+
TEST_TRACE_ID_SAMPLE_RANDS = {
7+
"00000000000000000000000000000000": 0.8766381713144122,
8+
"01234567012345670123456701234567": 0.6451742521664413,
9+
"0123456789abcdef0123456789abcdef": 0.9338861957669223,
10+
}
11+
"""
12+
A dictionary of some trace IDs used in the tests, and their precomputed sample_rand values.
13+
14+
sample_rand values are pseudo-random numbers, deterministically generated from the trace ID.
15+
"""
16+
17+
18+
@pytest.mark.parametrize(
19+
("trace_id", "expected_sample_rand"),
20+
TEST_TRACE_ID_SAMPLE_RANDS.items(),
21+
)
22+
# test 21 linearly spaced sample_rate values from 0.0 to 1.0, inclusive
23+
@pytest.mark.parametrize("sample_rate", (i / 20 for i in range(21)))
24+
def test_deterministic_sampled(
25+
sentry_init, capture_events, sample_rate, trace_id, expected_sample_rand
26+
):
27+
"""
28+
Test that the sample_rand value is deterministic based on the trace ID, and
29+
that it is used to determine the sampling decision. Also, ensure that the
30+
transaction's baggage contains the sample_rand value.
31+
"""
32+
sentry_init(traces_sample_rate=sample_rate)
33+
events = capture_events()
34+
35+
with sentry_sdk.start_transaction(trace_id=trace_id) as transaction:
36+
assert transaction.get_baggage().sentry_items["sample_rand"] == str(
37+
expected_sample_rand
38+
)
39+
40+
# Transaction event captured if sample_rand < sample_rate, indicating that
41+
# sample_rand is used to make the sampling decision.
42+
assert len(events) == int(expected_sample_rand < sample_rate)
43+
44+
45+
@pytest.mark.parametrize("sample_rand", (0.0, 0.2, 0.4, 0.6, 0.8))
46+
@pytest.mark.parametrize("sample_rate", (0.0, 0.2, 0.4, 0.6, 0.8, 1.0))
47+
def test_transaction_uses_incoming_sample_rand(
48+
sentry_init, capture_events, sample_rate, sample_rand
49+
):
50+
"""
51+
Test that the transaction uses the sample_rand value from the incoming baggage.
52+
"""
53+
baggage = Baggage(sentry_items={"sample_rand": str(sample_rand)})
54+
55+
sentry_init(traces_sample_rate=sample_rate)
56+
events = capture_events()
57+
58+
with sentry_sdk.start_transaction(baggage=baggage) as transaction:
59+
assert transaction.get_baggage().sentry_items["sample_rand"] == str(sample_rand)
60+
61+
# Transaction event captured if sample_rand < sample_rate, indicating that
62+
# sample_rand is used to make the sampling decision.
63+
assert len(events) == int(sample_rand < sample_rate)

tests/tracing/test_sampling.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import sentry_sdk
88
from sentry_sdk import start_span, start_transaction, capture_exception
99
from sentry_sdk.tracing import Transaction
10+
from sentry_sdk.tracing_utils import Baggage
1011
from sentry_sdk.utils import logger
1112

1213

@@ -73,9 +74,9 @@ def test_uses_traces_sample_rate_correctly(
7374
):
7475
sentry_init(traces_sample_rate=traces_sample_rate)
7576

76-
with mock.patch.object(random, "random", return_value=0.5):
77-
transaction = start_transaction(name="dogpark")
78-
assert transaction.sampled is expected_decision
77+
baggage = Baggage(sentry_items={"sample_rand": 0.5})
78+
transaction = start_transaction(name="dogpark", baggage=baggage)
79+
assert transaction.sampled is expected_decision
7980

8081

8182
@pytest.mark.parametrize(
@@ -89,9 +90,9 @@ def test_uses_traces_sampler_return_value_correctly(
8990
):
9091
sentry_init(traces_sampler=mock.Mock(return_value=traces_sampler_return_value))
9192

92-
with mock.patch.object(random, "random", return_value=0.5):
93-
transaction = start_transaction(name="dogpark")
94-
assert transaction.sampled is expected_decision
93+
baggage = Baggage(sentry_items={"sample_rand": 0.5})
94+
transaction = start_transaction(name="dogpark", baggage=baggage)
95+
assert transaction.sampled is expected_decision
9596

9697

9798
@pytest.mark.parametrize("traces_sampler_return_value", [True, False])

0 commit comments

Comments
 (0)