Skip to content

Commit 8d4d858

Browse files
feat(tracing): Use sample_rand for sampling decisions
Use the `sample_rand` value from an incoming trace to make sampling decisions, rather than generating a random value. When we are the head SDK starting a new trace, save our randomly-generated value as the `sample_rand`, and also change the random generation logic so that the `sample_rand` is computed deterministically based on the `trace_id`. Closes #3998
1 parent 61ae6e7 commit 8d4d858

File tree

11 files changed

+155
-50
lines changed

11 files changed

+155
-50
lines changed

sentry_sdk/tracing.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
from decimal import Decimal
12
import uuid
2-
import random
33
import warnings
44
from datetime import datetime, timedelta, timezone
55
from enum import Enum
@@ -785,6 +785,7 @@ class Transaction(Span):
785785
"_profile",
786786
"_continuous_profile",
787787
"_baggage",
788+
"_sample_rand",
788789
)
789790

790791
def __init__( # type: ignore[misc]
@@ -809,6 +810,14 @@ def __init__( # type: ignore[misc]
809810
self._continuous_profile = None # type: Optional[ContinuousProfile]
810811
self._baggage = baggage
811812

813+
baggage_sample_rand = (
814+
None if self._baggage is None else self._baggage._sample_rand()
815+
)
816+
if baggage_sample_rand is not None:
817+
self._sample_rand = SampleRandValue(baggage_sample_rand)
818+
else:
819+
self._sample_rand = SampleRandValue.generate(self.trace_id)
820+
812821
def __repr__(self):
813822
# type: () -> str
814823
return (
@@ -1179,10 +1188,10 @@ def _set_initial_sampling_decision(self, sampling_context):
11791188
self.sampled = False
11801189
return
11811190

1182-
# Now we roll the dice. random.random is inclusive of 0, but not of 1,
1191+
# Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
11831192
# so strict < is safe here. In case sample_rate is a boolean, cast it
11841193
# to a float (True becomes 1.0 and False becomes 0.0)
1185-
self.sampled = random.random() < self.sample_rate
1194+
self.sampled = self._sample_rand.inner() < self.sample_rate
11861195

11871196
if self.sampled:
11881197
logger.debug(
@@ -1338,6 +1347,7 @@ async def my_async_function():
13381347
from sentry_sdk.tracing_utils import (
13391348
Baggage,
13401349
EnvironHeaders,
1350+
SampleRandValue,
13411351
extract_sentrytrace_data,
13421352
has_tracing_enabled,
13431353
maybe_create_breadcrumbs_from_span,

sentry_sdk/tracing_utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ def populate_from_transaction(cls, transaction):
645645
options = client.options or {}
646646

647647
sentry_items["trace_id"] = transaction.trace_id
648+
sentry_items["sample_rand"] = str(transaction._sample_rand)
648649

649650
if options.get("environment"):
650651
sentry_items["environment"] = options["environment"]
@@ -717,6 +718,20 @@ def strip_sentry_baggage(header):
717718
)
718719
)
719720

721+
def _sample_rand(self):
722+
# type: () -> Optional[Decimal]
723+
"""Convenience method to get the sample_rand value from the sentry_items.
724+
725+
We validate the value and parse it as a Decimal before returning it. The value is considered
726+
valid if it is a Decimal in the range [0, 1).
727+
"""
728+
sample_rand = try_decimal(self.sentry_items.get("sample_rand"))
729+
730+
if sample_rand is not None and Decimal(0) <= sample_rand < Decimal(1):
731+
return sample_rand
732+
733+
return None
734+
720735
def __repr__(self):
721736
# type: () -> str
722737
return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'

tests/integrations/aiohttp/test_aiohttp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ async def handler(request):
636636

637637
assert (
638638
resp.request_info.headers["baggage"]
639-
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
639+
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.3015579701611357,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
640640
)
641641

642642

tests/integrations/celery/test_celery.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,8 @@ def test_baggage_propagation(init_celery):
509509
def dummy_task(self, x, y):
510510
return _get_headers(self)
511511

512-
with start_transaction() as transaction:
512+
# force trace_id for predictable sample_rand
513+
with start_transaction(trace_id="00000000000000000000000000000000"):
513514
result = dummy_task.apply_async(
514515
args=(1, 0),
515516
headers={"baggage": "custom=value"},
@@ -518,8 +519,9 @@ def dummy_task(self, x, y):
518519
assert sorted(result["baggage"].split(",")) == sorted(
519520
[
520521
"sentry-release=abcdef",
521-
"sentry-trace_id={}".format(transaction.trace_id),
522+
"sentry-trace_id=00000000000000000000000000000000",
522523
"sentry-environment=production",
524+
"sentry-sample_rand=0.8766381713144122",
523525
"sentry-sample_rate=1.0",
524526
"sentry-sampled=true",
525527
"custom=value",

tests/integrations/httpx/test_httpx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_outgoing_trace_headers_append_to_baggage(
192192
)
193193
assert (
194194
response.request.headers["baggage"]
195-
== "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
195+
== "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-sample_rand=0.07190396862619497,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
196196
)
197197

198198

tests/integrations/stdlib/test_httplib.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import random
21
from http.client import HTTPConnection, HTTPSConnection
32
from socket import SocketIO
43
from urllib.error import HTTPError
@@ -189,7 +188,7 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
189188
"baggage": (
190189
"other-vendor-value-1=foo;bar;baz, sentry-trace_id=771a43a4192642f0b136d5159a501700, "
191190
"sentry-public_key=49d0f7386ad645858ae85020e393bef3, sentry-sample_rate=0.01337, "
192-
"sentry-user_id=Am%C3%A9lie, other-vendor-value-2=foo;bar;"
191+
"sentry-user_id=Am%C3%A9lie, sentry-sample_rand=0.132521102938283, other-vendor-value-2=foo;bar;"
193192
),
194193
}
195194

@@ -222,7 +221,8 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
222221
"sentry-trace_id=771a43a4192642f0b136d5159a501700,"
223222
"sentry-public_key=49d0f7386ad645858ae85020e393bef3,"
224223
"sentry-sample_rate=1.0,"
225-
"sentry-user_id=Am%C3%A9lie"
224+
"sentry-user_id=Am%C3%A9lie,"
225+
"sentry-sample_rand=0.132521102938283"
226226
)
227227

228228
assert request_headers["baggage"] == expected_outgoing_baggage
@@ -235,11 +235,12 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
235235
mock_send = mock.Mock()
236236
monkeypatch.setattr(HTTPSConnection, "send", mock_send)
237237

238-
# make sure transaction is always sampled
239-
monkeypatch.setattr(random, "random", lambda: 0.1)
240-
241238
sentry_init(traces_sample_rate=0.5, release="foo")
242-
transaction = Transaction.continue_from_headers({})
239+
240+
# forced trace_id results in sample_rand=0.27862410307482766, so sampled=True
241+
transaction = Transaction.continue_from_headers(
242+
{}, trace_id="22222222222222222222222222222222"
243+
)
243244

244245
with start_transaction(transaction=transaction, name="Head SDK tx") as transaction:
245246
HTTPSConnection("www.squirrelchasers.com").request("GET", "/top-chasers")
@@ -260,12 +261,13 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
260261
assert request_headers["sentry-trace"] == expected_sentry_trace
261262

262263
expected_outgoing_baggage = (
263-
"sentry-trace_id=%s,"
264+
"sentry-trace_id=22222222222222222222222222222222,"
265+
"sentry-sample_rand=0.27862410307482766,"
264266
"sentry-environment=production,"
265267
"sentry-release=foo,"
266268
"sentry-sample_rate=0.5,"
267269
"sentry-sampled=%s"
268-
) % (transaction.trace_id, "true" if transaction.sampled else "false")
270+
) % ("true" if transaction.sampled else "false")
269271

270272
assert request_headers["baggage"] == expected_outgoing_baggage
271273

tests/test_api.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import pytest
2+
3+
import re
24
from unittest import mock
35

46
import sentry_sdk
@@ -95,10 +97,10 @@ def test_baggage_with_tracing_disabled(sentry_init):
9597
def test_baggage_with_tracing_enabled(sentry_init):
9698
sentry_init(traces_sample_rate=1.0, release="1.0.0", environment="dev")
9799
with start_transaction() as transaction:
98-
expected_baggage = "sentry-trace_id={},sentry-environment=dev,sentry-release=1.0.0,sentry-sample_rate=1.0,sentry-sampled={}".format(
100+
expected_baggage_re = r"^sentry-trace_id={},sentry-sample_rand=0\.\d+,sentry-environment=dev,sentry-release=1\.0\.0,sentry-sample_rate=1\.0,sentry-sampled={}$".format(
99101
transaction.trace_id, "true" if transaction.sampled else "false"
100102
)
101-
assert get_baggage() == expected_baggage
103+
assert re.match(expected_baggage_re, get_baggage())
102104

103105

104106
@pytest.mark.forked

tests/test_monitor.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import random
21
from collections import Counter
32
from unittest import mock
43

@@ -68,15 +67,15 @@ def test_transaction_uses_downsampled_rate(
6867
monitor = sentry_sdk.get_client().monitor
6968
monitor.interval = 0.1
7069

71-
# make sure rng doesn't sample
72-
monkeypatch.setattr(random, "random", lambda: 0.9)
73-
7470
assert monitor.is_healthy() is True
7571
monitor.run()
7672
assert monitor.is_healthy() is False
7773
assert monitor.downsample_factor == 1
7874

79-
with sentry_sdk.start_transaction(name="foobar") as transaction:
75+
# trace_id forces sample_rand == 0.8766381713144122 >= 0.5, so sampled is False
76+
with sentry_sdk.start_transaction(
77+
name="foobar", trace_id="00000000000000000000000000000000"
78+
) as transaction:
8079
assert transaction.sampled is False
8180
assert transaction.sample_rate == 0.5
8281

tests/tracing/test_integration_tests.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import gc
2-
import random
32
import re
43
import sys
54
import weakref
@@ -168,10 +167,11 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
168167
sentry_init(traces_sample_rate=sample_rate, release="foo")
169168
envelopes = capture_envelopes()
170169

171-
# make sure transaction is sampled for both cases
172-
monkeypatch.setattr(random, "random", lambda: 0.1)
173-
174-
transaction = Transaction.continue_from_headers({}, name="Head SDK tx")
170+
# force trace_id such that sample_rand is 0.27862410307482766, so transaction
171+
# is sampled in both cases
172+
transaction = Transaction.continue_from_headers(
173+
{}, name="Head SDK tx", trace_id="22222222222222222222222222222222"
174+
)
175175

176176
# will create empty mutable baggage
177177
baggage = transaction._baggage
@@ -191,36 +191,47 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
191191
assert baggage
192192
assert not baggage.mutable
193193
assert baggage.third_party_items == ""
194-
assert baggage.sentry_items == {
195-
"environment": "production",
196-
"release": "foo",
197-
"sample_rate": str(sample_rate),
198-
"sampled": "true" if transaction.sampled else "false",
199-
"transaction": "Head SDK tx",
200-
"trace_id": trace_id,
194+
assert baggage.sentry_items.keys() == {
195+
"environment",
196+
"release",
197+
"sample_rate",
198+
"sampled",
199+
"transaction",
200+
"trace_id",
201+
"sample_rand",
201202
}
203+
assert (
204+
baggage.sentry_items.items()
205+
>= {
206+
"environment": "production",
207+
"release": "foo",
208+
"sample_rate": str(sample_rate),
209+
"sampled": "true" if transaction.sampled else "false",
210+
"transaction": "Head SDK tx",
211+
"trace_id": trace_id,
212+
}.items()
213+
)
214+
assert 0.0 <= float(baggage.sentry_items["sample_rand"]) < 1.0
202215

203216
expected_baggage = (
204217
"sentry-trace_id=%s,"
218+
"sentry-sample_rand=%s,"
205219
"sentry-environment=production,"
206220
"sentry-release=foo,"
207221
"sentry-transaction=Head%%20SDK%%20tx,"
208222
"sentry-sample_rate=%s,"
209223
"sentry-sampled=%s"
210-
% (trace_id, sample_rate, "true" if transaction.sampled else "false")
224+
% (
225+
trace_id,
226+
baggage.sentry_items["sample_rand"],
227+
sample_rate,
228+
"true" if transaction.sampled else "false",
229+
)
211230
)
212231
assert baggage.serialize() == expected_baggage
213232

214233
(envelope,) = envelopes
215234
assert envelope.headers["trace"] == baggage.dynamic_sampling_context()
216-
assert envelope.headers["trace"] == {
217-
"environment": "production",
218-
"release": "foo",
219-
"sample_rate": str(sample_rate),
220-
"sampled": "true" if transaction.sampled else "false",
221-
"transaction": "Head SDK tx",
222-
"trace_id": trace_id,
223-
}
224235

225236

226237
@pytest.mark.parametrize(

tests/tracing/test_sample_rand.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import pytest
2+
3+
import sentry_sdk
4+
from sentry_sdk.tracing_utils import Baggage
5+
6+
TEST_TRACE_ID_SAMPLE_RANDS = {
7+
"00000000000000000000000000000000": 0.8766381713144122,
8+
"01234567012345670123456701234567": 0.6451742521664413,
9+
"0123456789abcdef0123456789abcdef": 0.9338861957669223,
10+
}
11+
"""
12+
A dictionary of some trace IDs used in the tests, and their precomputed sample_rand values.
13+
14+
sample_rand values are pseudo-random numbers, deterministically generated from the trace ID.
15+
"""
16+
17+
18+
@pytest.mark.parametrize(
19+
("trace_id", "expected_sample_rand"),
20+
TEST_TRACE_ID_SAMPLE_RANDS.items(),
21+
)
22+
# test 21 linearly spaced sample_rate values from 0.0 to 1.0, inclusive
23+
@pytest.mark.parametrize("sample_rate", (i / 20 for i in range(21)))
24+
def test_deterministic_sampled(
25+
sentry_init, capture_events, sample_rate, trace_id, expected_sample_rand
26+
):
27+
"""
28+
Test that the sample_rand value is deterministic based on the trace ID, and
29+
that it is used to determine the sampling decision. Also, ensure that the
30+
transaction's baggage contains the sample_rand value.
31+
"""
32+
sentry_init(traces_sample_rate=sample_rate)
33+
events = capture_events()
34+
35+
with sentry_sdk.start_transaction(trace_id=trace_id) as transaction:
36+
assert transaction.get_baggage().sentry_items["sample_rand"] == str(
37+
expected_sample_rand
38+
)
39+
40+
# Transaction event captured if sample_rand < sample_rate, indicating that
41+
# sample_rand is used to make the sampling decision.
42+
assert len(events) == int(expected_sample_rand < sample_rate)
43+
44+
45+
@pytest.mark.parametrize("sample_rand", (0.0, 0.2, 0.4, 0.6, 0.8))
46+
@pytest.mark.parametrize("sample_rate", (0.0, 0.2, 0.4, 0.6, 0.8, 1.0))
47+
def test_transaction_uses_incoming_sample_rand(
48+
sentry_init, capture_events, sample_rate, sample_rand
49+
):
50+
"""
51+
Test that the transaction uses the sample_rand value from the incoming baggage.
52+
"""
53+
baggage = Baggage(sentry_items={"sample_rand": str(sample_rand)})
54+
55+
sentry_init(traces_sample_rate=sample_rate)
56+
events = capture_events()
57+
58+
with sentry_sdk.start_transaction(baggage=baggage) as transaction:
59+
assert transaction.get_baggage().sentry_items["sample_rand"] == str(sample_rand)
60+
61+
# Transaction event captured if sample_rand < sample_rate, indicating that
62+
# sample_rand is used to make the sampling decision.
63+
assert len(events) == int(sample_rand < sample_rate)

0 commit comments

Comments
 (0)