feat(tracing): Use sample_rand for sampling decisions

szokeasaurusrex · szokeasaurusrex · commit 5f577233821f · 2025-02-12T14:26:53.000+01:00
Use the `sample_rand` value from an incoming trace's baggage to make sampling decisions, rather than generating a new random value. When we are the head SDK starting a new trace, store our generated value as the transaction's `sample_rand` and propagate it in the outgoing baggage. The generation logic also changes: instead of drawing from the global random state, `sample_rand` is now computed deterministically from the `trace_id` by seeding a dedicated random number generator with it. Closes #3998
diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py
@@ -1,7 +1,7 @@
 import uuid
-import random
 import warnings
 from datetime import datetime, timedelta, timezone
+from random import Random
 
 import sentry_sdk
 from sentry_sdk.consts import INSTRUMENTER, SPANSTATUS, SPANDATA
@@ -774,6 +774,7 @@ class Transaction(Span):
         "_contexts",
         "_profile",
         "_baggage",
+        "_sample_rand",
     )
 
     def __init__(  # type: ignore[misc]
@@ -799,6 +800,14 @@ def __init__(  # type: ignore[misc]
         )  # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile]
         self._baggage = baggage
 
+        baggage_sample_rand = (
+            None if self._baggage is None else self._baggage._sample_rand()
+        )
+        if baggage_sample_rand is not None:
+            self._sample_rand = baggage_sample_rand
+        else:
+            self._sample_rand = Random(self.trace_id).random()
+
     def __repr__(self):
         # type: () -> str
         return (
@@ -1167,10 +1176,10 @@ def _set_initial_sampling_decision(self, sampling_context):
             self.sampled = False
             return
 
-        # Now we roll the dice. random.random is inclusive of 0, but not of 1,
+        # Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
         # so strict < is safe here. In case sample_rate is a boolean, cast it
         # to a float (True becomes 1.0 and False becomes 0.0)
-        self.sampled = random.random() < self.sample_rate
+        self.sampled = self._sample_rand < self.sample_rate
 
         if self.sampled:
             logger.debug(
diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py
@@ -630,6 +630,7 @@ def populate_from_transaction(cls, transaction):
         options = client.options or {}
 
         sentry_items["trace_id"] = transaction.trace_id
+        sentry_items["sample_rand"] = str(transaction._sample_rand)
 
         if options.get("environment"):
             sentry_items["environment"] = options["environment"]
@@ -702,6 +703,20 @@ def strip_sentry_baggage(header):
             )
         )
 
+    def _sample_rand(self):
+        # type: () -> Optional[float]
+        """Convenience method to get the sample_rand value from the sentry_items.
+
+        We validate the value and parse it as a float before returning it. The value is considered
+        valid if it is a float in the range [0, 1).
+        """
+        sample_rand = _try_float(self.sentry_items.get("sample_rand"))
+
+        if sample_rand is not None and 0 <= sample_rand < 1:
+            return sample_rand
+
+        return None
+
     def __repr__(self):
         # type: () -> str
         return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'
diff --git a/tests/integrations/aiohttp/test_aiohttp.py b/tests/integrations/aiohttp/test_aiohttp.py
@@ -636,7 +636,7 @@ async def handler(request):
 
         assert (
             resp.request_info.headers["baggage"]
-            == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
+            == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.3015579701611357,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
         )
 
 
diff --git a/tests/integrations/celery/test_celery.py b/tests/integrations/celery/test_celery.py
@@ -511,7 +511,8 @@ def test_baggage_propagation(init_celery):
     def dummy_task(self, x, y):
         return _get_headers(self)
 
-    with start_transaction() as transaction:
+    # force trace_id for predictable sample_rand
+    with start_transaction(trace_id="00000000000000000000000000000000"):
         result = dummy_task.apply_async(
             args=(1, 0),
             headers={"baggage": "custom=value"},
@@ -520,8 +521,9 @@ def dummy_task(self, x, y):
         assert sorted(result["baggage"].split(",")) == sorted(
             [
                 "sentry-release=abcdef",
-                "sentry-trace_id={}".format(transaction.trace_id),
+                "sentry-trace_id=00000000000000000000000000000000",
                 "sentry-environment=production",
+                "sentry-sample_rand=0.8766381713144122",
                 "sentry-sample_rate=1.0",
                 "sentry-sampled=true",
                 "custom=value",
diff --git a/tests/integrations/httpx/test_httpx.py b/tests/integrations/httpx/test_httpx.py
@@ -192,7 +192,7 @@ def test_outgoing_trace_headers_append_to_baggage(
         )
         assert (
             response.request.headers["baggage"]
-            == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
+            == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-sample_rand=0.07190396862619497,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
         )
 
 
diff --git a/tests/integrations/stdlib/test_httplib.py b/tests/integrations/stdlib/test_httplib.py
@@ -1,4 +1,3 @@
-import random
 from http.client import HTTPConnection, HTTPSConnection
 from socket import SocketIO
 from urllib.error import HTTPError
@@ -189,7 +188,7 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
     headers["baggage"] = (
         "other-vendor-value-1=foo;bar;baz, sentry-trace_id=771a43a4192642f0b136d5159a501700, "
         "sentry-public_key=49d0f7386ad645858ae85020e393bef3, sentry-sample_rate=0.01337, "
-        "sentry-user_id=Am%C3%A9lie, other-vendor-value-2=foo;bar;"
+        "sentry-user_id=Am%C3%A9lie, sentry-sample_rand=0.132521102938283, other-vendor-value-2=foo;bar;"
     )
 
     transaction = Transaction.continue_from_headers(headers)
@@ -221,7 +220,8 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch):
             "sentry-trace_id=771a43a4192642f0b136d5159a501700,"
             "sentry-public_key=49d0f7386ad645858ae85020e393bef3,"
             "sentry-sample_rate=0.01337,"
-            "sentry-user_id=Am%C3%A9lie"
+            "sentry-user_id=Am%C3%A9lie,"
+            "sentry-sample_rand=0.132521102938283"
         )
 
         assert request_headers["baggage"] == expected_outgoing_baggage
@@ -234,11 +234,12 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
     mock_send = mock.Mock()
     monkeypatch.setattr(HTTPSConnection, "send", mock_send)
 
-    # make sure transaction is always sampled
-    monkeypatch.setattr(random, "random", lambda: 0.1)
-
     sentry_init(traces_sample_rate=0.5, release="foo")
-    transaction = Transaction.continue_from_headers({})
+
+    # forced trace_id results in sample_rand=0.27862410307482766, so sampled=True
+    transaction = Transaction.continue_from_headers(
+        {}, trace_id="22222222222222222222222222222222"
+    )
 
     with start_transaction(transaction=transaction, name="Head SDK tx") as transaction:
         HTTPSConnection("www.squirrelchasers.com").request("GET", "/top-chasers")
@@ -259,12 +260,13 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch):
         assert request_headers["sentry-trace"] == expected_sentry_trace
 
         expected_outgoing_baggage = (
-            "sentry-trace_id=%s,"
+            "sentry-trace_id=22222222222222222222222222222222,"
+            "sentry-sample_rand=0.27862410307482766,"
             "sentry-environment=production,"
             "sentry-release=foo,"
             "sentry-sample_rate=0.5,"
             "sentry-sampled=%s"
-        ) % (transaction.trace_id, "true" if transaction.sampled else "false")
+        ) % ("true" if transaction.sampled else "false")
 
         assert request_headers["baggage"] == expected_outgoing_baggage
 
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -1,4 +1,6 @@
 import pytest
+
+import re
 from unittest import mock
 
 import sentry_sdk
@@ -95,10 +97,10 @@ def test_baggage_with_tracing_disabled(sentry_init):
 def test_baggage_with_tracing_enabled(sentry_init):
     sentry_init(traces_sample_rate=1.0, release="1.0.0", environment="dev")
     with start_transaction() as transaction:
-        expected_baggage = "sentry-trace_id={},sentry-environment=dev,sentry-release=1.0.0,sentry-sample_rate=1.0,sentry-sampled={}".format(
+        expected_baggage_re = r"^sentry-trace_id={},sentry-sample_rand=0\.\d+,sentry-environment=dev,sentry-release=1\.0\.0,sentry-sample_rate=1\.0,sentry-sampled={}$".format(
             transaction.trace_id, "true" if transaction.sampled else "false"
         )
-        assert get_baggage() == expected_baggage
+        assert re.match(expected_baggage_re, get_baggage())
 
 
 @pytest.mark.forked
diff --git a/tests/test_monitor.py b/tests/test_monitor.py
@@ -1,4 +1,3 @@
-import random
 from collections import Counter
 from unittest import mock
 
@@ -68,15 +67,15 @@ def test_transaction_uses_downsampled_rate(
     monitor = sentry_sdk.get_client().monitor
     monitor.interval = 0.1
 
-    # make sure rng doesn't sample
-    monkeypatch.setattr(random, "random", lambda: 0.9)
-
     assert monitor.is_healthy() is True
     monitor.run()
     assert monitor.is_healthy() is False
     assert monitor.downsample_factor == 1
 
-    with sentry_sdk.start_transaction(name="foobar") as transaction:
+    # trace_id forces sample_rand == 0.8766381713144122 >= 0.5, so sampled is False
+    with sentry_sdk.start_transaction(
+        name="foobar", trace_id="00000000000000000000000000000000"
+    ) as transaction:
         assert transaction.sampled is False
         assert transaction.sample_rate == 0.5
 
diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py
@@ -1,5 +1,4 @@
 import gc
-import random
 import re
 import sys
 import weakref
@@ -161,10 +160,11 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
     sentry_init(traces_sample_rate=sample_rate, release="foo")
     envelopes = capture_envelopes()
 
-    # make sure transaction is sampled for both cases
-    monkeypatch.setattr(random, "random", lambda: 0.1)
-
-    transaction = Transaction.continue_from_headers({}, name="Head SDK tx")
+    # force trace_id such that sample_rand is 0.27862410307482766, so transaction
+    # is sampled in both cases
+    transaction = Transaction.continue_from_headers(
+        {}, name="Head SDK tx", trace_id="22222222222222222222222222222222"
+    )
 
     # will create empty mutable baggage
     baggage = transaction._baggage
@@ -184,36 +184,47 @@ def test_dynamic_sampling_head_sdk_creates_dsc(
     assert baggage
     assert not baggage.mutable
     assert baggage.third_party_items == ""
-    assert baggage.sentry_items == {
-        "environment": "production",
-        "release": "foo",
-        "sample_rate": str(sample_rate),
-        "sampled": "true" if transaction.sampled else "false",
-        "transaction": "Head SDK tx",
-        "trace_id": trace_id,
+    assert baggage.sentry_items.keys() == {
+        "environment",
+        "release",
+        "sample_rate",
+        "sampled",
+        "transaction",
+        "trace_id",
+        "sample_rand",
     }
+    assert (
+        baggage.sentry_items.items()
+        >= {
+            "environment": "production",
+            "release": "foo",
+            "sample_rate": str(sample_rate),
+            "sampled": "true" if transaction.sampled else "false",
+            "transaction": "Head SDK tx",
+            "trace_id": trace_id,
+        }.items()
+    )
+    assert 0.0 <= float(baggage.sentry_items["sample_rand"]) < 1.0
 
     expected_baggage = (
         "sentry-trace_id=%s,"
+        "sentry-sample_rand=%s,"
         "sentry-environment=production,"
         "sentry-release=foo,"
         "sentry-transaction=Head%%20SDK%%20tx,"
         "sentry-sample_rate=%s,"
         "sentry-sampled=%s"
-        % (trace_id, sample_rate, "true" if transaction.sampled else "false")
+        % (
+            trace_id,
+            baggage.sentry_items["sample_rand"],
+            sample_rate,
+            "true" if transaction.sampled else "false",
+        )
     )
     assert baggage.serialize() == expected_baggage
 
     (envelope,) = envelopes
     assert envelope.headers["trace"] == baggage.dynamic_sampling_context()
-    assert envelope.headers["trace"] == {
-        "environment": "production",
-        "release": "foo",
-        "sample_rate": str(sample_rate),
-        "sampled": "true" if transaction.sampled else "false",
-        "transaction": "Head SDK tx",
-        "trace_id": trace_id,
-    }
 
 
 @pytest.mark.parametrize(
diff --git a/tests/tracing/test_sample_rand.py b/tests/tracing/test_sample_rand.py
@@ -0,0 +1,63 @@
+import pytest
+
+import sentry_sdk
+from sentry_sdk.tracing_utils import Baggage
+
+TEST_TRACE_ID_SAMPLE_RANDS = {
+    "00000000000000000000000000000000": 0.8766381713144122,
+    "01234567012345670123456701234567": 0.6451742521664413,
+    "0123456789abcdef0123456789abcdef": 0.9338861957669223,
+}
+"""
+A dictionary of some trace IDs used in the tests, and their precomputed sample_rand values.
+
+sample_rand values are pseudo-random numbers, deterministically generated from the trace ID.
+"""
+
+
+@pytest.mark.parametrize(
+    ("trace_id", "expected_sample_rand"),
+    TEST_TRACE_ID_SAMPLE_RANDS.items(),
+)
+# test 21 linearly spaced sample_rate values from 0.0 to 1.0, inclusive
+@pytest.mark.parametrize("sample_rate", (i / 20 for i in range(21)))
+def test_deterministic_sampled(
+    sentry_init, capture_events, sample_rate, trace_id, expected_sample_rand
+):
+    """
+    Test that the sample_rand value is deterministic based on the trace ID, and
+    that it is used to determine the sampling decision. Also, ensure that the
+    transaction's baggage contains the sample_rand value.
+    """
+    sentry_init(traces_sample_rate=sample_rate)
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(trace_id=trace_id) as transaction:
+        assert transaction.get_baggage().sentry_items["sample_rand"] == str(
+            expected_sample_rand
+        )
+
+    # Transaction event captured if sample_rand < sample_rate, indicating that
+    # sample_rand is used to make the sampling decision.
+    assert len(events) == int(expected_sample_rand < sample_rate)
+
+
+@pytest.mark.parametrize("sample_rand", (0.0, 0.2, 0.4, 0.6, 0.8))
+@pytest.mark.parametrize("sample_rate", (0.0, 0.2, 0.4, 0.6, 0.8, 1.0))
+def test_transaction_uses_incoming_sample_rand(
+    sentry_init, capture_events, sample_rate, sample_rand
+):
+    """
+    Test that the transaction uses the sample_rand value from the incoming baggage.
+    """
+    baggage = Baggage(sentry_items={"sample_rand": str(sample_rand)})
+
+    sentry_init(traces_sample_rate=sample_rate)
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(baggage=baggage) as transaction:
+        assert transaction.get_baggage().sentry_items["sample_rand"] == str(sample_rand)
+
+    # Transaction event captured if sample_rand < sample_rate, indicating that
+    # sample_rand is used to make the sampling decision.
+    assert len(events) == int(sample_rand < sample_rate)
diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py
@@ -7,6 +7,7 @@
 import sentry_sdk
 from sentry_sdk import start_span, start_transaction, capture_exception
 from sentry_sdk.tracing import Transaction
+from sentry_sdk.tracing_utils import Baggage
 from sentry_sdk.utils import logger
 
 
@@ -73,9 +74,9 @@ def test_uses_traces_sample_rate_correctly(
 ):
     sentry_init(traces_sample_rate=traces_sample_rate)
 
-    with mock.patch.object(random, "random", return_value=0.5):
-        transaction = start_transaction(name="dogpark")
-        assert transaction.sampled is expected_decision
+    baggage = Baggage(sentry_items={"sample_rand": 0.5})
+    transaction = start_transaction(name="dogpark", baggage=baggage)
+    assert transaction.sampled is expected_decision
 
 
 @pytest.mark.parametrize(
@@ -89,9 +90,9 @@ def test_uses_traces_sampler_return_value_correctly(
 ):
     sentry_init(traces_sampler=mock.Mock(return_value=traces_sampler_return_value))
 
-    with mock.patch.object(random, "random", return_value=0.5):
-        transaction = start_transaction(name="dogpark")
-        assert transaction.sampled is expected_decision
+    baggage = Baggage(sentry_items={"sample_rand": 0.5})
+    transaction = start_transaction(name="dogpark", baggage=baggage)
+    assert transaction.sampled is expected_decision
 
 
 @pytest.mark.parametrize("traces_sampler_return_value", [True, False])

Original file line number	Diff line number	Diff line change
`@@ -636,7 +636,7 @@ async def handler(request):`
`636`	`636`
`637`	`637`	`assert (`
`638`	`638`	`resp.request_info.headers["baggage"]`
`639`		`- == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"`
	`639`	`+ == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.3015579701611357,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"`
`640`	`640`	`)`
`641`	`641`
`642`	`642`
Original file line number	Diff line number	Diff line change
`@@ -192,7 +192,7 @@ def test_outgoing_trace_headers_append_to_baggage(`
`192`	`192`	`)`
`193`	`193`	`assert (`
`194`	`194`	`response.request.headers["baggage"]`
`195`		`- == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"`
	`195`	`+ == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-sample_rand=0.07190396862619497,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"`
`196`	`196`	`)`
`197`	`197`
`198`	`198`