68
68
record_hash_calculation_metrics ,
69
69
record_new_group_metrics ,
70
70
)
71
+ from sentry .grouping .ingest .seer import get_seer_similar_issues , should_call_seer_for_grouping
71
72
from sentry .grouping .ingest .utils import (
72
73
add_group_id_to_grouphashes ,
73
74
check_for_category_mismatch ,
136
137
from sentry .utils .safe import get_path , safe_execute , setdefault_path , trim
137
138
from sentry .utils .sdk import set_measurement
138
139
from sentry .utils .tag_normalization import normalized_sdk_tag_from_event
140
+ from sentry .utils .types import NonNone
139
141
140
142
if TYPE_CHECKING :
141
143
from sentry .eventstore .models import BaseEvent , Event
@@ -1436,6 +1438,15 @@ def _save_aggregate(
1436
1438
):
1437
1439
# These values will get overridden with whatever happens inside the lock if we do manage
1438
1440
# to acquire it, so it should only end up with `wait_for_lock` if we don't
1441
+ #
1442
+ # TODO: If we're using this `outcome` value for anything more than a count in DD (in
1443
+ # other words, if we care about duration), we should probably update it so that when an
1444
+ # event does have to wait, we record whether during its wait the event which got the
1445
+ # lock first
1446
+ # a) created a new group without consulting Seer,
1447
+ # b) created a new group because Seer didn't find a close enough match, or
1448
+ # c) used an existing group found by Seer
1449
+ # because which of those things happened will have an effect on how long the event had to wait.
1439
1450
span .set_tag ("outcome" , "wait_for_lock" )
1440
1451
metric_tags ["outcome" ] = "wait_for_lock"
1441
1452
@@ -1444,25 +1455,29 @@ def _save_aggregate(
1444
1455
all_grouphash_ids .append (root_hierarchical_grouphash .id )
1445
1456
1446
1457
# If we're in this branch, we checked our grouphashes and didn't find one with a group
1447
- # attached. We thus want to create a new group, but we need to guard against another
1448
- # event with the same hash coming in before we're done here and also thinking it needs
1449
- # to create a new group. To prevent this, we're using double-checked locking
1458
+ # attached. We thus want to either ask seer for a nearest neighbor group (and create a
1459
+ # new group if one isn't found) or just create a new group without consulting seer, but
1460
+ # either way we need to guard against another event with the same hash coming in before
1461
+ # we're done here and also thinking it needs to talk to seer and/or create a new group.
1462
+ # To prevent this, we're using double-checked locking
1450
1463
# (https://en.wikipedia.org/wiki/Double-checked_locking).
1451
1464
1452
1465
# First, try to lock the relevant rows in the `GroupHash` table. If another (identically
1453
- # hashed) event is also in the process of creating a group and has grabbed the lock
1454
- # before us, we'll block here until it's done. If not, we've now got the lock and other
1455
- # identically-hashed events will have to wait for us.
1466
+ # hashed) event is already in the process of talking to seer and/or creating a group and
1467
+ # has grabbed the lock before us, we'll block here until it's done. If not, we've now
1468
+ # got the lock and other identically-hashed events will have to wait for us.
1456
1469
all_grouphashes = list (
1457
1470
GroupHash .objects .filter (id__in = all_grouphash_ids ).select_for_update ()
1458
1471
)
1459
1472
1460
1473
flat_grouphashes = [gh for gh in all_grouphashes if gh .hash in hashes .hashes ]
1461
1474
1462
- # Now check again to see if any of our grouphashes have a group. In the first race
1463
- # condition scenario above, we'll have been blocked long enough for the other event to
1464
- # have created the group and updated our grouphashes with a group id, which means this
1465
- # time, we'll find something.
1475
+ # Now check again to see if any of our grouphashes have a group. If we got the lock, the
1476
+ # result won't have changed and we still won't find anything. If we didn't get it, we'll
1477
+ # have blocked until whichever identically-hashed event *did* get the lock has either
1478
+ created a new group for our hashes or assigned them to a neighboring group suggested
1479
+ # by seer. If that happens, we'll skip this whole branch and jump down to the same one
1480
+ # we would have landed in had we found a group to begin with.
1466
1481
existing_grouphash , root_hierarchical_hash = find_existing_grouphash (
1467
1482
project , flat_grouphashes , hashes .hierarchical_hashes
1468
1483
)
@@ -1474,10 +1489,34 @@ def _save_aggregate(
1474
1489
else :
1475
1490
root_hierarchical_grouphash = None
1476
1491
1477
- # If we still haven't found a matching grouphash, we're now safe to go ahead and create
1478
- # the group.
1492
+ # If we still haven't found a matching grouphash, we're now safe to go ahead and talk to
1493
+ # seer and/or create the group.
1479
1494
if existing_grouphash is None :
1480
- group = _create_group (project , event , ** group_creation_kwargs )
1495
+ seer_matched_group = None
1496
+
1497
+ if should_call_seer_for_grouping (event , project ):
1498
+ try :
1499
+ # If the `projects:similarity-embeddings-grouping` feature is disabled,
1500
+ # we'll still get back result metadata, but `seer_matched_group` will be None
1501
+ seer_response_data , seer_matched_group = get_seer_similar_issues (
1502
+ event , primary_hashes
1503
+ )
1504
+ event .data ["seer_similarity" ] = seer_response_data
1505
+
1506
+ # We only want to add this data to new groups, while we're testing
1507
+ # TODO: Remove this once we're out of the testing phase
1508
+ if not seer_matched_group :
1509
+ group_creation_kwargs ["data" ]["metadata" ][
1510
+ "seer_similarity"
1511
+ ] = seer_response_data
1512
+
1513
+ # Insurance - in theory we shouldn't ever land here
1514
+ except Exception as e :
1515
+ sentry_sdk .capture_exception (
1516
+ e , tags = {"event" : event .event_id , "project" : project .id }
1517
+ )
1518
+
1519
+ group = seer_matched_group or _create_group (project , event , ** group_creation_kwargs )
1481
1520
1482
1521
if root_hierarchical_grouphash is not None :
1483
1522
new_hashes = [root_hierarchical_grouphash ]
@@ -1488,41 +1527,64 @@ def _save_aggregate(
1488
1527
state = GroupHash .State .LOCKED_IN_MIGRATION
1489
1528
).update (group = group )
1490
1529
1491
- is_new = True
1492
- is_regression = False
1530
+ is_new = not seer_matched_group
1531
+ is_regression = (
1532
+ False
1533
+ if is_new
1534
+ else _process_existing_aggregate (
1535
+ # If `seer_matched_group` were `None`, `is_new` would be true and we
1536
+ # wouldn't be here
1537
+ group = NonNone (seer_matched_group ),
1538
+ event = event ,
1539
+ incoming_group_values = group_creation_kwargs ,
1540
+ release = release ,
1541
+ )
1542
+ )
1493
1543
1494
- span .set_tag ("outcome" , "new_group" )
1495
- metric_tags ["outcome" ] = "new_group"
1544
+ span .set_tag ("outcome" , "new_group" if is_new else "seer_match" )
1545
+ metric_tags ["outcome" ] = "new_group" if is_new else "seer_match"
1496
1546
record_calculation_metric_with_result (
1497
1547
project = project ,
1498
1548
has_secondary_hashes = has_secondary_hashes ,
1499
1549
result = "no_match" ,
1500
1550
)
1501
1551
1502
- metrics .incr (
1503
- "group.created" ,
1504
- skip_internal = True ,
1505
- tags = {
1506
- "platform" : event .platform or "unknown" ,
1507
- "sdk" : normalized_sdk_tag_from_event (event ),
1508
- },
1509
- )
1510
-
1511
- # This only applies to events with stacktraces
1512
- frame_mix = event .get_event_metadata ().get ("in_app_frame_mix" )
1513
- if frame_mix :
1552
+ if is_new :
1514
1553
metrics .incr (
1515
- "grouping.in_app_frame_mix " ,
1516
- sample_rate = 1.0 ,
1554
+ "group.created " ,
1555
+ skip_internal = True ,
1517
1556
tags = {
1518
1557
"platform" : event .platform or "unknown" ,
1519
1558
"sdk" : normalized_sdk_tag_from_event (event ),
1520
- "frame_mix" : frame_mix ,
1521
1559
},
1522
1560
)
1523
1561
1562
+ # This only applies to events with stacktraces, and we only do this for new
1563
+ # groups, because we assume that if Seer puts an event in an existing group, it
1564
+ # and the existing group have the same frame mix
1565
+ frame_mix = event .get_event_metadata ().get ("in_app_frame_mix" )
1566
+ if frame_mix :
1567
+ metrics .incr (
1568
+ "grouping.in_app_frame_mix" ,
1569
+ sample_rate = 1.0 ,
1570
+ tags = {
1571
+ "platform" : event .platform or "unknown" ,
1572
+ "sdk" : normalized_sdk_tag_from_event (event ),
1573
+ "frame_mix" : frame_mix ,
1574
+ },
1575
+ )
1576
+
1524
1577
return GroupInfo (group , is_new , is_regression )
1525
1578
1579
+ # If we land here, it's because either:
1580
+ #
1581
+ # a) There's an existing group with one of our hashes and we found it the first time we looked.
1582
+ #
1583
+ # b) We didn't find a group the first time we looked, but another identically-hashed event beat
1584
+ # us to the lock and while we were waiting either created a new group or assigned our hashes to
1585
+ # a neighboring group suggested by seer - such that when we finally got the lock and looked
1586
+ # again, this time there was a group to find.
1587
+
1526
1588
group = Group .objects .get (id = existing_grouphash .group_id )
1527
1589
if group .issue_category != GroupCategory .ERROR :
1528
1590
logger .info (
@@ -1559,9 +1621,10 @@ def _save_aggregate(
1559
1621
record_calculation_metric_with_result (
1560
1622
project = project ,
1561
1623
has_secondary_hashes = has_secondary_hashes ,
1562
- # If at least one primary hash value isn't new, then we will have found it, since we check
1563
- # those before the secondary hash values. If the primary hash values are all new, then we
1564
- # must have found a secondary hash (or we'd be in the group-creation branch).
1624
+ # If at least one primary hash value isn't new, then we'll definitely have found it, since
1625
+ # we check all of the primary hashes before any secondary ones. If the primary hash values
1626
+ # *are* all new, then we must have gotten here by finding a secondary hash (or we'd be in
1627
+ # the group-creation/seer-consultation branch).
1565
1628
result = "found_primary" if not all_primary_hashes_are_new else "found_secondary" ,
1566
1629
)
1567
1630
@@ -1604,6 +1667,8 @@ def _save_aggregate(
1604
1667
return GroupInfo (group , is_new , is_regression )
1605
1668
1606
1669
1670
+ # TODO: None of the seer logic has been added to this version yet, so you can't simultaneously use
1671
+ # optimized transitions and seer
1607
1672
def _save_aggregate_new (
1608
1673
event : Event ,
1609
1674
job : Job ,
0 commit comments