Skip to content

Commit 9509c82

Browse files
linasm and robskillington
authored and committed
[dbnode] Refactoring dbShard (#2848)
1 parent b9547d8 commit 9509c82

File tree

6 files changed

+84
-366
lines changed

6 files changed

+84
-366
lines changed

src/dbnode/storage/namespace.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,10 +1732,6 @@ func (n *dbNamespace) aggregateTiles(
17321732
return 0, errNamespaceNotBootstrapped
17331733
}
17341734

1735-
n.RLock()
1736-
nsCtx := n.nsContextWithRLock()
1737-
n.RUnlock()
1738-
17391735
var (
17401736
processedShards = opts.InsOptions.MetricsScope().Counter("processed-shards")
17411737
targetShards = n.OwnedShards()
@@ -1778,7 +1774,7 @@ func (n *dbNamespace) aggregateTiles(
17781774
}
17791775

17801776
shardProcessedTileCount, err := targetShard.AggregateTiles(
1781-
sourceNs.ID(), sourceShard.ID(), blockReaders, writer, sourceBlockVolumes, opts, nsCtx.Schema)
1777+
sourceNs.ID(), n, sourceShard.ID(), blockReaders, writer, sourceBlockVolumes, opts)
17821778

17831779
processedTileCount += shardProcessedTileCount
17841780
processedShards.Inc(1)

src/dbnode/storage/namespace_test.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,8 +1415,8 @@ func TestNamespaceAggregateTiles(t *testing.T) {
14151415
targetBlockSize = 2 * time.Hour
14161416
start = time.Now().Truncate(targetBlockSize)
14171417
secondSourceBlockStart = start.Add(sourceBlockSize)
1418-
sourceShard0ID uint32 = 10
1419-
sourceShard1ID uint32 = 20
1418+
shard0ID uint32 = 10
1419+
shard1ID uint32 = 20
14201420
insOpts = instrument.NewOptions()
14211421
)
14221422

@@ -1440,12 +1440,12 @@ func TestNamespaceAggregateTiles(t *testing.T) {
14401440
sourceNs.shards[0] = sourceShard0
14411441
sourceNs.shards[1] = sourceShard1
14421442

1443-
sourceShard0.EXPECT().ID().Return(sourceShard0ID)
1443+
sourceShard0.EXPECT().ID().Return(shard0ID)
14441444
sourceShard0.EXPECT().IsBootstrapped().Return(true)
14451445
sourceShard0.EXPECT().LatestVolume(start).Return(5, nil)
14461446
sourceShard0.EXPECT().LatestVolume(start.Add(sourceBlockSize)).Return(15, nil)
14471447

1448-
sourceShard1.EXPECT().ID().Return(sourceShard1ID)
1448+
sourceShard1.EXPECT().ID().Return(shard1ID)
14491449
sourceShard1.EXPECT().IsBootstrapped().Return(true)
14501450
sourceShard1.EXPECT().LatestVolume(start).Return(7, nil)
14511451
sourceShard1.EXPECT().LatestVolume(start.Add(sourceBlockSize)).Return(17, nil)
@@ -1462,8 +1462,18 @@ func TestNamespaceAggregateTiles(t *testing.T) {
14621462
sourceBlockVolumes1 := []shardBlockVolume{{start, 7}, {secondSourceBlockStart, 17}}
14631463

14641464
sourceNsIDMatcher := ident.NewIDMatcher(sourceNsID.String())
1465-
targetShard0.EXPECT().AggregateTiles(sourceNsIDMatcher, sourceShard0ID, gomock.Any(), gomock.Any(), sourceBlockVolumes0, opts, targetNs.Schema()).Return(int64(3), nil)
1466-
targetShard1.EXPECT().AggregateTiles(sourceNsIDMatcher, sourceShard1ID, gomock.Any(), gomock.Any(), sourceBlockVolumes1, opts, targetNs.Schema()).Return(int64(2), nil)
1465+
1466+
targetShard0.EXPECT().
1467+
AggregateTiles(
1468+
sourceNsIDMatcher, targetNs, shard0ID, gomock.Len(2), gomock.Any(),
1469+
sourceBlockVolumes0, opts).
1470+
Return(int64(3), nil)
1471+
1472+
targetShard1.EXPECT().
1473+
AggregateTiles(
1474+
sourceNsIDMatcher, targetNs, shard1ID, gomock.Len(2), gomock.Any(),
1475+
sourceBlockVolumes1, opts).
1476+
Return(int64(2), nil)
14671477

14681478
processedTileCount, err := targetNs.AggregateTiles(sourceNs, opts)
14691479

src/dbnode/storage/shard.go

Lines changed: 31 additions & 204 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ import (
2929
"sync"
3030
"time"
3131

32-
"github.com/m3db/m3/src/dbnode/encoding"
33-
"github.com/m3db/m3/src/dbnode/encoding/tile"
34-
"github.com/m3db/m3/src/dbnode/generated/proto/annotation"
3532
"github.com/m3db/m3/src/dbnode/generated/proto/pagetoken"
3633
"github.com/m3db/m3/src/dbnode/namespace"
3734
"github.com/m3db/m3/src/dbnode/persist"
@@ -47,7 +44,6 @@ import (
4744
"github.com/m3db/m3/src/dbnode/storage/series/lookup"
4845
"github.com/m3db/m3/src/dbnode/tracepoint"
4946
"github.com/m3db/m3/src/dbnode/ts"
50-
"github.com/m3db/m3/src/dbnode/ts/downsample"
5147
"github.com/m3db/m3/src/dbnode/ts/writes"
5248
"github.com/m3db/m3/src/dbnode/x/xio"
5349
"github.com/m3db/m3/src/m3ninx/doc"
@@ -72,18 +68,16 @@ const (
7268
)
7369

7470
var (
75-
errShardEntryNotFound = errors.New("shard entry not found")
76-
errShardNotOpen = errors.New("shard is not open")
77-
errShardAlreadyTicking = errors.New("shard is already ticking")
78-
errShardClosingTickTerminated = errors.New("shard is closing, terminating tick")
79-
errShardInvalidPageToken = errors.New("shard could not unmarshal page token")
80-
errNewShardEntryTagsTypeInvalid = errors.New("new shard entry options error: tags type invalid")
81-
errNewShardEntryTagsIterNotAtIndexZero = errors.New("new shard entry options error: tags iter not at index zero")
82-
errShardIsNotBootstrapped = errors.New("shard is not bootstrapped")
83-
errShardAlreadyBootstrapped = errors.New("shard is already bootstrapped")
84-
errFlushStateIsNotInitialized = errors.New("shard flush state is not initialized")
85-
errFlushStateAlreadyInitialized = errors.New("shard flush state is already initialized")
86-
errTriedToLoadNilSeries = errors.New("tried to load nil series into shard")
71+
errShardEntryNotFound = errors.New("shard entry not found")
72+
errShardNotOpen = errors.New("shard is not open")
73+
errShardAlreadyTicking = errors.New("shard is already ticking")
74+
errShardClosingTickTerminated = errors.New("shard is closing, terminating tick")
75+
errShardInvalidPageToken = errors.New("shard could not unmarshal page token")
76+
errNewShardEntryTagsTypeInvalid = errors.New("new shard entry options error: tags type invalid")
77+
errShardIsNotBootstrapped = errors.New("shard is not bootstrapped")
78+
errShardAlreadyBootstrapped = errors.New("shard is already bootstrapped")
79+
errFlushStateIsNotInitialized = errors.New("shard flush state is not initialized")
80+
errTriedToLoadNilSeries = errors.New("tried to load nil series into shard")
8781

8882
// ErrDatabaseLoadLimitHit is the error returned when the database load limit
8983
// is hit or exceeded.
@@ -188,6 +182,7 @@ type dbShard struct {
188182
currRuntimeOptions dbShardRuntimeOptions
189183
logger *zap.Logger
190184
metrics dbShardMetrics
185+
tileAggregator TileAggregator
191186
ticking bool
192187
shard uint32
193188
coldWritesEnabled bool
@@ -328,6 +323,7 @@ func newDatabaseShard(
328323
coldWritesEnabled: namespaceMetadata.Options().ColdWritesEnabled(),
329324
logger: opts.InstrumentOptions().Logger(),
330325
metrics: newDatabaseShardMetrics(shard, scope),
326+
tileAggregator: opts.TileAggregator(),
331327
}
332328
s.insertQueue = newDatabaseShardInsertQueue(s.insertSeriesBatch,
333329
s.nowFn, scope, opts.InstrumentOptions().Logger())
@@ -2663,21 +2659,26 @@ func (s *dbShard) Repair(
26632659

26642660
func (s *dbShard) AggregateTiles(
26652661
sourceNsID ident.ID,
2666-
sourceShardID uint32,
2662+
targetNs Namespace,
2663+
shardID uint32,
26672664
blockReaders []fs.DataFileSetReader,
26682665
writer fs.StreamingWriter,
26692666
sourceBlockVolumes []shardBlockVolume,
26702667
opts AggregateTilesOptions,
2671-
targetSchemaDescr namespace.SchemaDescr,
26722668
) (int64, error) {
26732669
if len(blockReaders) != len(sourceBlockVolumes) {
2674-
return 0, fmt.Errorf("blockReaders and sourceBlockVolumes length mismatch (%d != %d)", len(blockReaders), len(sourceBlockVolumes))
2670+
return 0, fmt.Errorf(
2671+
"blockReaders and sourceBlockVolumes length mismatch (%d != %d)",
2672+
len(blockReaders),
2673+
len(sourceBlockVolumes))
26752674
}
26762675

26772676
openBlockReaders := make([]fs.DataFileSetReader, 0, len(blockReaders))
26782677
defer func() {
26792678
for _, reader := range openBlockReaders {
2680-
reader.Close()
2679+
if err := reader.Close(); err != nil {
2680+
s.logger.Error("could not close DataFileSetReader", zap.Error(err))
2681+
}
26812682
}
26822683
}()
26832684

@@ -2687,7 +2688,7 @@ func (s *dbShard) AggregateTiles(
26872688
openOpts := fs.DataReaderOpenOptions{
26882689
Identifier: fs.FileSetFileIdentifier{
26892690
Namespace: sourceNsID,
2690-
Shard: sourceShardID,
2691+
Shard: shardID,
26912692
BlockStart: sourceBlockVolume.blockStart,
26922693
VolumeIndex: sourceBlockVolume.latestVolume,
26932694
},
@@ -2706,46 +2707,15 @@ func (s *dbShard) AggregateTiles(
27062707
zap.Int("volumeIndex", sourceBlockVolume.latestVolume))
27072708
return 0, err
27082709
}
2709-
if blockReader.Entries() > maxEntries {
2710-
maxEntries = blockReader.Entries()
2710+
2711+
entries := blockReader.Entries()
2712+
if entries > maxEntries {
2713+
maxEntries = entries
27112714
}
27122715

27132716
openBlockReaders = append(openBlockReaders, blockReader)
27142717
}
27152718

2716-
crossBlockReader, err := fs.NewCrossBlockReader(openBlockReaders, s.opts.InstrumentOptions())
2717-
if err != nil {
2718-
s.logger.Error("NewCrossBlockReader", zap.Error(err))
2719-
return 0, err
2720-
}
2721-
defer crossBlockReader.Close()
2722-
2723-
tileOpts := tile.Options{
2724-
FrameSize: opts.Step,
2725-
Start: xtime.ToUnixNano(opts.Start),
2726-
ReaderIteratorPool: s.opts.ReaderIteratorPool(),
2727-
}
2728-
2729-
readerIter, err := tile.NewSeriesBlockIterator(crossBlockReader, tileOpts)
2730-
if err != nil {
2731-
s.logger.Error("error when creating new series block iterator", zap.Error(err))
2732-
return 0, err
2733-
}
2734-
2735-
closed := false
2736-
defer func() {
2737-
if !closed {
2738-
if err := readerIter.Close(); err != nil {
2739-
// NB: log the error on ungraceful exit.
2740-
s.logger.Error("could not close read iterator on error", zap.Error(err))
2741-
}
2742-
}
2743-
}()
2744-
2745-
encoder := s.opts.EncoderPool().Get()
2746-
defer encoder.Close()
2747-
encoder.Reset(opts.Start, 0, targetSchemaDescr)
2748-
27492719
latestTargetVolume, err := s.LatestVolume(opts.Start)
27502720
if err != nil {
27512721
return 0, err
@@ -2764,54 +2734,12 @@ func (s *dbShard) AggregateTiles(
27642734
return 0, err
27652735
}
27662736

2767-
var (
2768-
annotationPayload annotation.Payload
2769-
// NB: there is a maximum of 4 datapoints per frame for counters.
2770-
downsampledValues = make([]downsample.Value, 0, 4)
2771-
processedTileCount int64
2772-
segmentCapacity int
2773-
writerData = make([][]byte, 2)
2774-
multiErr xerrors.MultiError
2775-
)
2776-
2777-
for readerIter.Next() {
2778-
seriesIter, id, encodedTags := readerIter.Current()
2779-
2780-
seriesTileCount, err := encodeAggregatedSeries(seriesIter, annotationPayload, downsampledValues, encoder)
2781-
if err != nil {
2782-
s.metrics.largeTilesWriteErrors.Inc(1)
2783-
multiErr = multiErr.Add(err)
2784-
break
2785-
}
2786-
2787-
if seriesTileCount == 0 {
2788-
break
2789-
}
2790-
2791-
processedTileCount += seriesTileCount
2792-
segment := encoder.DiscardReset(opts.Start, segmentCapacity, targetSchemaDescr)
2793-
2794-
segmentLen := segment.Len()
2795-
if segmentLen > segmentCapacity {
2796-
// Will use the same capacity for the next series.
2797-
segmentCapacity = segmentLen
2798-
}
2799-
2800-
writerData[0] = segment.Head.Bytes()
2801-
writerData[1] = segment.Tail.Bytes()
2802-
checksum := segment.CalculateChecksum()
2803-
2804-
if err := writer.WriteAll(id, encodedTags, writerData, checksum); err != nil {
2805-
s.metrics.largeTilesWriteErrors.Inc(1)
2806-
multiErr = multiErr.Add(err)
2807-
} else {
2808-
s.metrics.largeTilesWrites.Inc(1)
2809-
}
2810-
2811-
segment.Finalize()
2812-
}
2737+
var multiErr xerrors.MultiError
28132738

2814-
if err := readerIter.Err(); err != nil {
2739+
processedTileCount, err := s.tileAggregator.AggregateTiles(
2740+
opts, targetNs, s.ID(), openBlockReaders, writer)
2741+
if err != nil {
2742+
// NB: cannot return on the error here, must finish writing.
28152743
multiErr = multiErr.Add(err)
28162744
}
28172745

@@ -2833,11 +2761,6 @@ func (s *dbShard) AggregateTiles(
28332761
}
28342762
}
28352763

2836-
closed = true
2837-
if err := readerIter.Close(); err != nil {
2838-
multiErr = multiErr.Add(err)
2839-
}
2840-
28412764
if err := multiErr.FinalError(); err != nil {
28422765
return 0, err
28432766
}
@@ -2849,102 +2772,6 @@ func (s *dbShard) AggregateTiles(
28492772
return processedTileCount, nil
28502773
}
28512774

2852-
func encodeAggregatedSeries(
2853-
seriesIter tile.SeriesFrameIterator,
2854-
annotationPayload annotation.Payload,
2855-
downsampledValues []downsample.Value,
2856-
encoder encoding.Encoder,
2857-
) (int64, error) {
2858-
var (
2859-
prevFrameLastValue = math.NaN()
2860-
processedTileCount int64
2861-
handleValueResets bool
2862-
firstUnit xtime.Unit
2863-
firstAnnotation ts.Annotation
2864-
err error
2865-
)
2866-
2867-
for seriesIter.Next() {
2868-
frame := seriesIter.Current()
2869-
2870-
frameValues := frame.Values()
2871-
if len(frameValues) == 0 {
2872-
continue
2873-
}
2874-
2875-
if processedTileCount == 0 {
2876-
firstUnit, err = frame.Units().Value(0)
2877-
if err != nil {
2878-
return 0, err
2879-
}
2880-
2881-
firstAnnotation, err = frame.Annotations().Value(0)
2882-
if err != nil {
2883-
return 0, err
2884-
}
2885-
2886-
annotationPayload.Reset()
2887-
if annotationPayload.Unmarshal(firstAnnotation) == nil {
2888-
// NB: unmarshall error might be a result of some historical annotation data
2889-
// which is not compatible with protobuf payload struct. This would generally mean
2890-
// that metrics type is unknown, so we should ignore the error here.
2891-
handleValueResets = annotationPayload.HandleValueResets
2892-
}
2893-
}
2894-
2895-
downsampledValues = downsampledValues[:0]
2896-
lastIdx := len(frameValues) - 1
2897-
2898-
if handleValueResets {
2899-
// Last value plus possible few more datapoints to preserve counter semantics.
2900-
downsampledValues = downsample.DownsampleCounterResets(prevFrameLastValue, frameValues, downsampledValues)
2901-
} else {
2902-
// Plain last value per frame.
2903-
downsampledValue := downsample.Value{
2904-
FrameIndex: lastIdx,
2905-
Value: frameValues[lastIdx],
2906-
}
2907-
downsampledValues = append(downsampledValues, downsampledValue)
2908-
}
2909-
2910-
if err = encodeDownsampledValues(downsampledValues, frame, firstUnit, firstAnnotation, encoder); err != nil {
2911-
return 0, err
2912-
}
2913-
2914-
prevFrameLastValue = frameValues[lastIdx]
2915-
processedTileCount++
2916-
}
2917-
2918-
if err := seriesIter.Err(); err != nil {
2919-
return 0, err
2920-
}
2921-
2922-
return processedTileCount, nil
2923-
}
2924-
2925-
func encodeDownsampledValues(
2926-
downsampledValues []downsample.Value,
2927-
frame tile.SeriesBlockFrame,
2928-
unit xtime.Unit,
2929-
annotation ts.Annotation,
2930-
encoder encoding.Encoder,
2931-
) error {
2932-
for _, downsampledValue := range downsampledValues {
2933-
timestamp := frame.Timestamps()[downsampledValue.FrameIndex]
2934-
dp := ts.Datapoint{
2935-
Timestamp: timestamp,
2936-
TimestampNanos: xtime.ToUnixNano(timestamp),
2937-
Value: downsampledValue.Value,
2938-
}
2939-
2940-
if err := encoder.Encode(dp, unit, annotation); err != nil {
2941-
return err
2942-
}
2943-
}
2944-
2945-
return nil
2946-
}
2947-
29482775
func (s *dbShard) BootstrapState() BootstrapState {
29492776
s.RLock()
29502777
bs := s.bootstrapState

0 commit comments

Comments
 (0)