@@ -4,8 +4,10 @@ import (
4
4
"encoding/json"
5
5
"errors"
6
6
"fmt"
7
+ "io/ioutil"
7
8
"math"
8
9
"os"
10
+ "path/filepath"
9
11
"strconv"
10
12
"strings"
11
13
"sync"
@@ -32,6 +34,12 @@ const (
32
34
defaultMaxProcs = 1 // 默认没有并发
33
35
// TypeMarshalError 表示marshal出错
34
36
TypeMarshalError = reqerr .SendErrorType ("Data Marshal failed" )
37
+ // KeyUnMarshalError
38
+ KeyUnMarshalError = "Data unmarshal failed"
39
+ // NumUnMarshalError
40
+ NumUnMarshalError = 10
41
+ // lag file
42
+ LagFilename = "meta.lag"
35
43
)
36
44
37
45
var _ SkipDeepCopySender = & FtSender {}
@@ -202,6 +210,9 @@ func newFtSender(innerSender Sender, runnerName string, opt *FtOption) (*FtSende
202
210
isBlock : opt .isBlock ,
203
211
backoff : utils .NewBackoff (2 , 1 , 1 * time .Second , 5 * time .Minute ),
204
212
}
213
+ ftSender .statsMutex .Lock ()
214
+ ftSender .stats .FtSendLag = ftSender .readLag ()
215
+ ftSender .statsMutex .Unlock ()
205
216
206
217
if opt .innerSenderType == TypePandora {
207
218
ftSender .pandoraKeyCache = make (map [string ]KeyInfo )
@@ -269,9 +280,17 @@ func (ft *FtSender) RawSend(datas []string) error {
269
280
} else {
270
281
// se 中的 lasterror 和 senderror 都为空,需要使用 se.FtQueueLag
271
282
se .AddSuccessNum (len (datas ))
283
+ ft .statsMutex .Lock ()
284
+ ft .stats .FtSendLag = ft .stats .FtSendLag + int64 (len (datas ))
285
+ ft .statsMutex .Unlock ()
272
286
ft .backoff .Reset ()
273
287
}
274
288
se .FtQueueLag = ft .BackupQueue .Depth () + ft .logQueue .Depth ()
289
+ if se .FtQueueLag == 0 {
290
+ ft .statsMutex .Lock ()
291
+ ft .stats .FtSendLag = 0
292
+ ft .statsMutex .Unlock ()
293
+ }
275
294
}
276
295
return se
277
296
}
@@ -350,9 +369,17 @@ func (ft *FtSender) Send(datas []Data) error {
350
369
} else {
351
370
// se 中的 lasterror 和 senderror 都为空,需要使用 se.FtQueueLag
352
371
se .AddSuccessNum (len (datas ))
372
+ ft .statsMutex .Lock ()
373
+ ft .stats .FtSendLag = ft .stats .FtSendLag + int64 (len (datas ))
374
+ ft .statsMutex .Unlock ()
353
375
ft .backoff .Reset ()
354
376
}
355
377
se .FtQueueLag = ft .BackupQueue .Depth () + ft .logQueue .Depth ()
378
+ if se .FtQueueLag == 0 {
379
+ ft .statsMutex .Lock ()
380
+ ft .stats .FtSendLag = 0
381
+ ft .statsMutex .Unlock ()
382
+ }
356
383
return se
357
384
}
358
385
@@ -391,6 +418,9 @@ func (ft *FtSender) Close() error {
391
418
// persist queue's meta data
392
419
ft .logQueue .Close ()
393
420
ft .BackupQueue .Close ()
421
+ ft .statsMutex .Lock ()
422
+ ft .writeLag (ft .stats .FtSendLag )
423
+ ft .statsMutex .Unlock ()
394
424
395
425
return ft .innerSender .Close ()
396
426
}
@@ -477,6 +507,9 @@ func (ft *FtSender) saveToFile(datas []Data) error {
477
507
}
478
508
479
509
func (ft * FtSender ) asyncSendLogFromQueue () {
510
+ // if not sleep, queue lag may be cleared
511
+ time .Sleep (time .Second * 10 )
512
+
480
513
for i := 0 ; i < ft .procs ; i ++ {
481
514
if ft .opt .sendRaw {
482
515
readLinesChan := make (<- chan []string )
@@ -502,18 +535,31 @@ func (ft *FtSender) asyncSendLogFromQueue() {
502
535
}
503
536
504
537
// trySend 从bytes反序列化数据后尝试发送数据
505
- func (ft * FtSender ) trySendBytes (dat []byte , failSleep int , isRetry bool ) (backDataContext []* datasContext , err error ) {
538
+ func (ft * FtSender ) trySendBytes (dat []byte , failSleep int , isRetry bool , isFromQueue bool ) (backDataContext []* datasContext , err error ) {
506
539
if ft .opt .sendRaw {
507
540
datas , err := ft .unmarshalRaws (dat )
508
541
if err != nil {
509
- return nil , err
542
+ return nil , errors . New ( KeyUnMarshalError + ":" + err . Error ())
510
543
}
544
+ ft .statsMutex .Lock ()
545
+ ft .stats .FtSendLag = ft .stats .FtSendLag - int64 (len (datas ))
546
+ if ft .stats .FtSendLag < 0 {
547
+ ft .stats .FtSendLag = 0
548
+ }
549
+ ft .statsMutex .Unlock ()
550
+
511
551
return ft .backOffSendRawFromQueue (datas , failSleep , isRetry )
512
552
}
513
553
datas , err := ft .unmarshalData (dat )
514
554
if err != nil {
515
- return nil , err
555
+ return nil , errors .New (KeyUnMarshalError + ":" + err .Error ())
556
+ }
557
+ ft .statsMutex .Lock ()
558
+ ft .stats .FtSendLag = ft .stats .FtSendLag - int64 (len (datas ))
559
+ if ft .stats .FtSendLag < 0 {
560
+ ft .stats .FtSendLag = 0
516
561
}
562
+ ft .statsMutex .Unlock ()
517
563
518
564
return ft .backOffSendFromQueue (datas , failSleep , isRetry )
519
565
}
@@ -562,6 +608,9 @@ func (ft *FtSender) trySendRaws(datas []string, failSleep int, isRetry bool) (ba
562
608
log .Errorf ("Runner[%v] Sender[%v] cannot write points back to queue %v: %v, discard datas %d" , ft .runnerName , ft .innerSender .Name (), ft .BackupQueue .Name (), err , len (datas ))
563
609
return nil , nil
564
610
}
611
+ ft .statsMutex .Lock ()
612
+ ft .stats .FtSendLag += int64 (len (v .Lines ))
613
+ ft .statsMutex .Unlock ()
565
614
}
566
615
567
616
time .Sleep (time .Second * time .Duration (math .Pow (2 , float64 (failSleep ))))
@@ -616,6 +665,9 @@ func (ft *FtSender) trySendDatas(datas []Data, failSleep int, isRetry bool) (bac
616
665
log .Errorf ("Runner[%v] Sender[%v] cannot write points back to queue %v: %v, discard datas %d" , ft .runnerName , ft .innerSender .Name (), ft .BackupQueue .Name (), err , len (datas ))
617
666
return nil , nil
618
667
}
668
+ ft .statsMutex .Lock ()
669
+ ft .stats .FtSendLag += int64 (len (v .Datas ))
670
+ ft .statsMutex .Unlock ()
619
671
}
620
672
621
673
time .Sleep (time .Second * time .Duration (math .Pow (2 , float64 (failSleep ))))
@@ -876,6 +928,7 @@ func (ft *FtSender) sendRawFromQueue(queueName string, readChan <-chan []byte, r
876
928
timer := time .NewTicker (time .Second )
877
929
defer timer .Stop ()
878
930
numWaits := 1
931
+ unmarshalDataError := 0
879
932
var curDataContext , otherDataContext []* datasContext
880
933
var curIdx int
881
934
var backDataContext []* datasContext
@@ -891,8 +944,14 @@ func (ft *FtSender) sendRawFromQueue(queueName string, readChan <-chan []byte, r
891
944
} else {
892
945
select {
893
946
case bytes := <- readChan :
894
- backDataContext , err = ft .trySendBytes (bytes , numWaits , isRetry )
947
+ backDataContext , err = ft .trySendBytes (bytes , numWaits , isRetry , true )
895
948
case datas := <- readDatasChan :
949
+ ft .statsMutex .Lock ()
950
+ ft .stats .FtSendLag = ft .stats .FtSendLag - int64 (len (datas ))
951
+ if ft .stats .FtSendLag < 0 {
952
+ ft .stats .FtSendLag = 0
953
+ }
954
+ ft .statsMutex .Unlock ()
896
955
backDataContext , err = ft .backOffSendRawFromQueue (datas , numWaits , isRetry )
897
956
case <- timer .C :
898
957
continue
@@ -908,6 +967,15 @@ func (ft *FtSender) sendRawFromQueue(queueName string, readChan <-chan []byte, r
908
967
if numWaits > 5 {
909
968
numWaits = 5
910
969
}
970
+ if strings .HasPrefix (err .Error (), KeyUnMarshalError ) {
971
+ unmarshalDataError ++
972
+ if unmarshalDataError > NumUnMarshalError {
973
+ time .Sleep (time .Second )
974
+ log .Errorf ("Runner[%s] Sender[%s] sleep 1s due to unmarshal err" , ft .runnerName , ft .innerSender .Name (), queueName , err )
975
+ }
976
+ } else {
977
+ unmarshalDataError = 0
978
+ }
911
979
}
912
980
if backDataContext != nil {
913
981
otherDataContext = append (otherDataContext , backDataContext ... )
@@ -924,6 +992,7 @@ func (ft *FtSender) sendFromQueue(queueName string, readChan <-chan []byte, read
924
992
timer := time .NewTicker (time .Second )
925
993
defer timer .Stop ()
926
994
numWaits := 1
995
+ unmarshalDataError := 0
927
996
var curDataContext , otherDataContext []* datasContext
928
997
var curIdx int
929
998
var backDataContext []* datasContext
@@ -939,8 +1008,14 @@ func (ft *FtSender) sendFromQueue(queueName string, readChan <-chan []byte, read
939
1008
} else {
940
1009
select {
941
1010
case bytes := <- readChan :
942
- backDataContext , err = ft .trySendBytes (bytes , numWaits , isRetry )
1011
+ backDataContext , err = ft .trySendBytes (bytes , numWaits , isRetry , true )
943
1012
case datas := <- readDatasChan :
1013
+ ft .statsMutex .Lock ()
1014
+ ft .stats .FtSendLag = ft .stats .FtSendLag - int64 (len (datas ))
1015
+ if ft .stats .FtSendLag < 0 {
1016
+ ft .stats .FtSendLag = 0
1017
+ }
1018
+ ft .statsMutex .Unlock ()
944
1019
backDataContext , err = ft .backOffSendFromQueue (datas , numWaits , isRetry )
945
1020
case <- timer .C :
946
1021
continue
@@ -956,6 +1031,15 @@ func (ft *FtSender) sendFromQueue(queueName string, readChan <-chan []byte, read
956
1031
if numWaits > 5 {
957
1032
numWaits = 5
958
1033
}
1034
+ if strings .HasPrefix (err .Error (), KeyUnMarshalError ) {
1035
+ unmarshalDataError ++
1036
+ if unmarshalDataError > NumUnMarshalError {
1037
+ time .Sleep (time .Second )
1038
+ log .Errorf ("Runner[%s] Sender[%s] sleep 1s due to unmarshal err" , ft .runnerName , ft .innerSender .Name (), queueName , err )
1039
+ }
1040
+ } else {
1041
+ unmarshalDataError = 0
1042
+ }
959
1043
}
960
1044
if backDataContext != nil {
961
1045
otherDataContext = append (otherDataContext , backDataContext ... )
@@ -993,8 +1077,8 @@ func SplitData(data string) (valArray []string) {
993
1077
valArray = SplitDataWithSplitSize (valArray , data [start :offset ], DefaultSplitSize )
994
1078
if len (valArray ) > 0 {
995
1079
// 最后一个分片参与下次split
996
- start = offset - len (valArray [len (valArray ) - 1 ])
997
- valArray = valArray [:len (valArray ) - 1 ]
1080
+ start = offset - len (valArray [len (valArray )- 1 ])
1081
+ valArray = valArray [:len (valArray )- 1 ]
998
1082
}
999
1083
continue
1000
1084
}
@@ -1017,7 +1101,7 @@ func SplitDataWithSplitSize(originArray []string, data string, splitSize int64)
1017
1101
if len (originArray ) != 0 {
1018
1102
num := (DefaultMaxBatchSize - int64 (len (originArray [len (originArray )- 1 ]))) / splitSize
1019
1103
if num > 0 {
1020
- end := num * splitSize
1104
+ end := num * splitSize
1021
1105
if end > int64 (len (data )) {
1022
1106
end = int64 (len (data ))
1023
1107
}
@@ -1200,3 +1284,34 @@ func (ft *FtSender) backOffReTrySendRaw(lines []string, isRetry bool) (res []*da
1200
1284
time .Sleep (backoff .Duration ())
1201
1285
}
1202
1286
}
1287
+
1288
+ // readLag read lag from file
1289
+ func (ft * FtSender ) readLag () int64 {
1290
+ path := filepath .Join (ft .opt .saveLogPath , LagFilename )
1291
+ f , err := ioutil .ReadFile (path )
1292
+ if err != nil {
1293
+ log .Errorf ("Runner[%v] Sender[%v] read file error : %v" , ft .runnerName , ft .innerSender .Name (), err )
1294
+ return 0
1295
+ }
1296
+ lag , err := strconv .ParseInt (string (f ), 10 , 64 )
1297
+ if err != nil {
1298
+ log .Errorf ("Runner[%v] Sender[%v] parse lag error : %v" , ft .runnerName , ft .innerSender .Name (), err )
1299
+ }
1300
+ return lag
1301
+ }
1302
+
1303
+ // writeLag write lag into file
1304
+ func (ft * FtSender ) writeLag (lag int64 ) error {
1305
+ path := filepath .Join (ft .opt .saveLogPath , LagFilename )
1306
+ file , err := os .OpenFile (path , os .O_WRONLY | os .O_TRUNC | os .O_CREATE , 0666 )
1307
+ defer func () {
1308
+ file .Sync ()
1309
+ file .Close ()
1310
+ }()
1311
+ if err != nil {
1312
+ return err
1313
+ }
1314
+ lagStr := strconv .FormatInt (lag , 10 )
1315
+ _ , err = file .WriteString (lagStr )
1316
+ return err
1317
+ }
0 commit comments