Skip to content

Commit ecbb03d

Browse files
authored
Improve testing and try to fix MySQL hanging (#32515)
By some CI fine tunes (`run tests`), SQLite & MSSQL could complete in about 12~13 minutes (before > 14), MySQL could complete in 18 minutes (before: about 23 or even > 30) Major changes: 1. use tmpfs for MySQL storage 1. run `make test-mysql` instead of `make integration-test-coverage` because the code coverage is not really used at the moment. 1. refactor testlogger to make it more reliable and be able to report stuck stacktrace 1. do not requeue failed items when a queue is being flushed (failed items would keep failing and make flush uncompleted) 1. reduce the file sizes for testing 1. use math ChaCha20 random data instead of crypot/rand (for testing purpose only) 1. no need to `DeleteRepository` in `TestLinguist` 1. other related refactoring to make code easier to maintain
1 parent a0c0cb3 commit ecbb03d

File tree

15 files changed

+197
-242
lines changed

15 files changed

+197
-242
lines changed

.github/workflows/pull-db-tests.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,15 @@ jobs:
154154
runs-on: ubuntu-latest
155155
services:
156156
mysql:
157-
image: mysql:8.0
157+
# the bitnami mysql image has more options than the official one, it's easier to customize
158+
image: bitnami/mysql:8.0
158159
env:
159-
MYSQL_ALLOW_EMPTY_PASSWORD: true
160+
ALLOW_EMPTY_PASSWORD: true
160161
MYSQL_DATABASE: testgitea
161162
ports:
162163
- "3306:3306"
164+
options: >-
165+
--mount type=tmpfs,destination=/bitnami/mysql/data
163166
elasticsearch:
164167
image: elasticsearch:7.5.0
165168
env:
@@ -188,7 +191,8 @@ jobs:
188191
- name: run migration tests
189192
run: make test-mysql-migration
190193
- name: run tests
191-
run: make integration-test-coverage
194+
# run: make integration-test-coverage (at the moment, no coverage is really handled)
195+
run: make test-mysql
192196
env:
193197
TAGS: bindata
194198
RACE_ENABLED: true

models/migrations/base/tests.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"code.gitea.io/gitea/models/unittest"
1616
"code.gitea.io/gitea/modules/base"
1717
"code.gitea.io/gitea/modules/git"
18-
"code.gitea.io/gitea/modules/log"
1918
"code.gitea.io/gitea/modules/setting"
2019
"code.gitea.io/gitea/modules/testlogger"
2120

@@ -91,21 +90,19 @@ func PrepareTestEnv(t *testing.T, skip int, syncModels ...any) (*xorm.Engine, fu
9190
}
9291

9392
func MainTest(m *testing.M) {
94-
log.RegisterEventWriter("test", testlogger.NewTestLoggerWriter)
93+
testlogger.Init()
9594

9695
giteaRoot := base.SetupGiteaRoot()
9796
if giteaRoot == "" {
98-
fmt.Println("Environment variable $GITEA_ROOT not set")
99-
os.Exit(1)
97+
testlogger.Fatalf("Environment variable $GITEA_ROOT not set\n")
10098
}
10199
giteaBinary := "gitea"
102100
if runtime.GOOS == "windows" {
103101
giteaBinary += ".exe"
104102
}
105103
setting.AppPath = filepath.Join(giteaRoot, giteaBinary)
106104
if _, err := os.Stat(setting.AppPath); err != nil {
107-
fmt.Printf("Could not find gitea binary at %s\n", setting.AppPath)
108-
os.Exit(1)
105+
testlogger.Fatalf("Could not find gitea binary at %s\n", setting.AppPath)
109106
}
110107

111108
giteaConf := os.Getenv("GITEA_CONF")
@@ -122,17 +119,15 @@ func MainTest(m *testing.M) {
122119

123120
tmpDataPath, err := os.MkdirTemp("", "data")
124121
if err != nil {
125-
fmt.Printf("Unable to create temporary data path %v\n", err)
126-
os.Exit(1)
122+
testlogger.Fatalf("Unable to create temporary data path %v\n", err)
127123
}
128124

129125
setting.CustomPath = filepath.Join(setting.AppWorkPath, "custom")
130126
setting.AppDataPath = tmpDataPath
131127

132128
unittest.InitSettings()
133129
if err = git.InitFull(context.Background()); err != nil {
134-
fmt.Printf("Unable to InitFull: %v\n", err)
135-
os.Exit(1)
130+
testlogger.Fatalf("Unable to InitFull: %v\n", err)
136131
}
137132
setting.LoadDBSetting()
138133
setting.InitLoggersForTest()

modules/log/color.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,19 @@ type ColoredValue struct {
8686
colors []ColorAttribute
8787
}
8888

89+
var _ fmt.Formatter = (*ColoredValue)(nil)
90+
8991
func (c *ColoredValue) Format(f fmt.State, verb rune) {
9092
_, _ = f.Write(ColorBytes(c.colors...))
9193
s := fmt.Sprintf(fmt.FormatString(f, verb), c.v)
9294
_, _ = f.Write([]byte(s))
9395
_, _ = f.Write(resetBytes)
9496
}
9597

98+
func (c *ColoredValue) Value() any {
99+
return c.v
100+
}
101+
96102
func NewColoredValue(v any, color ...ColorAttribute) *ColoredValue {
97103
return &ColoredValue{v: v, colors: color}
98104
}

modules/queue/workergroup.go

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ var (
2323
)
2424

2525
func init() {
26-
unhandledItemRequeueDuration.Store(int64(5 * time.Second))
26+
unhandledItemRequeueDuration.Store(int64(time.Second))
2727
}
2828

2929
// workerGroup is a group of workers to work with a WorkerPoolQueue
@@ -104,7 +104,12 @@ func (q *WorkerPoolQueue[T]) doWorkerHandle(batch []T) {
104104
// if none of the items were handled, it should back-off for a few seconds
105105
// in this case the handler (eg: document indexer) may have encountered some errors/failures
106106
if len(unhandled) == len(batch) && unhandledItemRequeueDuration.Load() != 0 {
107+
if q.isFlushing.Load() {
108+
return // do not requeue items when flushing, since all items failed, requeue them will continue failing.
109+
}
107110
log.Error("Queue %q failed to handle batch of %d items, backoff for a few seconds", q.GetName(), len(batch))
111+
// TODO: ideally it shouldn't "sleep" here (blocks the worker, then blocks flush).
112+
// It could debounce the requeue operation, and try to requeue the items in the future.
108113
select {
109114
case <-q.ctxRun.Done():
110115
case <-time.After(time.Duration(unhandledItemRequeueDuration.Load())):
@@ -193,6 +198,9 @@ func (q *WorkerPoolQueue[T]) doStartNewWorker(wp *workerGroup[T]) {
193198
// doFlush flushes the queue: it tries to read all items from the queue and handles them.
194199
// It is for testing purpose only. It's not designed to work for a cluster.
195200
func (q *WorkerPoolQueue[T]) doFlush(wg *workerGroup[T], flush flushType) {
201+
q.isFlushing.Store(true)
202+
defer q.isFlushing.Store(false)
203+
196204
log.Debug("Queue %q starts flushing", q.GetName())
197205
defer log.Debug("Queue %q finishes flushing", q.GetName())
198206

@@ -236,6 +244,9 @@ loop:
236244
emptyCounter := 0
237245
for {
238246
select {
247+
case <-q.ctxRun.Done():
248+
log.Debug("Queue %q is shutting down", q.GetName())
249+
return
239250
case data, dataOk := <-wg.popItemChan:
240251
if !dataOk {
241252
return
@@ -251,9 +262,6 @@ loop:
251262
log.Error("Failed to pop item from queue %q (doFlush): %v", q.GetName(), err)
252263
}
253264
return
254-
case <-q.ctxRun.Done():
255-
log.Debug("Queue %q is shutting down", q.GetName())
256-
return
257265
case <-time.After(20 * time.Millisecond):
258266
// There is no reliable way to make sure all queue items are consumed by the Flush, there always might be some items stored in some buffers/temp variables.
259267
// If we run Gitea in a cluster, we can even not guarantee all items are consumed in a deterministic instance.
@@ -331,6 +339,15 @@ func (q *WorkerPoolQueue[T]) doRun() {
331339
var batchDispatchC <-chan time.Time = infiniteTimerC
332340
for {
333341
select {
342+
case flush := <-q.flushChan:
343+
// before flushing, it needs to try to dispatch the batch to worker first, in case there is no worker running
344+
// after the flushing, there is at least one worker running, so "doFlush" could wait for workers to finish
345+
// since we are already in a "flush" operation, so the dispatching function shouldn't read the flush chan.
346+
q.doDispatchBatchToWorker(wg, skipFlushChan)
347+
q.doFlush(wg, flush)
348+
case <-q.ctxRun.Done():
349+
log.Debug("Queue %q is shutting down", q.GetName())
350+
return
334351
case data, dataOk := <-wg.popItemChan:
335352
if !dataOk {
336353
return
@@ -349,20 +366,11 @@ func (q *WorkerPoolQueue[T]) doRun() {
349366
case <-batchDispatchC:
350367
batchDispatchC = infiniteTimerC
351368
q.doDispatchBatchToWorker(wg, q.flushChan)
352-
case flush := <-q.flushChan:
353-
// before flushing, it needs to try to dispatch the batch to worker first, in case there is no worker running
354-
// after the flushing, there is at least one worker running, so "doFlush" could wait for workers to finish
355-
// since we are already in a "flush" operation, so the dispatching function shouldn't read the flush chan.
356-
q.doDispatchBatchToWorker(wg, skipFlushChan)
357-
q.doFlush(wg, flush)
358369
case err := <-wg.popItemErr:
359370
if !q.isCtxRunCanceled() {
360371
log.Error("Failed to pop item from queue %q (doRun): %v", q.GetName(), err)
361372
}
362373
return
363-
case <-q.ctxRun.Done():
364-
log.Debug("Queue %q is shutting down", q.GetName())
365-
return
366374
}
367375
}
368376
}

modules/queue/workerqueue.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ type WorkerPoolQueue[T any] struct {
3232
baseConfig *BaseConfig
3333
baseQueue baseQueue
3434

35-
batchChan chan []T
36-
flushChan chan flushType
35+
batchChan chan []T
36+
flushChan chan flushType
37+
isFlushing atomic.Bool
3738

3839
batchLength int
3940
workerNum int

modules/testlogger/testlogger.go

Lines changed: 62 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ import (
1919
)
2020

2121
var (
22-
prefix string
23-
SlowTest = 10 * time.Second
24-
SlowFlush = 5 * time.Second
22+
prefix string
23+
TestTimeout = 10 * time.Minute
24+
TestSlowRun = 10 * time.Second
25+
TestSlowFlush = 1 * time.Second
2526
)
2627

2728
var WriterCloser = &testLoggerWriterCloser{}
@@ -89,79 +90,97 @@ func (w *testLoggerWriterCloser) Reset() {
8990
w.Unlock()
9091
}
9192

93+
// Printf takes a format and args and prints the string to os.Stdout
94+
func Printf(format string, args ...any) {
95+
if !log.CanColorStdout {
96+
for i := 0; i < len(args); i++ {
97+
if c, ok := args[i].(*log.ColoredValue); ok {
98+
args[i] = c.Value()
99+
}
100+
}
101+
}
102+
_, _ = fmt.Fprintf(os.Stdout, format, args...)
103+
}
104+
92105
// PrintCurrentTest prints the current test to os.Stdout
93106
func PrintCurrentTest(t testing.TB, skip ...int) func() {
94107
t.Helper()
95-
start := time.Now()
108+
runStart := time.Now()
96109
actualSkip := util.OptionalArg(skip) + 1
97110
_, filename, line, _ := runtime.Caller(actualSkip)
98111

99-
if log.CanColorStdout {
100-
_, _ = fmt.Fprintf(os.Stdout, "=== %s (%s:%d)\n", fmt.Formatter(log.NewColoredValue(t.Name())), strings.TrimPrefix(filename, prefix), line)
101-
} else {
102-
_, _ = fmt.Fprintf(os.Stdout, "=== %s (%s:%d)\n", t.Name(), strings.TrimPrefix(filename, prefix), line)
103-
}
112+
Printf("=== %s (%s:%d)\n", log.NewColoredValue(t.Name()), strings.TrimPrefix(filename, prefix), line)
113+
104114
WriterCloser.pushT(t)
105-
return func() {
106-
took := time.Since(start)
107-
if took > SlowTest {
108-
if log.CanColorStdout {
109-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s is a slow test (took %v)\n", fmt.Formatter(log.NewColoredValue(t.Name(), log.Bold, log.FgYellow)), fmt.Formatter(log.NewColoredValue(took, log.Bold, log.FgYellow)))
110-
} else {
111-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s is a slow test (took %v)\n", t.Name(), took)
115+
timeoutChecker := time.AfterFunc(TestTimeout, func() {
116+
l := 128 * 1024
117+
var stack []byte
118+
for {
119+
stack = make([]byte, l)
120+
n := runtime.Stack(stack, true)
121+
if n <= l {
122+
stack = stack[:n]
123+
break
112124
}
125+
l = n
113126
}
114-
timer := time.AfterFunc(SlowFlush, func() {
115-
if log.CanColorStdout {
116-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s ... still flushing after %v ...\n", fmt.Formatter(log.NewColoredValue(t.Name(), log.Bold, log.FgRed)), SlowFlush)
117-
} else {
118-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s ... still flushing after %v ...\n", t.Name(), SlowFlush)
119-
}
127+
Printf("!!! %s ... timeout: %v ... stacktrace:\n%s\n\n", log.NewColoredValue(t.Name(), log.Bold, log.FgRed), TestTimeout, string(stack))
128+
})
129+
return func() {
130+
flushStart := time.Now()
131+
slowFlushChecker := time.AfterFunc(TestSlowFlush, func() {
132+
Printf("+++ %s ... still flushing after %v ...\n", log.NewColoredValue(t.Name(), log.Bold, log.FgRed), TestSlowFlush)
120133
})
121134
if err := queue.GetManager().FlushAll(context.Background(), -1); err != nil {
122135
t.Errorf("Flushing queues failed with error %v", err)
123136
}
124-
timer.Stop()
125-
flushTook := time.Since(start) - took
126-
if flushTook > SlowFlush {
127-
if log.CanColorStdout {
128-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s had a slow clean-up flush (took %v)\n", fmt.Formatter(log.NewColoredValue(t.Name(), log.Bold, log.FgRed)), fmt.Formatter(log.NewColoredValue(flushTook, log.Bold, log.FgRed)))
129-
} else {
130-
_, _ = fmt.Fprintf(os.Stdout, "+++ %s had a slow clean-up flush (took %v)\n", t.Name(), flushTook)
131-
}
132-
}
133-
WriterCloser.popT()
134-
}
135-
}
137+
slowFlushChecker.Stop()
138+
timeoutChecker.Stop()
136139

137-
// Printf takes a format and args and prints the string to os.Stdout
138-
func Printf(format string, args ...any) {
139-
if log.CanColorStdout {
140-
for i := 0; i < len(args); i++ {
141-
args[i] = log.NewColoredValue(args[i])
140+
runDuration := time.Since(runStart)
141+
flushDuration := time.Since(flushStart)
142+
if runDuration > TestSlowRun {
143+
Printf("+++ %s is a slow test (run: %v, flush: %v)\n", log.NewColoredValue(t.Name(), log.Bold, log.FgYellow), runDuration, flushDuration)
142144
}
145+
WriterCloser.popT()
143146
}
144-
_, _ = fmt.Fprintf(os.Stdout, "\t"+format, args...)
145147
}
146148

147149
// TestLogEventWriter is a logger which will write to the testing log
148150
type TestLogEventWriter struct {
149151
*log.EventWriterBaseImpl
150152
}
151153

152-
// NewTestLoggerWriter creates a TestLogEventWriter as a log.LoggerProvider
153-
func NewTestLoggerWriter(name string, mode log.WriterMode) log.EventWriter {
154+
// newTestLoggerWriter creates a TestLogEventWriter as a log.LoggerProvider
155+
func newTestLoggerWriter(name string, mode log.WriterMode) log.EventWriter {
154156
w := &TestLogEventWriter{}
155157
w.EventWriterBaseImpl = log.NewEventWriterBase(name, "test-log-writer", mode)
156158
w.OutputWriteCloser = WriterCloser
157159
return w
158160
}
159161

160-
func init() {
162+
func Init() {
161163
const relFilePath = "modules/testlogger/testlogger.go"
162164
_, filename, _, _ := runtime.Caller(0)
163165
if !strings.HasSuffix(filename, relFilePath) {
164166
panic("source code file path doesn't match expected: " + relFilePath)
165167
}
166168
prefix = strings.TrimSuffix(filename, relFilePath)
169+
170+
log.RegisterEventWriter("test", newTestLoggerWriter)
171+
172+
duration, err := time.ParseDuration(os.Getenv("GITEA_TEST_SLOW_RUN"))
173+
if err == nil && duration > 0 {
174+
TestSlowRun = duration
175+
}
176+
177+
duration, err = time.ParseDuration(os.Getenv("GITEA_TEST_SLOW_FLUSH"))
178+
if err == nil && duration > 0 {
179+
TestSlowFlush = duration
180+
}
181+
}
182+
183+
func Fatalf(format string, args ...any) {
184+
Printf(format+"\n", args...)
185+
os.Exit(1)
167186
}

tests/integration/README.md

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,8 @@ We appreciate that some testing machines may not be very powerful and
9999
the default timeouts for declaring a slow test or a slow clean-up flush
100100
may not be appropriate.
101101

102-
You can either:
103-
104-
* Within the test ini file set the following section:
105-
106-
```ini
107-
[integration-tests]
108-
SLOW_TEST = 10s ; 10s is the default value
109-
SLOW_FLUSH = 5S ; 5s is the default value
110-
```
111-
112-
* Set the following environment variables:
102+
You can set the following environment variables:
113103

114104
```bash
115-
GITEA_SLOW_TEST_TIME="10s" GITEA_SLOW_FLUSH_TIME="5s" make test-sqlite
105+
GITEA_TEST_SLOW_RUN="10s" GITEA_TEST_SLOW_FLUSH="1s" make test-sqlite
116106
```

tests/integration/api_repo_file_get_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ func TestAPIGetRawFileOrLFS(t *testing.T) {
3939

4040
t.Run("Partial Clone", doPartialGitClone(dstPath2, u))
4141

42-
lfs := lfsCommitAndPushTest(t, dstPath, littleSize)[0]
42+
lfs := lfsCommitAndPushTest(t, dstPath, testFileSizeSmall)[0]
4343

4444
reqLFS := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/"+lfs)
4545
respLFS := MakeRequestNilResponseRecorder(t, reqLFS, http.StatusOK)
46-
assert.Equal(t, littleSize, respLFS.Length)
46+
assert.Equal(t, testFileSizeSmall, respLFS.Length)
4747

4848
doAPIDeleteRepository(httpContext)
4949
})

0 commit comments

Comments
 (0)