Skip to content

Commit 8dc1017

Browse files
committed
Merge remote-tracking branch 'upstream/main'
* upstream/main: Add doctor command for full GC of LFS (go-gitea#21978) remove silentcode from MAINTAINERS (go-gitea#22143)
2 parents bc43db4 + 651fe4b commit 8dc1017

File tree

6 files changed

+245
-40
lines changed

6 files changed

+245
-40
lines changed

MAINTAINERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ Janis Estelmann <[email protected]> (@KN4CK3R)
4444
Steven Kriegler <[email protected]> (@justusbunsi)
4545
Jimmy Praet <[email protected]> (@jpraet)
4646
Leon Hofmeister <[email protected]> (@delvh)
47-
silentcode <[email protected]> (@silentcodeg)
4847
Wim <[email protected]> (@42wim)
4948
xinyu <[email protected]> (@penlinux)
5049
Jason Song <[email protected]> (@wolfogre)

models/git/lfs.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package git
66
import (
77
"context"
88
"fmt"
9+
"time"
910

1011
"code.gitea.io/gitea/models/db"
1112
"code.gitea.io/gitea/models/perm"
@@ -14,6 +15,7 @@ import (
1415
user_model "code.gitea.io/gitea/models/user"
1516
"code.gitea.io/gitea/modules/lfs"
1617
"code.gitea.io/gitea/modules/log"
18+
"code.gitea.io/gitea/modules/setting"
1719
"code.gitea.io/gitea/modules/timeutil"
1820
"code.gitea.io/gitea/modules/util"
1921

@@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) {
180182
// RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID.
181183
// It may return ErrLFSObjectNotExist or a database error.
182184
func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
185+
return RemoveLFSMetaObjectByOidFn(repoID, oid, nil)
186+
}
187+
188+
// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID.
189+
// It may return ErrLFSObjectNotExist or a database error. If fn is non-nil it is run with the current count within the transaction, before the commit; an error returned by fn is returned without committing.
190+
func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) {
183191
if len(oid) == 0 {
184192
return 0, ErrLFSObjectNotExist
185193
}
@@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
200208
return count, err
201209
}
202210

211+
if fn != nil {
212+
if err := fn(count); err != nil {
213+
return count, err
214+
}
215+
}
216+
203217
return count, committer.Commit()
204218
}
205219

@@ -319,3 +333,43 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
319333
}
320334
return lfsSize, nil
321335
}
336+
337+
// IterateLFSMetaObjectsForRepoOptions holds the optional filters accepted by IterateLFSMetaObjectsForRepo.
type IterateLFSMetaObjectsForRepoOptions struct {
338+
// OlderThan, when non-zero, limits iteration to meta objects whose created_unix is before this time.
OlderThan time.Time
339+
}
340+
341+
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
342+
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
343+
var start int
344+
batchSize := setting.Database.IterateBufferSize
345+
engine := db.GetEngine(ctx)
346+
type CountLFSMetaObject struct {
347+
Count int64
348+
LFSMetaObject
349+
}
350+
351+
for {
352+
beans := make([]*CountLFSMetaObject, 0, batchSize)
353+
// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
354+
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
355+
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
356+
Where("`lfs_meta_object`.repository_id = ?", repoID)
357+
if !opts.OlderThan.IsZero() {
358+
sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
359+
}
360+
sess.GroupBy("`lfs_meta_object`.id")
361+
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
362+
return err
363+
}
364+
if len(beans) == 0 {
365+
return nil
366+
}
367+
start += len(beans)
368+
369+
for _, bean := range beans {
370+
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
371+
return err
372+
}
373+
}
374+
}
375+
}

modules/doctor/lfs.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2022 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package doctor
5+
6+
import (
7+
"context"
8+
"fmt"
9+
10+
"code.gitea.io/gitea/modules/log"
11+
"code.gitea.io/gitea/modules/setting"
12+
"code.gitea.io/gitea/services/repository"
13+
)
14+
15+
func init() {
16+
Register(&Check{
17+
Title: "Garbage collect LFS",
18+
Name: "gc-lfs",
19+
IsDefault: false,
20+
Run: garbageCollectLFSCheck,
21+
AbortIfFailed: false,
22+
SkipDatabaseInitialization: false,
23+
Priority: 1,
24+
})
25+
}
26+
27+
func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error {
28+
if !setting.LFS.StartServer {
29+
return fmt.Errorf("LFS support is disabled")
30+
}
31+
32+
if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
33+
return err
34+
}
35+
36+
return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix)
37+
}

services/cron/tasks_basic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
6363
for _, arg := range rhcConfig.Args {
6464
args = append(args, git.CmdArg(arg))
6565
}
66-
return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
66+
return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
6767
})
6868
}
6969

services/repository/check.go

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ import (
2222
"xorm.io/builder"
2323
)
2424

25-
// GitFsck calls 'git fsck' to check repository health.
26-
func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
25+
// GitFsckRepos calls 'git fsck' to check repository health.
26+
func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
2727
log.Trace("Doing: GitFsck")
2828

2929
if err := db.Iterate(
@@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
3535
return db.ErrCancelledf("before fsck of %s", repo.FullName())
3636
default:
3737
}
38-
log.Trace("Running health check on repository %v", repo)
39-
repoPath := repo.RepoPath()
40-
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
41-
log.Warn("Failed to health check repository (%v): %v", repo, err)
42-
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
43-
log.Error("CreateRepositoryNotice: %v", err)
44-
}
45-
}
46-
return nil
38+
return GitFsckRepo(ctx, repo, timeout, args)
4739
},
4840
); err != nil {
4941
log.Trace("Error: GitFsck: %v", err)
@@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
5446
return nil
5547
}
5648

49+
// GitFsckRepo calls 'git fsck' to check an individual repository's health.
50+
func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
51+
log.Trace("Running health check on repository %-v", repo)
52+
repoPath := repo.RepoPath()
53+
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
54+
log.Warn("Failed to health check repository (%-v): %v", repo, err)
55+
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
56+
log.Error("CreateRepositoryNotice: %v", err)
57+
}
58+
}
59+
return nil
60+
}
61+
5762
// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
5863
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
5964
log.Trace("Doing: GitGcRepos")
@@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
6873
return db.ErrCancelledf("before GC of %s", repo.FullName())
6974
default:
7075
}
71-
log.Trace("Running git gc on %v", repo)
72-
command := git.NewCommand(ctx, args...).
73-
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
74-
var stdout string
75-
var err error
76-
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
77-
78-
if err != nil {
79-
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
80-
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
81-
if err = system_model.CreateRepositoryNotice(desc); err != nil {
82-
log.Error("CreateRepositoryNotice: %v", err)
83-
}
84-
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
85-
}
86-
87-
// Now update the size of the repository
88-
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
89-
log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
90-
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
91-
if err = system_model.CreateRepositoryNotice(desc); err != nil {
92-
log.Error("CreateRepositoryNotice: %v", err)
93-
}
94-
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
95-
}
96-
97-
return nil
76+
return GitGcRepo(ctx, repo, timeout, args)
9877
},
9978
); err != nil {
10079
return err
@@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
10483
return nil
10584
}
10685

86+
// GitGcRepo calls 'git gc' to remove unnecessary files and optimize the local repository
87+
func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
88+
log.Trace("Running git gc on %-v", repo)
89+
command := git.NewCommand(ctx, args...).
90+
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
91+
var stdout string
92+
var err error
93+
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
94+
95+
if err != nil {
96+
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
97+
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
98+
if err = system_model.CreateRepositoryNotice(desc); err != nil {
99+
log.Error("CreateRepositoryNotice: %v", err)
100+
}
101+
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
102+
}
103+
104+
// Now update the size of the repository
105+
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
106+
log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
107+
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
108+
if err = system_model.CreateRepositoryNotice(desc); err != nil {
109+
log.Error("CreateRepositoryNotice: %v", err)
110+
}
111+
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
112+
}
113+
114+
return nil
115+
}
116+
107117
func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
108118
repos := make([]*repo_model.Repository, 0, 10)
109119
if err := db.Iterate(

services/repository/lfs.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2022 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package repository
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"time"
10+
11+
"code.gitea.io/gitea/models/db"
12+
git_model "code.gitea.io/gitea/models/git"
13+
repo_model "code.gitea.io/gitea/models/repo"
14+
"code.gitea.io/gitea/modules/git"
15+
"code.gitea.io/gitea/modules/lfs"
16+
"code.gitea.io/gitea/modules/log"
17+
18+
"xorm.io/builder"
19+
)
20+
21+
func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
22+
log.Trace("Doing: GarbageCollectLFSMetaObjects")
23+
24+
if err := db.Iterate(
25+
ctx,
26+
builder.And(builder.Gt{"id": 0}),
27+
func(ctx context.Context, repo *repo_model.Repository) error {
28+
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
29+
},
30+
); err != nil {
31+
return err
32+
}
33+
34+
log.Trace("Finished: GarbageCollectLFSMetaObjects")
35+
return nil
36+
}
37+
38+
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
39+
if logger != nil {
40+
logger.Info("Checking %-v", repo)
41+
}
42+
total, orphaned, collected, deleted := 0, 0, 0, 0
43+
if logger != nil {
44+
defer func() {
45+
if orphaned == 0 {
46+
logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
47+
} else if !autofix {
48+
logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
49+
} else {
50+
logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
51+
}
52+
}()
53+
}
54+
55+
gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
56+
if err != nil {
57+
log.Error("Unable to open git repository %-v: %v", repo, err)
58+
return err
59+
}
60+
defer gitRepo.Close()
61+
62+
store := lfs.NewContentStore()
63+
64+
return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
65+
total++
66+
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
67+
68+
if gitRepo.IsObjectExist(pointerSha.String()) {
69+
return nil
70+
}
71+
orphaned++
72+
73+
if !autofix {
74+
return nil
75+
}
76+
// Non-existent pointer file
77+
_, err = git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
78+
if count > 0 {
79+
return nil
80+
}
81+
82+
if err := store.Delete(metaObject.RelativePath()); err != nil {
83+
log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
84+
}
85+
deleted++
86+
return nil
87+
})
88+
if err != nil {
89+
return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
90+
}
91+
collected++
92+
93+
return nil
94+
}, &git_model.IterateLFSMetaObjectsForRepoOptions{
95+
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
96+
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
97+
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
98+
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
99+
// objects.
100+
//
101+
// It is likely that a week is potentially excessive but it should definitely be enough that any
102+
// unassociated LFS object is genuinely unassociated.
103+
OlderThan: time.Now().Add(-24 * 7 * time.Hour),
104+
})
105+
}

0 commit comments

Comments
 (0)