Skip to content

Commit d71f595

Browse files
kevinburkebradfitz
authored andcommitted
maintner: start adding issue bodies to the corpus
When we get an issue from Github, add the issue and associated details to the Corpus. Check whether the `Updated` timestamp on an issue is newer than the one we have in the Corpus; if the timestamp is the same, we can stop processing since we are sorting by the updated date and no older issues will have new data. If the mutation changes the Corpus, add it to an array of Mutations that we will eventually flush to disk. Adds a helper function to convert a github.Issue into a maintpb.Mutation - this way we can reuse the logic for adding data to the Corpus. Adds some more tests around data processing. Change-Id: I9475e293941784179420ca7b638917943be1f29e Reviewed-on: https://go-review.googlesource.com/37490 Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent c36309f commit d71f595

File tree

2 files changed

+270
-29
lines changed

2 files changed

+270
-29
lines changed

maintner/maintner.go

Lines changed: 137 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ package maintner
1212

1313
import (
1414
"context"
15+
"errors"
1516
"fmt"
1617
"io/ioutil"
1718
"log"
1819
"strings"
1920
"sync"
2021
"time"
2122

23+
"github.com/golang/protobuf/ptypes"
2224
"github.com/google/go-github/github"
2325
"golang.org/x/build/maintner/maintpb"
2426
"golang.org/x/oauth2"
@@ -75,6 +77,24 @@ func (c *Corpus) AddGithub(owner, repo, tokenFile string) {
7577
// such as "golang/go".
7678
type githubRepo string
7779

80+
// Org finds "golang" in the githubRepo string "golang/go", or returns an empty
81+
// string if it is malformed.
82+
func (gr githubRepo) Org() string {
83+
sep := strings.IndexByte(string(gr), '/')
84+
if sep == -1 {
85+
return ""
86+
}
87+
return string(gr[:sep])
88+
}
89+
90+
func (gr githubRepo) Repo() string {
91+
sep := strings.IndexByte(string(gr), '/')
92+
if sep == -1 || sep == len(gr)-1 {
93+
return ""
94+
}
95+
return string(gr[sep+1:])
96+
}
97+
7898
// githubUser represents a github user.
7999
// It is a subset of https://developer.github.com/v3/users/#get-a-single-user
80100
type githubUser struct {
@@ -169,7 +189,80 @@ func (c *Corpus) getGithubUser(pu *maintpb.GithubUser) *githubUser {
169189
return u
170190
}
171191

172-
func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) {
192+
var errNoChanges = errors.New("No changes in this github.Issue")
193+
194+
// newMutationFromIssue generates a GithubIssueMutation using the smallest
195+
// possible diff between ci (a corpus Issue) and gi (an external github issue).
196+
//
197+
// If newMutationFromIssue returns nil, the provided github.Issue is no newer
198+
// than the data we have in the corpus. ci may be nil.
199+
func newMutationFromIssue(ci *githubIssue, gi *github.Issue, rp githubRepo) *maintpb.GithubIssueMutation {
200+
if gi == nil || gi.Number == nil {
201+
panic(fmt.Sprintf("github issue with nil number: %#v", gi))
202+
}
203+
owner, repo := rp.Org(), rp.Repo()
204+
// always need these fields to figure out which key to write to
205+
m := &maintpb.GithubIssueMutation{
206+
Owner: owner,
207+
Repo: repo,
208+
Number: int32(*gi.Number),
209+
}
210+
if ci == nil {
211+
// We don't know about this github issue, so populate all fields in one
212+
// mutation.
213+
if gi.CreatedAt != nil {
214+
tproto, err := ptypes.TimestampProto(*gi.CreatedAt)
215+
if err != nil {
216+
panic(err)
217+
}
218+
m.Created = tproto
219+
}
220+
if gi.UpdatedAt != nil {
221+
tproto, err := ptypes.TimestampProto(*gi.UpdatedAt)
222+
if err != nil {
223+
panic(err)
224+
}
225+
m.Updated = tproto
226+
}
227+
if gi.Body != nil {
228+
m.Body = *gi.Body
229+
}
230+
return m
231+
}
232+
if gi.UpdatedAt != nil {
233+
if gi.UpdatedAt.Before(ci.Updated) {
234+
// This data is stale, ignore it.
235+
return nil
236+
}
237+
tproto, err := ptypes.TimestampProto(*gi.UpdatedAt)
238+
if err != nil {
239+
panic(err)
240+
}
241+
m.Updated = tproto
242+
}
243+
if gi.Body != nil && *gi.Body != ci.Body {
244+
m.Body = *gi.Body
245+
}
246+
return m
247+
}
248+
249+
// getIssue finds an issue in the Corpus or returns nil, false if it is not
250+
// present.
251+
func (c *Corpus) getIssue(rp githubRepo, number int32) (*githubIssue, bool) {
252+
issueMap, ok := c.githubIssues[rp]
253+
if !ok {
254+
return nil, false
255+
}
256+
gi, ok := issueMap[number]
257+
return gi, ok
258+
}
259+
260+
// processGithubIssueMutation updates the corpus with the information in m, and
261+
// returns true if the Corpus was modified.
262+
func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) (changed bool) {
263+
if c == nil {
264+
panic("nil corpus")
265+
}
173266
k := c.repoKey(m.Owner, m.Repo)
174267
if k == "" {
175268
// TODO: errors? return false? skip for now.
@@ -188,16 +281,39 @@ func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) {
188281
}
189282
gi, ok := issueMap[m.Number]
190283
if !ok {
284+
created, err := ptypes.Timestamp(m.Created)
285+
if err != nil {
286+
panic(err)
287+
}
191288
gi = &githubIssue{
192-
Number: m.Number,
193-
User: c.getGithubUser(m.User),
289+
Number: m.Number,
290+
User: c.getGithubUser(m.User),
291+
Created: created,
194292
}
195293
issueMap[m.Number] = gi
294+
changed = true
295+
}
296+
// Check Updated before all other fields so they don't update if this
297+
// Mutation is stale
298+
if m.Updated != nil {
299+
updated, err := ptypes.Timestamp(m.Updated)
300+
if err != nil {
301+
panic(err)
302+
}
303+
if !updated.IsZero() && updated.Before(gi.Updated) {
304+
// this mutation represents data older than the data we have in
305+
// the corpus; ignore it.
306+
return false
307+
}
308+
gi.Updated = updated
309+
changed = changed || updated.After(gi.Updated)
196310
}
197311
if m.Body != "" {
198312
gi.Body = m.Body
313+
changed = changed || m.Body != gi.Body
199314
}
200-
// TODO: times, etc.
315+
// ignoring Created since it *should* never update
316+
return changed
201317
}
202318

203319
// PopulateFromServer populates the corpus from a maintnerd server.
@@ -257,14 +373,16 @@ func (c *Corpus) pollGithub(ctx context.Context, rp githubRepo, ghc *github.Clie
257373
log.Printf("Polling github for %s ...", rp)
258374
page := 1
259375
keepGoing := true
260-
splits := strings.SplitN(string(rp), "/", 2)
261-
owner, repo := splits[0], splits[1]
376+
owner, repo := rp.Org(), rp.Repo()
262377
for keepGoing {
263378
// TODO: use https://godoc.org/github.com/google/go-github/github#ActivityService.ListIssueEventsForRepository probably
264-
issues, res, err := ghc.Issues.ListByRepo(context.TODO(), owner, repo, &github.IssueListByRepoOptions{
379+
issues, _, err := ghc.Issues.ListByRepo(ctx, owner, repo, &github.IssueListByRepoOptions{
265380
State: "all",
266381
Sort: "updated",
267382
Direction: "desc",
383+
// TODO: if an issue gets updated while we are paging, we might
384+
// process the same issue twice - as item 100 on page 1 and then
385+
// again as item 1 on page 2.
268386
ListOptions: github.ListOptions{
269387
Page: page,
270388
PerPage: 100,
@@ -273,15 +391,22 @@ func (c *Corpus) pollGithub(ctx context.Context, rp githubRepo, ghc *github.Clie
273391
if err != nil {
274392
return err
275393
}
276-
log.Printf("github %s/%s: page %d, num issues %d, res: %#v", owner, repo, page, len(issues), res)
277-
keepGoing = false
394+
log.Printf("github %s/%s: page %d, num issues %d", owner, repo, page, len(issues))
395+
if len(issues) == 0 {
396+
break
397+
}
398+
c.mu.Lock()
278399
for _, is := range issues {
279400
fmt.Printf("issue %d: %s\n", is.ID, *is.Title)
280-
changes := false
281-
if changes {
282-
keepGoing = true
401+
gi, _ := c.getIssue(rp, int32(*is.Number))
402+
mp := newMutationFromIssue(gi, is, rp)
403+
if mp == nil {
404+
keepGoing = false
405+
break
283406
}
407+
c.processGithubIssueMutation(mp)
284408
}
409+
c.mu.Unlock()
285410
page++
286411
}
287412
return nil

maintner/maintner_test.go

Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ package maintner
77
import (
88
"reflect"
99
"testing"
10+
"time"
1011

12+
"github.com/golang/protobuf/ptypes"
13+
google_protobuf "github.com/golang/protobuf/ptypes/timestamp"
14+
"github.com/google/go-github/github"
1115
"golang.org/x/build/maintner/maintpb"
1216
)
1317

@@ -18,6 +22,9 @@ type mutationTest struct {
1822

1923
func (mt mutationTest) test(t *testing.T, muts ...*maintpb.Mutation) {
2024
c := mt.corpus
25+
if c == nil {
26+
c = NewCorpus()
27+
}
2128
for _, m := range muts {
2229
c.processMutationLocked(m)
2330
}
@@ -26,26 +33,71 @@ func (mt mutationTest) test(t *testing.T, muts ...*maintpb.Mutation) {
2633
}
2734
}
2835

36+
var t1, t2 time.Time
37+
var tp1, tp2 *google_protobuf.Timestamp
38+
39+
func init() {
40+
t1, _ = time.Parse(time.RFC3339, "2016-01-02T15:04:00Z")
41+
t2, _ = time.Parse(time.RFC3339, "2016-01-02T15:30:00Z")
42+
tp1, _ = ptypes.TimestampProto(t1)
43+
tp2, _ = ptypes.TimestampProto(t2)
44+
}
45+
2946
func TestProcessMutation_Github_NewIssue(t *testing.T) {
30-
mutationTest{
31-
want: &Corpus{
32-
githubUsers: map[int64]*githubUser{
33-
100: &githubUser{
34-
Login: "gopherbot",
35-
ID: 100,
36-
},
47+
c := NewCorpus()
48+
c.githubUsers = map[int64]*githubUser{
49+
100: &githubUser{
50+
Login: "gopherbot",
51+
ID: 100,
52+
},
53+
}
54+
c.githubIssues = map[githubRepo]map[int32]*githubIssue{
55+
"golang/go": map[int32]*githubIssue{
56+
3: &githubIssue{
57+
Number: 3,
58+
User: &githubUser{ID: 100, Login: "gopherbot"},
59+
Body: "some body",
60+
Created: t1,
3761
},
38-
githubIssues: map[githubRepo]map[int32]*githubIssue{
39-
"golang/go": map[int32]*githubIssue{
40-
3: &githubIssue{
41-
Number: 3,
42-
User: &githubUser{ID: 100, Login: "gopherbot"},
43-
Body: "some body",
44-
},
45-
},
62+
},
63+
}
64+
mutationTest{want: c}.test(t, &maintpb.Mutation{
65+
GithubIssue: &maintpb.GithubIssueMutation{
66+
Owner: "golang",
67+
Repo: "go",
68+
Number: 3,
69+
User: &maintpb.GithubUser{
70+
Login: "gopherbot",
71+
Id: 100,
4672
},
73+
Body: "some body",
74+
Created: tp1,
4775
},
48-
}.test(t, &maintpb.Mutation{
76+
})
77+
}
78+
79+
func TestProcessMutation_OldIssue(t *testing.T) {
80+
// process a mutation with an Updated timestamp older than the existing
81+
// issue.
82+
c := NewCorpus()
83+
c.githubUsers = map[int64]*githubUser{
84+
100: &githubUser{
85+
Login: "gopherbot",
86+
ID: 100,
87+
},
88+
}
89+
c.githubIssues = map[githubRepo]map[int32]*githubIssue{
90+
"golang/go": map[int32]*githubIssue{
91+
3: &githubIssue{
92+
Number: 3,
93+
User: &githubUser{ID: 100, Login: "gopherbot"},
94+
Body: "some body",
95+
Created: t2,
96+
Updated: t2,
97+
},
98+
},
99+
}
100+
mutationTest{want: c}.test(t, &maintpb.Mutation{
49101
GithubIssue: &maintpb.GithubIssueMutation{
50102
Owner: "golang",
51103
Repo: "go",
@@ -54,7 +106,71 @@ func TestProcessMutation_Github_NewIssue(t *testing.T) {
54106
Login: "gopherbot",
55107
Id: 100,
56108
},
57-
Body: "some body",
109+
Body: "some body",
110+
Created: tp2,
111+
Updated: tp2,
112+
},
113+
}, &maintpb.Mutation{
114+
// The second issue is older than the first.
115+
GithubIssue: &maintpb.GithubIssueMutation{
116+
Owner: "golang",
117+
Repo: "go",
118+
Number: 3,
119+
User: &maintpb.GithubUser{
120+
Login: "gopherbot",
121+
Id: 100,
122+
},
123+
Body: "issue body changed",
124+
Created: tp1,
125+
Updated: tp1,
58126
},
59127
})
60128
}
129+
130+
func TestNewMutationsFromIssue(t *testing.T) {
131+
gh := &github.Issue{
132+
Number: github.Int(5),
133+
CreatedAt: &t1,
134+
UpdatedAt: &t2,
135+
Body: github.String("body of the issue"),
136+
State: github.String("closed"),
137+
}
138+
is := newMutationFromIssue(nil, gh, githubRepo("golang/go"))
139+
want := &maintpb.GithubIssueMutation{
140+
Owner: "golang",
141+
Repo: "go",
142+
Number: 5,
143+
Body: "body of the issue",
144+
Created: tp1,
145+
Updated: tp2,
146+
}
147+
if !reflect.DeepEqual(is, want) {
148+
t.Errorf("issue mismatch\n got: %#v\nwant: %#v", is, want)
149+
}
150+
}
151+
152+
func TestGithubRepoOrg(t *testing.T) {
153+
gr := githubRepo("golang/go")
154+
want := "golang"
155+
if org := gr.Org(); org != want {
156+
t.Errorf("githubRepo(\"%s\").Org(): got %s, want %s", gr, org, want)
157+
}
158+
gr = githubRepo("unknown format")
159+
want = ""
160+
if org := gr.Org(); org != want {
161+
t.Errorf("githubRepo(\"%s\").Org(): got %s, want %s", gr, org, want)
162+
}
163+
}
164+
165+
func TestGithubRepo(t *testing.T) {
166+
gr := githubRepo("golang/go")
167+
want := "go"
168+
if repo := gr.Repo(); repo != want {
169+
t.Errorf("githubRepo(\"%s\").Repo(): got %s, want %s", gr, repo, want)
170+
}
171+
gr = githubRepo("bad/")
172+
want = ""
173+
if repo := gr.Repo(); repo != want {
174+
t.Errorf("githubRepo(\"%s\").Repo(): got %s, want %s", gr, repo, want)
175+
}
176+
}

0 commit comments

Comments
 (0)