Skip to content

Commit 02eccf8

Browse files
ethantkoenig authored and lafriks committed
Faster commit lookup (go-gitea#91)
* Faster commit lookup * Fix copy/rename bug * Comment * Use defer
1 parent f9dd682 commit 02eccf8

File tree

4 files changed

+366
-168
lines changed

4 files changed

+366
-168
lines changed

commit_info.go

+307
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
// Copyright 2017 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package git
6+
7+
import (
8+
"bufio"
9+
"context"
10+
"fmt"
11+
"os/exec"
12+
"path"
13+
"runtime"
14+
"strconv"
15+
"strings"
16+
"sync"
17+
"time"
18+
)
19+
20+
// Tuning parameters for the untargeted `git log` scan. Once the scan has
// gone deferToTargetedSearchColdStreak or more commits in a row without
// resolving an entry, and deferToTargetedSearchNumRemainingEntries or fewer
// entries are still unresolved, the scan stops and leaves the remaining
// entries to the targeted-search goroutines so that it does not interfere
// with them.
const (
	deferToTargetedSearchColdStreak          = 5
	deferToTargetedSearchNumRemainingEntries = 12
)
28+
29+
// getCommitsInfoState shared state while getting commit info for entries
30+
type getCommitsInfoState struct {
31+
lock sync.Mutex
32+
/* read-only fields, can be read without the mutex */
33+
// entries and entryPaths are read-only after initialization, so they can
34+
// safely be read without the mutex
35+
entries []*TreeEntry
36+
// set of filepaths to get info for
37+
entryPaths map[string]struct{}
38+
treePath string
39+
headCommit *Commit
40+
41+
/* mutable fields, must hold mutex to read or write */
42+
// map from filepath to commit
43+
commits map[string]*Commit
44+
// set of filepaths that have been or are being searched for in a target search
45+
targetedPaths map[string]struct{}
46+
}
47+
48+
func (state *getCommitsInfoState) numRemainingEntries() int {
49+
state.lock.Lock()
50+
defer state.lock.Unlock()
51+
return len(state.entries) - len(state.commits)
52+
}
53+
54+
// getTargetEntryPath Returns the next path for a targeted-searching thread to
55+
// search for, or returns the empty string if nothing left to search for
56+
func (state *getCommitsInfoState) getTargetedEntryPath() string {
57+
var targetedEntryPath string
58+
state.lock.Lock()
59+
defer state.lock.Unlock()
60+
for _, entry := range state.entries {
61+
entryPath := path.Join(state.treePath, entry.Name())
62+
if _, ok := state.commits[entryPath]; ok {
63+
continue
64+
} else if _, ok = state.targetedPaths[entryPath]; ok {
65+
continue
66+
}
67+
targetedEntryPath = entryPath
68+
state.targetedPaths[entryPath] = struct{}{}
69+
break
70+
}
71+
return targetedEntryPath
72+
}
73+
74+
// repeatedly perform targeted searches for unpopulated entries
75+
func targetedSearch(state *getCommitsInfoState, done chan error) {
76+
for {
77+
entryPath := state.getTargetedEntryPath()
78+
if len(entryPath) == 0 {
79+
done <- nil
80+
return
81+
}
82+
command := NewCommand("rev-list", "-1", "HEAD", "--", entryPath)
83+
output, err := command.RunInDir(state.headCommit.repo.Path)
84+
if err != nil {
85+
done <- err
86+
return
87+
}
88+
id, err := NewIDFromString(strings.TrimSpace(output))
89+
if err != nil {
90+
done <- err
91+
return
92+
}
93+
commit, err := state.headCommit.repo.getCommit(id)
94+
if err != nil {
95+
done <- err
96+
return
97+
}
98+
state.update(entryPath, commit)
99+
}
100+
}
101+
102+
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
103+
entryPaths := make(map[string]struct{}, len(entries))
104+
for _, entry := range entries {
105+
entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
106+
}
107+
if treePath = path.Clean(treePath); treePath == "." {
108+
treePath = ""
109+
}
110+
return &getCommitsInfoState{
111+
entries: entries,
112+
entryPaths: entryPaths,
113+
commits: make(map[string]*Commit, len(entries)),
114+
targetedPaths: make(map[string]struct{}, len(entries)),
115+
treePath: treePath,
116+
headCommit: headCommit,
117+
}
118+
}
119+
120+
// GetCommitsInfo gets information of all commits that are corresponding to these entries
121+
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
122+
state := initGetCommitInfoState(tes, commit, treePath)
123+
if err := getCommitsInfo(state); err != nil {
124+
return nil, err
125+
}
126+
if len(state.commits) < len(state.entryPaths) {
127+
return nil, fmt.Errorf("could not find commits for all entries")
128+
}
129+
130+
commitsInfo := make([][]interface{}, len(tes))
131+
for i, entry := range tes {
132+
commit, ok := state.commits[path.Join(treePath, entry.Name())]
133+
if !ok {
134+
return nil, fmt.Errorf("could not find commit for %s", entry.Name())
135+
}
136+
switch entry.Type {
137+
case ObjectCommit:
138+
subModuleURL := ""
139+
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
140+
return nil, err
141+
} else if subModule != nil {
142+
subModuleURL = subModule.URL
143+
}
144+
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
145+
commitsInfo[i] = []interface{}{entry, subModuleFile}
146+
default:
147+
commitsInfo[i] = []interface{}{entry, commit}
148+
}
149+
}
150+
return commitsInfo, nil
151+
}
152+
153+
func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
154+
if rawEntryPath[0] == '"' {
155+
var err error
156+
rawEntryPath, err = strconv.Unquote(rawEntryPath)
157+
if err != nil {
158+
return rawEntryPath, err
159+
}
160+
}
161+
var entryNameStartIndex int
162+
if len(state.treePath) > 0 {
163+
entryNameStartIndex = len(state.treePath) + 1
164+
}
165+
166+
if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
167+
return rawEntryPath[:entryNameStartIndex+index], nil
168+
}
169+
return rawEntryPath, nil
170+
}
171+
172+
// update report that the given path was last modified by the given commit.
173+
// Returns whether state.commits was updated
174+
func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
175+
if _, ok := state.entryPaths[entryPath]; !ok {
176+
return false
177+
}
178+
179+
var updated bool
180+
state.lock.Lock()
181+
defer state.lock.Unlock()
182+
if _, ok := state.commits[entryPath]; !ok {
183+
state.commits[entryPath] = commit
184+
updated = true
185+
}
186+
return updated
187+
}
188+
189+
// getCommitsInfoPretty is the pretty-format argument passed to `git log` by
// getCommitsInfo. It prints one line per commit holding the commit hash, the
// committer timestamp in Unix seconds and the subject, separated by single
// spaces; parseCommitInfo expects exactly this layout.
const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
190+
191+
func getCommitsInfo(state *getCommitsInfoState) error {
192+
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
193+
defer cancel()
194+
195+
args := []string{"log", getCommitsInfoPretty, "--name-status", "-c"}
196+
if len(state.treePath) > 0 {
197+
args = append(args, "--", state.treePath)
198+
}
199+
cmd := exec.CommandContext(ctx, "git", args...)
200+
cmd.Dir = state.headCommit.repo.Path
201+
202+
readCloser, err := cmd.StdoutPipe()
203+
if err != nil {
204+
return err
205+
}
206+
207+
if err := cmd.Start(); err != nil {
208+
return err
209+
}
210+
211+
numThreads := runtime.NumCPU()
212+
done := make(chan error, numThreads)
213+
for i := 0; i < numThreads; i++ {
214+
go targetedSearch(state, done)
215+
}
216+
217+
scanner := bufio.NewScanner(readCloser)
218+
err = state.processGitLogOutput(scanner)
219+
for i := 0; i < numThreads; i++ {
220+
doneErr := <-done
221+
if doneErr != nil && err == nil {
222+
err = doneErr
223+
}
224+
}
225+
return err
226+
}
227+
228+
func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
229+
// keep a local cache of seen paths to avoid acquiring a lock for paths
230+
// we've already seen
231+
seenPaths := make(map[string]struct{}, len(state.entryPaths))
232+
// number of consecutive commits without any finds
233+
coldStreak := 0
234+
var commit *Commit
235+
var err error
236+
for scanner.Scan() {
237+
line := scanner.Text()
238+
if len(line) == 0 { // in-between commits
239+
numRemainingEntries := state.numRemainingEntries()
240+
if numRemainingEntries == 0 {
241+
break
242+
}
243+
if coldStreak >= deferToTargetedSearchColdStreak &&
244+
numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
245+
// stop this untargeted search, and let the targeted-search threads
246+
// finish the work
247+
break
248+
}
249+
continue
250+
}
251+
if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
252+
// look for the last tab, since for copies (C) and renames (R) two
253+
// filenames are printed: src, then dest
254+
tabIndex := strings.LastIndexByte(line, '\t')
255+
if tabIndex < 1 {
256+
return fmt.Errorf("misformatted line: %s", line)
257+
}
258+
entryPath, err := state.cleanEntryPath(line[tabIndex+1:])
259+
if err != nil {
260+
return err
261+
}
262+
if _, ok := seenPaths[entryPath]; !ok {
263+
if state.update(entryPath, commit) {
264+
coldStreak = 0
265+
}
266+
seenPaths[entryPath] = struct{}{}
267+
}
268+
continue
269+
}
270+
271+
// a new commit
272+
commit, err = parseCommitInfo(line)
273+
if err != nil {
274+
return err
275+
}
276+
coldStreak++
277+
}
278+
return scanner.Err()
279+
}
280+
281+
// parseCommitInfo parse a commit from a line of `git log` output. Expects the
282+
// line to be formatted according to getCommitsInfoPretty.
283+
func parseCommitInfo(line string) (*Commit, error) {
284+
if len(line) < 43 {
285+
return nil, fmt.Errorf("invalid git output: %s", line)
286+
}
287+
ref, err := NewIDFromString(line[:40])
288+
if err != nil {
289+
return nil, err
290+
}
291+
spaceIndex := strings.IndexByte(line[41:], ' ')
292+
if spaceIndex < 0 {
293+
return nil, fmt.Errorf("invalid git output: %s", line)
294+
}
295+
unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
296+
if err != nil {
297+
return nil, err
298+
}
299+
message := line[spaceIndex+42:]
300+
return &Commit{
301+
ID: ref,
302+
CommitMessage: message,
303+
Committer: &Signature{
304+
When: time.Unix(int64(unixSeconds), 0),
305+
},
306+
}, nil
307+
}

commit_info_test.go

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package git
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
"time"
8+
)
9+
10+
// benchmarkReposDir is the directory under which setupGitRepo places the
// repositories used by the benchmarks.
const benchmarkReposDir = "benchmark/repos/"
11+
12+
func setupGitRepo(url string, name string) (string, error) {
13+
repoDir := filepath.Join(benchmarkReposDir, name)
14+
if _, err := os.Stat(repoDir); err == nil {
15+
return repoDir, nil
16+
}
17+
return repoDir, Clone(url, repoDir, CloneRepoOptions{
18+
Mirror: false,
19+
Bare: false,
20+
Quiet: true,
21+
Timeout: 5 * time.Minute,
22+
})
23+
}
24+
25+
func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
26+
benchmarks := []struct {
27+
url string
28+
name string
29+
}{
30+
{url: "https://github.com/go-gitea/gitea.git", name: "gitea"},
31+
{url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"},
32+
{url: "https://github.com/moby/moby.git", name: "moby"},
33+
{url: "https://github.com/golang/go.git", name: "go"},
34+
{url: "https://github.com/torvalds/linux.git", name: "linux"},
35+
}
36+
for _, benchmark := range benchmarks {
37+
var commit *Commit
38+
var entries Entries
39+
if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil {
40+
b.Fatal(err)
41+
} else if repo, err := OpenRepository(repoPath); err != nil {
42+
b.Fatal(err)
43+
} else if commit, err = repo.GetBranchCommit("master"); err != nil {
44+
b.Fatal(err)
45+
} else if entries, err = commit.Tree.ListEntries(); err != nil {
46+
b.Fatal(err)
47+
}
48+
entries.Sort()
49+
b.ResetTimer()
50+
b.Run(benchmark.name, func(b *testing.B) {
51+
for i := 0; i < b.N; i++ {
52+
_, err := entries.GetCommitsInfo(commit, "")
53+
if err != nil {
54+
b.Fatal(err)
55+
}
56+
}
57+
})
58+
}
59+
}

0 commit comments

Comments
 (0)