|
| 1 | +// Copyright 2017 The Gitea Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a MIT-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package git |
| 6 | + |
| 7 | +import ( |
| 8 | + "bufio" |
| 9 | + "context" |
| 10 | + "fmt" |
| 11 | + "os/exec" |
| 12 | + "path" |
| 13 | + "runtime" |
| 14 | + "strconv" |
| 15 | + "strings" |
| 16 | + "sync" |
| 17 | + "time" |
| 18 | +) |
| 19 | + |
// deferToTargetedSearchColdStreak is the number of consecutive commits the
// untargeted `git log` scan must process without resolving any entry before
// it may hand the remaining work over to the targeted-search goroutines.
const deferToTargetedSearchColdStreak = 5

// deferToTargetedSearchNumRemainingEntries is the largest number of
// still-unresolved entries at which the untargeted scan is allowed to stop.
// Both this limit and the cold-streak threshold must be met; the scan then
// ends so that it does not interfere with the targeted searches finishing off.
const deferToTargetedSearchNumRemainingEntries = 12
| 28 | + |
| 29 | +// getCommitsInfoState shared state while getting commit info for entries |
| 30 | +type getCommitsInfoState struct { |
| 31 | + lock sync.Mutex |
| 32 | + /* read-only fields, can be read without the mutex */ |
| 33 | + // entries and entryPaths are read-only after initialization, so they can |
| 34 | + // safely be read without the mutex |
| 35 | + entries []*TreeEntry |
| 36 | + // set of filepaths to get info for |
| 37 | + entryPaths map[string]struct{} |
| 38 | + treePath string |
| 39 | + headCommit *Commit |
| 40 | + |
| 41 | + /* mutable fields, must hold mutex to read or write */ |
| 42 | + // map from filepath to commit |
| 43 | + commits map[string]*Commit |
| 44 | + // set of filepaths that have been or are being searched for in a target search |
| 45 | + targetedPaths map[string]struct{} |
| 46 | +} |
| 47 | + |
| 48 | +func (state *getCommitsInfoState) numRemainingEntries() int { |
| 49 | + state.lock.Lock() |
| 50 | + defer state.lock.Unlock() |
| 51 | + return len(state.entries) - len(state.commits) |
| 52 | +} |
| 53 | + |
| 54 | +// getTargetEntryPath Returns the next path for a targeted-searching thread to |
| 55 | +// search for, or returns the empty string if nothing left to search for |
| 56 | +func (state *getCommitsInfoState) getTargetedEntryPath() string { |
| 57 | + var targetedEntryPath string |
| 58 | + state.lock.Lock() |
| 59 | + defer state.lock.Unlock() |
| 60 | + for _, entry := range state.entries { |
| 61 | + entryPath := path.Join(state.treePath, entry.Name()) |
| 62 | + if _, ok := state.commits[entryPath]; ok { |
| 63 | + continue |
| 64 | + } else if _, ok = state.targetedPaths[entryPath]; ok { |
| 65 | + continue |
| 66 | + } |
| 67 | + targetedEntryPath = entryPath |
| 68 | + state.targetedPaths[entryPath] = struct{}{} |
| 69 | + break |
| 70 | + } |
| 71 | + return targetedEntryPath |
| 72 | +} |
| 73 | + |
| 74 | +// repeatedly perform targeted searches for unpopulated entries |
| 75 | +func targetedSearch(state *getCommitsInfoState, done chan error) { |
| 76 | + for { |
| 77 | + entryPath := state.getTargetedEntryPath() |
| 78 | + if len(entryPath) == 0 { |
| 79 | + done <- nil |
| 80 | + return |
| 81 | + } |
| 82 | + command := NewCommand("rev-list", "-1", "HEAD", "--", entryPath) |
| 83 | + output, err := command.RunInDir(state.headCommit.repo.Path) |
| 84 | + if err != nil { |
| 85 | + done <- err |
| 86 | + return |
| 87 | + } |
| 88 | + id, err := NewIDFromString(strings.TrimSpace(output)) |
| 89 | + if err != nil { |
| 90 | + done <- err |
| 91 | + return |
| 92 | + } |
| 93 | + commit, err := state.headCommit.repo.getCommit(id) |
| 94 | + if err != nil { |
| 95 | + done <- err |
| 96 | + return |
| 97 | + } |
| 98 | + state.update(entryPath, commit) |
| 99 | + } |
| 100 | +} |
| 101 | + |
| 102 | +func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState { |
| 103 | + entryPaths := make(map[string]struct{}, len(entries)) |
| 104 | + for _, entry := range entries { |
| 105 | + entryPaths[path.Join(treePath, entry.Name())] = struct{}{} |
| 106 | + } |
| 107 | + if treePath = path.Clean(treePath); treePath == "." { |
| 108 | + treePath = "" |
| 109 | + } |
| 110 | + return &getCommitsInfoState{ |
| 111 | + entries: entries, |
| 112 | + entryPaths: entryPaths, |
| 113 | + commits: make(map[string]*Commit, len(entries)), |
| 114 | + targetedPaths: make(map[string]struct{}, len(entries)), |
| 115 | + treePath: treePath, |
| 116 | + headCommit: headCommit, |
| 117 | + } |
| 118 | +} |
| 119 | + |
| 120 | +// GetCommitsInfo gets information of all commits that are corresponding to these entries |
| 121 | +func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) { |
| 122 | + state := initGetCommitInfoState(tes, commit, treePath) |
| 123 | + if err := getCommitsInfo(state); err != nil { |
| 124 | + return nil, err |
| 125 | + } |
| 126 | + if len(state.commits) < len(state.entryPaths) { |
| 127 | + return nil, fmt.Errorf("could not find commits for all entries") |
| 128 | + } |
| 129 | + |
| 130 | + commitsInfo := make([][]interface{}, len(tes)) |
| 131 | + for i, entry := range tes { |
| 132 | + commit, ok := state.commits[path.Join(treePath, entry.Name())] |
| 133 | + if !ok { |
| 134 | + return nil, fmt.Errorf("could not find commit for %s", entry.Name()) |
| 135 | + } |
| 136 | + switch entry.Type { |
| 137 | + case ObjectCommit: |
| 138 | + subModuleURL := "" |
| 139 | + if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil { |
| 140 | + return nil, err |
| 141 | + } else if subModule != nil { |
| 142 | + subModuleURL = subModule.URL |
| 143 | + } |
| 144 | + subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String()) |
| 145 | + commitsInfo[i] = []interface{}{entry, subModuleFile} |
| 146 | + default: |
| 147 | + commitsInfo[i] = []interface{}{entry, commit} |
| 148 | + } |
| 149 | + } |
| 150 | + return commitsInfo, nil |
| 151 | +} |
| 152 | + |
| 153 | +func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) { |
| 154 | + if rawEntryPath[0] == '"' { |
| 155 | + var err error |
| 156 | + rawEntryPath, err = strconv.Unquote(rawEntryPath) |
| 157 | + if err != nil { |
| 158 | + return rawEntryPath, err |
| 159 | + } |
| 160 | + } |
| 161 | + var entryNameStartIndex int |
| 162 | + if len(state.treePath) > 0 { |
| 163 | + entryNameStartIndex = len(state.treePath) + 1 |
| 164 | + } |
| 165 | + |
| 166 | + if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 { |
| 167 | + return rawEntryPath[:entryNameStartIndex+index], nil |
| 168 | + } |
| 169 | + return rawEntryPath, nil |
| 170 | +} |
| 171 | + |
| 172 | +// update report that the given path was last modified by the given commit. |
| 173 | +// Returns whether state.commits was updated |
| 174 | +func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool { |
| 175 | + if _, ok := state.entryPaths[entryPath]; !ok { |
| 176 | + return false |
| 177 | + } |
| 178 | + |
| 179 | + var updated bool |
| 180 | + state.lock.Lock() |
| 181 | + defer state.lock.Unlock() |
| 182 | + if _, ok := state.commits[entryPath]; !ok { |
| 183 | + state.commits[entryPath] = commit |
| 184 | + updated = true |
| 185 | + } |
| 186 | + return updated |
| 187 | +} |
| 188 | + |
| 189 | +const getCommitsInfoPretty = "--pretty=format:%H %ct %s" |
| 190 | + |
| 191 | +func getCommitsInfo(state *getCommitsInfoState) error { |
| 192 | + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) |
| 193 | + defer cancel() |
| 194 | + |
| 195 | + args := []string{"log", getCommitsInfoPretty, "--name-status", "-c"} |
| 196 | + if len(state.treePath) > 0 { |
| 197 | + args = append(args, "--", state.treePath) |
| 198 | + } |
| 199 | + cmd := exec.CommandContext(ctx, "git", args...) |
| 200 | + cmd.Dir = state.headCommit.repo.Path |
| 201 | + |
| 202 | + readCloser, err := cmd.StdoutPipe() |
| 203 | + if err != nil { |
| 204 | + return err |
| 205 | + } |
| 206 | + |
| 207 | + if err := cmd.Start(); err != nil { |
| 208 | + return err |
| 209 | + } |
| 210 | + |
| 211 | + numThreads := runtime.NumCPU() |
| 212 | + done := make(chan error, numThreads) |
| 213 | + for i := 0; i < numThreads; i++ { |
| 214 | + go targetedSearch(state, done) |
| 215 | + } |
| 216 | + |
| 217 | + scanner := bufio.NewScanner(readCloser) |
| 218 | + err = state.processGitLogOutput(scanner) |
| 219 | + for i := 0; i < numThreads; i++ { |
| 220 | + doneErr := <-done |
| 221 | + if doneErr != nil && err == nil { |
| 222 | + err = doneErr |
| 223 | + } |
| 224 | + } |
| 225 | + return err |
| 226 | +} |
| 227 | + |
| 228 | +func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error { |
| 229 | + // keep a local cache of seen paths to avoid acquiring a lock for paths |
| 230 | + // we've already seen |
| 231 | + seenPaths := make(map[string]struct{}, len(state.entryPaths)) |
| 232 | + // number of consecutive commits without any finds |
| 233 | + coldStreak := 0 |
| 234 | + var commit *Commit |
| 235 | + var err error |
| 236 | + for scanner.Scan() { |
| 237 | + line := scanner.Text() |
| 238 | + if len(line) == 0 { // in-between commits |
| 239 | + numRemainingEntries := state.numRemainingEntries() |
| 240 | + if numRemainingEntries == 0 { |
| 241 | + break |
| 242 | + } |
| 243 | + if coldStreak >= deferToTargetedSearchColdStreak && |
| 244 | + numRemainingEntries <= deferToTargetedSearchNumRemainingEntries { |
| 245 | + // stop this untargeted search, and let the targeted-search threads |
| 246 | + // finish the work |
| 247 | + break |
| 248 | + } |
| 249 | + continue |
| 250 | + } |
| 251 | + if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit |
| 252 | + // look for the last tab, since for copies (C) and renames (R) two |
| 253 | + // filenames are printed: src, then dest |
| 254 | + tabIndex := strings.LastIndexByte(line, '\t') |
| 255 | + if tabIndex < 1 { |
| 256 | + return fmt.Errorf("misformatted line: %s", line) |
| 257 | + } |
| 258 | + entryPath, err := state.cleanEntryPath(line[tabIndex+1:]) |
| 259 | + if err != nil { |
| 260 | + return err |
| 261 | + } |
| 262 | + if _, ok := seenPaths[entryPath]; !ok { |
| 263 | + if state.update(entryPath, commit) { |
| 264 | + coldStreak = 0 |
| 265 | + } |
| 266 | + seenPaths[entryPath] = struct{}{} |
| 267 | + } |
| 268 | + continue |
| 269 | + } |
| 270 | + |
| 271 | + // a new commit |
| 272 | + commit, err = parseCommitInfo(line) |
| 273 | + if err != nil { |
| 274 | + return err |
| 275 | + } |
| 276 | + coldStreak++ |
| 277 | + } |
| 278 | + return scanner.Err() |
| 279 | +} |
| 280 | + |
| 281 | +// parseCommitInfo parse a commit from a line of `git log` output. Expects the |
| 282 | +// line to be formatted according to getCommitsInfoPretty. |
| 283 | +func parseCommitInfo(line string) (*Commit, error) { |
| 284 | + if len(line) < 43 { |
| 285 | + return nil, fmt.Errorf("invalid git output: %s", line) |
| 286 | + } |
| 287 | + ref, err := NewIDFromString(line[:40]) |
| 288 | + if err != nil { |
| 289 | + return nil, err |
| 290 | + } |
| 291 | + spaceIndex := strings.IndexByte(line[41:], ' ') |
| 292 | + if spaceIndex < 0 { |
| 293 | + return nil, fmt.Errorf("invalid git output: %s", line) |
| 294 | + } |
| 295 | + unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex]) |
| 296 | + if err != nil { |
| 297 | + return nil, err |
| 298 | + } |
| 299 | + message := line[spaceIndex+42:] |
| 300 | + return &Commit{ |
| 301 | + ID: ref, |
| 302 | + CommitMessage: message, |
| 303 | + Committer: &Signature{ |
| 304 | + When: time.Unix(int64(unixSeconds), 0), |
| 305 | + }, |
| 306 | + }, nil |
| 307 | +} |
0 commit comments