Skip to content

Commit 032875c

Browse files
authored
Merge pull request #59 from fluxcd/go-native-tar
2 parents 6b2556d + 9498e7d commit 032875c

File tree

4 files changed

+111
-35
lines changed

4 files changed

+111
-35
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o source-c
2020

2121
FROM alpine:3.11
2222

23-
RUN apk add --no-cache ca-certificates tar tini 'git>=2.12.0' socat curl bash
23+
RUN apk add --no-cache ca-certificates tini
2424

2525
COPY --from=builder /workspace/source-controller /usr/local/bin/
2626

controllers/gitrepository_controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ func (r *GitRepositoryReconciler) sync(ctx context.Context, repository sourcev1.
186186
defer unlock()
187187

188188
// archive artifact and check integrity
189-
err = r.Storage.Archive(artifact, tmpGit, true)
189+
err = r.Storage.Archive(artifact, tmpGit)
190190
if err != nil {
191191
err = fmt.Errorf("storage archive error: %w", err)
192192
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err

controllers/storage.go

+89-31
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,29 @@ limitations under the License.
1717
package controllers
1818

1919
import (
20-
"context"
20+
"archive/tar"
21+
"bufio"
22+
"compress/gzip"
2123
"crypto/sha1"
2224
"fmt"
25+
"io"
2326
"io/ioutil"
2427
"os"
25-
"os/exec"
2628
"path/filepath"
2729
"strings"
2830
"time"
2931

32+
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
3033
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3134

3235
sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1"
3336
"github.com/fluxcd/source-controller/internal/lockedfile"
3437
)
3538

3639
// Default exclusion settings applied when archiving a source directory.
const (
	// excludeFile is the name of the optional, repository-provided ignore
	// file whose entries are appended to the default exclusions.
	excludeFile = ".sourceignore"
	// excludeVCS is a comma-separated list of gitignore-style patterns for
	// Git metadata that is never archived.
	excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"
	// excludeExt is a comma-separated list of gitignore-style patterns for
	// file extensions excluded by default. Every entry must be of the form
	// "*.ext"; a pattern such as ".*flv" would only match names that start
	// with a dot and would let "video.flv" through.
	excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip"
)
4144

4245
// Storage manages artifacts
@@ -120,44 +123,74 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
120123

121124
// Archive creates a tar.gz to the artifact path from the given dir excluding any VCS specific
122125
// files and directories, or any of the excludes defined in the excludeFiles.
123-
func (s *Storage) Archive(artifact sourcev1.Artifact, dir string, integrityCheck bool) error {
124-
ctx, cancel := context.WithTimeout(context.Background(), s.Timeout)
125-
defer cancel()
126-
127-
var tarExcludes []string
128-
if _, err := os.Stat(filepath.Join(dir, excludeFile)); !os.IsNotExist(err) {
129-
tarExcludes = append(tarExcludes, "--exclude-file="+excludeFile)
130-
} else {
131-
tarExcludes = append(tarExcludes, fmt.Sprintf("--exclude=\\*.{%s}", defaultExcludes))
126+
func (s *Storage) Archive(artifact sourcev1.Artifact, dir string) error {
127+
if _, err := os.Stat(dir); err != nil {
128+
return err
132129
}
133-
for _, excl := range strings.Split(excludeVCS, ",") {
134-
tarExcludes = append(tarExcludes, "--exclude="+excl)
130+
131+
ps, err := loadExcludePatterns(dir)
132+
if err != nil {
133+
return err
135134
}
136-
cmd := fmt.Sprintf("cd %s && tar -c %s -f - . | gzip > %s", dir, strings.Join(tarExcludes, " "), artifact.Path)
137-
command := exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
135+
matcher := gitignore.NewMatcher(ps)
138136

139-
err := command.Run()
137+
gzFile, err := os.Create(artifact.Path)
140138
if err != nil {
141-
return fmt.Errorf("command '%s' failed: %w", cmd, err)
139+
return err
142140
}
141+
defer gzFile.Close()
142+
143+
gw := gzip.NewWriter(gzFile)
144+
defer gw.Close()
145+
146+
tw := tar.NewWriter(gw)
147+
defer tw.Close()
143148

144-
if integrityCheck {
145-
cmd = fmt.Sprintf("gunzip -t %s", artifact.Path)
146-
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
147-
err = command.Run()
149+
return filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
148150
if err != nil {
149-
return fmt.Errorf("gzip integrity check failed")
151+
return err
152+
}
153+
154+
// Ignore anything that is not a file (directories, symlinks)
155+
if !fi.Mode().IsRegular() {
156+
return nil
150157
}
151158

152-
cmd = fmt.Sprintf("tar -tzf %s >/dev/null", artifact.Path)
153-
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
154-
err = command.Run()
159+
// Ignore excluded extensions and files
160+
if matcher.Match(strings.Split(p, "/"), false) {
161+
return nil
162+
}
163+
164+
header, err := tar.FileInfoHeader(fi, p)
155165
if err != nil {
156-
return fmt.Errorf("tar integrity check failed")
166+
return err
157167
}
158-
}
168+
// The name needs to be modified to maintain directory structure
169+
// as tar.FileInfoHeader only has access to the base name of the file.
170+
// Ref: https://golang.org/src/archive/tar/common.go?#L626
171+
relFilePath := p
172+
if filepath.IsAbs(dir) {
173+
relFilePath, err = filepath.Rel(dir, p)
174+
if err != nil {
175+
return err
176+
}
177+
}
178+
header.Name = relFilePath
159179

160-
return nil
180+
if err := tw.WriteHeader(header); err != nil {
181+
return err
182+
}
183+
184+
f, err := os.Open(p)
185+
if err != nil {
186+
return err
187+
}
188+
if _, err := io.Copy(tw, f); err != nil {
189+
f.Close()
190+
return err
191+
}
192+
return f.Close()
193+
})
161194
}
162195

163196
// WriteFile writes the given bytes to the artifact path if the checksum differs
@@ -207,3 +240,28 @@ func (s *Storage) Lock(artifact sourcev1.Artifact) (unlock func(), err error) {
207240
mutex := lockedfile.MutexAt(lockFile)
208241
return mutex.Lock()
209242
}
243+
244+
func loadExcludePatterns(dir string) ([]gitignore.Pattern, error) {
245+
path := strings.Split(dir, "/")
246+
var ps []gitignore.Pattern
247+
for _, p := range strings.Split(excludeVCS, ",") {
248+
ps = append(ps, gitignore.ParsePattern(p, path))
249+
}
250+
for _, p := range strings.Split(excludeExt, ",") {
251+
ps = append(ps, gitignore.ParsePattern(p, path))
252+
}
253+
if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil {
254+
defer f.Close()
255+
256+
scanner := bufio.NewScanner(f)
257+
for scanner.Scan() {
258+
s := scanner.Text()
259+
if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 {
260+
ps = append(ps, gitignore.ParsePattern(s, path))
261+
}
262+
}
263+
} else if !os.IsNotExist(err) {
264+
return nil, err
265+
}
266+
return ps, nil
267+
}

docs/spec/v1alpha1/gitrepositories.md

+20-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Git Repositories
22

33
The `GitRepository` API defines a source for artifacts coming from Git. The
4-
resource exposes the latest synchronized state from Git as an artifact in
5-
an archive.
4+
resource exposes the latest synchronized state from Git as an artifact in a
5+
[gzip compressed TAR archive](#artifact).
66

77
## Specification
88

@@ -112,6 +112,24 @@ const (
112112
)
113113
```
114114

115+
## Artifact
116+
117+
The `GitRepository` API defines a source for artifacts coming from Git. The
118+
resource exposes the latest synchronized state from Git as an artifact in a
119+
gzip compressed TAR archive (`<commit hash>.tar.gz`).
120+
121+
### Excluding files
122+
123+
Git files (`.git/`, `.gitignore`, `.gitmodules`, and `.gitattributes`) are
124+
excluded from the archive by default, as well as some extensions (`.jpg, .jpeg,
125+
.gif, .png, .wmv, .flv, .tar.gz, .zip`).
126+
127+
Excluding additional files from the archive is possible by adding a
128+
`.sourceignore` file in the root of the repository. The `.sourceignore` file
129+
follows [the `.gitignore` pattern
130+
format](https://git-scm.com/docs/gitignore#_pattern_format). Pattern
131+
entries may overrule default exclusions.
132+
115133
## Spec examples
116134

117135
Pull the master branch of a public repository every minute:

0 commit comments

Comments
 (0)