Skip to content

Commit f3e9ef0

Browse files
committed
Make Storage#Archive file filtering configurable
This commit makes the filtering applied while archiving configurable by introducing an optional `ArchiveFileFilter` callback argument and a `SourceIgnoreFilter` implementation. `SourceIgnoreFilter` filters out files matching sourceignore.VCSPatterns and any of the provided patterns. If an empty gitignore.Pattern slice is given, the matcher is set to sourceignore.NewDefaultMatcher. The `GitRepositoryReconciler` now loads the ignore patterns before archiving the repository contents by calling `sourceignore.LoadIgnorePatterns` and other helpers. The loading behavior is **breaking**: `.sourceignore` files in the repository (including its subdirectories) are now still taken into account when `spec.ignore` is defined for a resource. Overriding them remains possible by adding an overriding rule to the `spec.ignore` of the resource. This change also makes it possible for the `BucketReconciler` to not configure a callback at all, which prevents looking for ignore matches twice. To finalize the bucket refactor, the reconciler now looks for a `.sourceignore` file in the root of the bucket, providing an additional way of configuring (global) exclusions. Signed-off-by: Hidde Beydals <hello@hidde.co>
1 parent 804b04a commit f3e9ef0

File tree

7 files changed

+562
-253
lines changed

7 files changed

+562
-253
lines changed

controllers/bucket_controller.go

+19-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ limitations under the License.
1717
package controllers
1818

1919
import (
20-
"bytes"
2120
"context"
2221
"crypto/sha1"
2322
"fmt"
@@ -204,7 +203,23 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
204203
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
205204
}
206205

207-
ps := sourceignore.GetPatterns(bytes.NewBufferString(*bucket.Spec.Ignore), nil)
206+
// Look for file with ignore rules first
207+
// NB: S3 has flat filepath keys making it impossible to look
208+
// for files in "subdirectories" without building up a tree first.
209+
path := filepath.Join(tempDir, sourceignore.IgnoreFile)
210+
if err := s3Client.FGetObject(ctxTimeout, bucket.Spec.BucketName, sourceignore.IgnoreFile, path, minio.GetObjectOptions{}); err != nil {
211+
if resp, ok := err.(minio.ErrorResponse); ok && resp.Code != "NoSuchKey" {
212+
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
213+
}
214+
}
215+
ps, err := sourceignore.ReadIgnoreFile(path, nil)
216+
if err != nil {
217+
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
218+
}
219+
// In-spec patterns take precedence
220+
if bucket.Spec.Ignore != nil {
221+
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*bucket.Spec.Ignore), nil)...)
222+
}
208223
matcher := sourceignore.NewMatcher(ps)
209224

210225
// download bucket content
@@ -217,7 +232,7 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
217232
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
218233
}
219234

220-
if strings.HasSuffix(object.Key, "/") {
235+
if strings.HasSuffix(object.Key, "/") || object.Key == sourceignore.IgnoreFile {
221236
continue
222237
}
223238

@@ -264,7 +279,7 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
264279
defer unlock()
265280

266281
// archive artifact and check integrity
267-
if err := r.Storage.Archive(&artifact, tempDir, bucket.Spec.Ignore); err != nil {
282+
if err := r.Storage.Archive(&artifact, tempDir, nil); err != nil {
268283
err = fmt.Errorf("storage archive error: %w", err)
269284
return sourcev1.BucketNotReady(bucket, sourcev1.StorageOperationFailedReason, err.Error()), err
270285
}

controllers/gitrepository_controller.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"io/ioutil"
2323
"os"
24+
"strings"
2425
"time"
2526

2627
"github.com/go-logr/logr"
@@ -45,6 +46,7 @@ import (
4546
sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
4647
"github.com/fluxcd/source-controller/pkg/git"
4748
"github.com/fluxcd/source-controller/pkg/git/strategy"
49+
"github.com/fluxcd/source-controller/pkg/sourceignore"
4850
)
4951

5052
// +kubebuilder:rbac:groups=source.toolkit.fluxcd.io,resources=gitrepositories,verbs=get;list;watch;create;update;patch;delete
@@ -270,7 +272,15 @@ func (r *GitRepositoryReconciler) reconcile(ctx context.Context, repository sour
270272
defer unlock()
271273

272274
// archive artifact and check integrity
273-
if err := r.Storage.Archive(&artifact, tmpGit, repository.Spec.Ignore); err != nil {
275+
ps, err := sourceignore.LoadIgnorePatterns(tmpGit, nil)
276+
if err != nil {
277+
err = fmt.Errorf(".sourceignore error: %w", err)
278+
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
279+
}
280+
if repository.Spec.Ignore != nil {
281+
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*repository.Spec.Ignore), nil)...)
282+
}
283+
if err := r.Storage.Archive(&artifact, tmpGit, SourceIgnoreFilter(ps, nil)); err != nil {
274284
err = fmt.Errorf("storage archive error: %w", err)
275285
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
276286
}

controllers/storage.go

+61-60
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,6 @@ import (
4040
"github.com/fluxcd/source-controller/pkg/sourceignore"
4141
)
4242

43-
const (
44-
excludeFile = ".sourceignore"
45-
excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"
46-
excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip"
47-
excludeCI = ".github/,.circleci/,.travis.yml,.gitlab-ci.yml,appveyor.yml,.drone.yml,cloudbuild.yaml,codeship-services.yml,codeship-steps.yml"
48-
excludeExtra = "**/.goreleaser.yml,**/.sops.yaml,**/.flux.yaml"
49-
)
50-
5143
// Storage manages artifacts
5244
type Storage struct {
5345
// BasePath is the local directory path where the source artifacts are stored.
@@ -150,19 +142,35 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
150142
return fi.Mode().IsRegular()
151143
}
152144

153-
// Archive atomically archives the given directory as a tarball to the given v1beta1.Artifact
154-
// path, excluding any VCS specific files and directories, or any of the excludes defined in
155-
// the excludeFiles. If successful, it sets the checksum and last update time on the artifact.
156-
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *string) (err error) {
157-
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
158-
return fmt.Errorf("invalid dir path: %s", dir)
145+
// ArchiveFileFilter must return true if a file should not be included
146+
// in the archive after inspecting the given path and/or os.FileInfo.
147+
type ArchiveFileFilter func(p string, fi os.FileInfo) bool
148+
149+
// SourceIgnoreFilter returns an ArchiveFileFilter that filters out
150+
// files matching sourceignore.VCSPatterns and any of the provided
151+
// patterns. If an empty gitignore.Pattern slice is given, the matcher
152+
// is set to sourceignore.NewDefaultMatcher.
153+
func SourceIgnoreFilter(ps []gitignore.Pattern, domain []string) ArchiveFileFilter {
154+
matcher := sourceignore.NewDefaultMatcher(ps, domain)
155+
if len(ps) > 0 {
156+
ps = append(sourceignore.VCSPatterns(domain), ps...)
157+
matcher = sourceignore.NewMatcher(ps)
158+
}
159+
return func(p string, fi os.FileInfo) bool {
160+
// The directory is always false as the archiver does already skip
161+
// directories.
162+
return matcher.Match(strings.Split(p, string(filepath.Separator)), false)
159163
}
164+
}
160165

161-
ps, err := sourceignore.LoadExcludePatterns(dir, ignore)
162-
if err != nil {
163-
return err
166+
// Archive atomically archives the given directory as a tarball to the
167+
// given v1beta1.Artifact path, excluding directories and any
168+
// ArchiveFileFilter matches. If successful, it sets the checksum and
169+
// last update time on the artifact.
170+
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, filter ArchiveFileFilter) (err error) {
171+
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
172+
return fmt.Errorf("invalid dir path: %s", dir)
164173
}
165-
matcher := sourceignore.NewMatcher(ps)
166174

167175
localPath := s.LocalPath(*artifact)
168176
tf, err := ioutil.TempFile(filepath.Split(localPath))
@@ -181,43 +189,7 @@ func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *strin
181189

182190
gw := gzip.NewWriter(mw)
183191
tw := tar.NewWriter(gw)
184-
if err := writeToArchiveExcludeMatches(dir, matcher, tw); err != nil {
185-
tw.Close()
186-
gw.Close()
187-
tf.Close()
188-
return err
189-
}
190-
191-
if err := tw.Close(); err != nil {
192-
gw.Close()
193-
tf.Close()
194-
return err
195-
}
196-
if err := gw.Close(); err != nil {
197-
tf.Close()
198-
return err
199-
}
200-
if err := tf.Close(); err != nil {
201-
return err
202-
}
203-
204-
if err := os.Chmod(tmpName, 0644); err != nil {
205-
return err
206-
}
207-
208-
if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
209-
return err
210-
}
211-
212-
artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
213-
artifact.LastUpdateTime = metav1.Now()
214-
return nil
215-
}
216-
217-
// writeToArchiveExcludeMatches walks over the given dir and writes any regular file that does
218-
// not match the given gitignore.Matcher.
219-
func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer *tar.Writer) error {
220-
fn := func(p string, fi os.FileInfo, err error) error {
192+
if err := filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
221193
if err != nil {
222194
return err
223195
}
@@ -227,8 +199,8 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
227199
return nil
228200
}
229201

230-
// Ignore excluded extensions and files
231-
if matcher.Match(strings.Split(p, "/"), false) {
202+
// Skip filtered files
203+
if filter != nil && filter(p, fi) {
232204
return nil
233205
}
234206

@@ -248,7 +220,7 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
248220
}
249221
header.Name = relFilePath
250222

251-
if err := writer.WriteHeader(header); err != nil {
223+
if err := tw.WriteHeader(header); err != nil {
252224
return err
253225
}
254226

@@ -257,13 +229,42 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
257229
f.Close()
258230
return err
259231
}
260-
if _, err := io.Copy(writer, f); err != nil {
232+
if _, err := io.Copy(tw, f); err != nil {
261233
f.Close()
262234
return err
263235
}
264236
return f.Close()
237+
}); err != nil {
238+
tw.Close()
239+
gw.Close()
240+
tf.Close()
241+
return err
265242
}
266-
return filepath.Walk(dir, fn)
243+
244+
if err := tw.Close(); err != nil {
245+
gw.Close()
246+
tf.Close()
247+
return err
248+
}
249+
if err := gw.Close(); err != nil {
250+
tf.Close()
251+
return err
252+
}
253+
if err := tf.Close(); err != nil {
254+
return err
255+
}
256+
257+
if err := os.Chmod(tmpName, 0644); err != nil {
258+
return err
259+
}
260+
261+
if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
262+
return err
263+
}
264+
265+
artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
266+
artifact.LastUpdateTime = metav1.Now()
267+
return nil
267268
}
268269

269270
// AtomicWriteFile atomically writes the io.Reader contents to the v1beta1.Artifact path.

0 commit comments

Comments
 (0)