Skip to content

Commit 033d929

Browse files
Allow skipping forks and mirrors from being indexed (#23187)
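This PR adds a new option, `REPO_INDEXER_REPO_TYPES`, that controls which repository types (sources, forks, mirrors, templates) are indexed for repo/code search, so that forks and mirrors can be skipped. Related: #22842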
1 parent cff4e37 commit 033d929

File tree

4 files changed

+51
-16
lines changed

4 files changed

+51
-16
lines changed

custom/conf/app.example.ini

+4
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,10 @@ LEVEL = Info
13621362
;; repo indexer by default disabled, since it uses a lot of disk space
13631363
;REPO_INDEXER_ENABLED = false
13641364
;;
1365+
;; Repo indexer units: the repository types to index. Can be `sources`, `forks`, `mirrors`, `templates` or any combination of them separated by a comma.
1366+
;; If empty, it defaults to `sources` only. To disable the repo indexer entirely, see REPO_INDEXER_ENABLED.
1367+
;REPO_INDEXER_REPO_TYPES = sources,forks,mirrors,templates
1368+
;;
13651369
;; Code search engine type, could be `bleve` or `elasticsearch`.
13661370
;REPO_INDEXER_TYPE = bleve
13671371
;;

docs/content/doc/administration/config-cheat-sheet.en-us.md

+1
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ relation to port exhaustion.
465465
- `ISSUE_INDEXER_PATH`: **indexers/issues.bleve**: Index file used for issue search; available when ISSUE_INDEXER_TYPE is bleve or elasticsearch. Relative paths will be made absolute against _`AppWorkPath`_.
466466

467467
- `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size).
468+
- `REPO_INDEXER_REPO_TYPES`: **sources,forks,mirrors,templates**: Repo indexer units, i.e. the repository types to index. Can be `sources`, `forks`, `mirrors`, `templates` or any combination of them separated by a comma. If empty, it defaults to `sources` only. To disable the repo indexer entirely, see `REPO_INDEXER_ENABLED`.
468469
- `REPO_INDEXER_TYPE`: **bleve**: Code search engine type, could be `bleve` or `elasticsearch`.
469470
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
470471
- `REPO_INDEXER_CONN_STR`: ****: Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch, e.g. http://elastic:changeme@localhost:9200

modules/indexer/code/indexer.go

+27
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"code.gitea.io/gitea/modules/queue"
2020
"code.gitea.io/gitea/modules/setting"
2121
"code.gitea.io/gitea/modules/timeutil"
22+
"code.gitea.io/gitea/modules/util"
2223
)
2324

2425
// SearchResult result of performing a search in a repo
@@ -91,6 +92,32 @@ func index(ctx context.Context, indexer Indexer, repoID int64) error {
9192
return err
9293
}
9394

95+
repoTypes := setting.Indexer.RepoIndexerRepoTypes
96+
97+
if len(repoTypes) == 0 {
98+
repoTypes = []string{"sources"}
99+
}
100+
101+
// skip forks from being indexed if unit is not present
102+
if !util.SliceContains(repoTypes, "forks") && repo.IsFork {
103+
return nil
104+
}
105+
106+
// skip mirrors from being indexed if unit is not present
107+
if !util.SliceContains(repoTypes, "mirrors") && repo.IsMirror {
108+
return nil
109+
}
110+
111+
// skip templates from being indexed if unit is not present
112+
if !util.SliceContains(repoTypes, "templates") && repo.IsTemplate {
113+
return nil
114+
}
115+
116+
// skip regular repos from being indexed if unit is not present
117+
if !util.SliceContains(repoTypes, "sources") && !repo.IsFork && !repo.IsMirror && !repo.IsTemplate {
118+
return nil
119+
}
120+
94121
sha, err := getDefaultBranchSha(ctx, repo)
95122
if err != nil {
96123
return err

modules/setting/indexer.go

+19-16
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,31 @@ var Indexer = struct {
2323
IssueIndexerName string
2424
StartupTimeout time.Duration
2525

26-
RepoIndexerEnabled bool
27-
RepoType string
28-
RepoPath string
29-
RepoConnStr string
30-
RepoIndexerName string
31-
MaxIndexerFileSize int64
32-
IncludePatterns []glob.Glob
33-
ExcludePatterns []glob.Glob
34-
ExcludeVendored bool
26+
RepoIndexerEnabled bool
27+
RepoIndexerRepoTypes []string
28+
RepoType string
29+
RepoPath string
30+
RepoConnStr string
31+
RepoIndexerName string
32+
MaxIndexerFileSize int64
33+
IncludePatterns []glob.Glob
34+
ExcludePatterns []glob.Glob
35+
ExcludeVendored bool
3536
}{
3637
IssueType: "bleve",
3738
IssuePath: "indexers/issues.bleve",
3839
IssueConnStr: "",
3940
IssueConnAuth: "",
4041
IssueIndexerName: "gitea_issues",
4142

42-
RepoIndexerEnabled: false,
43-
RepoType: "bleve",
44-
RepoPath: "indexers/repos.bleve",
45-
RepoConnStr: "",
46-
RepoIndexerName: "gitea_codes",
47-
MaxIndexerFileSize: 1024 * 1024,
48-
ExcludeVendored: true,
43+
RepoIndexerEnabled: false,
44+
RepoIndexerRepoTypes: []string{"sources", "forks", "mirrors", "templates"},
45+
RepoType: "bleve",
46+
RepoPath: "indexers/repos.bleve",
47+
RepoConnStr: "",
48+
RepoIndexerName: "gitea_codes",
49+
MaxIndexerFileSize: 1024 * 1024,
50+
ExcludeVendored: true,
4951
}
5052

5153
func loadIndexerFrom(rootCfg ConfigProvider) {
@@ -71,6 +73,7 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
7173
Indexer.IssueIndexerName = sec.Key("ISSUE_INDEXER_NAME").MustString(Indexer.IssueIndexerName)
7274

7375
Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
76+
Indexer.RepoIndexerRepoTypes = strings.Split(sec.Key("REPO_INDEXER_REPO_TYPES").MustString("sources,forks,mirrors,templates"), ",")
7477
Indexer.RepoType = sec.Key("REPO_INDEXER_TYPE").MustString("bleve")
7578
Indexer.RepoPath = filepath.ToSlash(sec.Key("REPO_INDEXER_PATH").MustString(filepath.ToSlash(filepath.Join(AppDataPath, "indexers/repos.bleve"))))
7679
if !filepath.IsAbs(Indexer.RepoPath) {

0 commit comments

Comments
 (0)