Skip to content

Commit 2a278b9

Browse files
authored
Add support for linguist-detectable and linguist-documentation (#29267)
Add support for the `linguist-detectable` and `linguist-documentation` attributes, and add tests for both. See https://github.com/github-linguist/linguist/blob/master/docs/overrides.md#detectable and https://github.com/github-linguist/linguist/blob/master/docs/overrides.md#documentation
1 parent 7d0903b commit 2a278b9

File tree

4 files changed

+363
-69
lines changed

4 files changed

+363
-69
lines changed

modules/git/repo_attribute.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"os"
1212

1313
"code.gitea.io/gitea/modules/log"
14+
"code.gitea.io/gitea/modules/optional"
1415
)
1516

1617
// CheckAttributeOpts represents the possible options to CheckAttribute
@@ -291,7 +292,7 @@ func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeRe
291292
}
292293

293294
checker := &CheckAttributeReader{
294-
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
295+
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"},
295296
Repo: repo,
296297
IndexFile: indexFilename,
297298
WorkTree: worktree,
@@ -316,3 +317,23 @@ func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeRe
316317

317318
return checker, deferable
318319
}
320+
321+
// true if "set"/"true", false if "unset"/"false", none otherwise
322+
func attributeToBool(attr map[string]string, name string) optional.Option[bool] {
323+
if value, has := attr[name]; has && value != "unspecified" {
324+
switch value {
325+
case "set", "true":
326+
return optional.Some(true)
327+
case "unset", "false":
328+
return optional.Some(false)
329+
}
330+
}
331+
return optional.None[bool]()
332+
}
333+
334+
func attributeToString(attr map[string]string, name string) optional.Option[string] {
335+
if value, has := attr[name]; has && value != "unspecified" {
336+
return optional.Some(value)
337+
}
338+
return optional.None[string]()
339+
}

modules/git/repo_language_stats_gogit.go

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212

1313
"code.gitea.io/gitea/modules/analyze"
14+
"code.gitea.io/gitea/modules/optional"
1415

1516
"github.com/go-enry/go-enry/v2"
1617
"github.com/go-git/go-git/v5"
@@ -57,25 +58,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
5758
return nil
5859
}
5960

60-
notVendored := false
61-
notGenerated := false
61+
isVendored := optional.None[bool]()
62+
isGenerated := optional.None[bool]()
63+
isDocumentation := optional.None[bool]()
64+
isDetectable := optional.None[bool]()
6265

6366
if checker != nil {
6467
attrs, err := checker.CheckPath(f.Name)
6568
if err == nil {
66-
if vendored, has := attrs["linguist-vendored"]; has {
67-
if vendored == "set" || vendored == "true" {
68-
return nil
69-
}
70-
notVendored = vendored == "false"
69+
isVendored = attributeToBool(attrs, "linguist-vendored")
70+
if isVendored.ValueOrDefault(false) {
71+
return nil
72+
}
73+
74+
isGenerated = attributeToBool(attrs, "linguist-generated")
75+
if isGenerated.ValueOrDefault(false) {
76+
return nil
7177
}
72-
if generated, has := attrs["linguist-generated"]; has {
73-
if generated == "set" || generated == "true" {
74-
return nil
78+
79+
isDocumentation = attributeToBool(attrs, "linguist-documentation")
80+
if isDocumentation.ValueOrDefault(false) {
81+
return nil
82+
}
83+
84+
isDetectable = attributeToBool(attrs, "linguist-detectable")
85+
if !isDetectable.ValueOrDefault(true) {
86+
return nil
87+
}
88+
89+
hasLanguage := attributeToString(attrs, "linguist-language")
90+
if hasLanguage.Value() == "" {
91+
hasLanguage = attributeToString(attrs, "gitlab-language")
92+
if hasLanguage.Has() {
93+
language := hasLanguage.Value()
94+
if idx := strings.IndexByte(language, '?'); idx >= 0 {
95+
hasLanguage = optional.Some(language[:idx])
96+
}
7597
}
76-
notGenerated = generated == "false"
7798
}
78-
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
99+
if hasLanguage.Value() != "" {
100+
language := hasLanguage.Value()
101+
79102
// group languages, such as Pug -> HTML; SCSS -> CSS
80103
group := enry.GetLanguageGroup(language)
81104
if len(group) != 0 {
@@ -85,28 +108,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
85108
// this language will always be added to the size
86109
sizes[language] += f.Size
87110
return nil
88-
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
89-
// strip off a ? if present
90-
if idx := strings.IndexByte(language, '?'); idx >= 0 {
91-
language = language[:idx]
92-
}
93-
if len(language) != 0 {
94-
// group languages, such as Pug -> HTML; SCSS -> CSS
95-
group := enry.GetLanguageGroup(language)
96-
if len(group) != 0 {
97-
language = group
98-
}
99-
100-
// this language will always be added to the size
101-
sizes[language] += f.Size
102-
return nil
103-
}
104111
}
105112
}
106113
}
107114

108-
if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
109-
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
115+
if (!isVendored.Has() && analyze.IsVendor(f.Name)) ||
116+
enry.IsDotFile(f.Name) ||
117+
(!isDocumentation.Has() && enry.IsDocumentation(f.Name)) ||
118+
enry.IsConfiguration(f.Name) {
110119
return nil
111120
}
112121

@@ -115,12 +124,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
115124
if f.Size <= bigFileSize {
116125
content, _ = readFile(f, fileSizeLimit)
117126
}
118-
if !notGenerated && enry.IsGenerated(f.Name, content) {
127+
if !isGenerated.Has() && enry.IsGenerated(f.Name, content) {
119128
return nil
120129
}
121130

122-
// TODO: Use .gitattributes file for linguist overrides
123-
124131
language := analyze.GetCodeLanguage(f.Name, content)
125132
if language == enry.OtherLanguage || language == "" {
126133
return nil
@@ -138,7 +145,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
138145
included = langtype == enry.Programming || langtype == enry.Markup
139146
includedLanguage[language] = included
140147
}
141-
if included {
148+
if included || isDetectable.ValueOrDefault(false) {
142149
sizes[language] += f.Size
143150
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
144151
firstExcludedLanguage = language

modules/git/repo_language_stats_nogogit.go

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"code.gitea.io/gitea/modules/analyze"
1414
"code.gitea.io/gitea/modules/log"
15+
"code.gitea.io/gitea/modules/optional"
1516

1617
"github.com/go-enry/go-enry/v2"
1718
)
@@ -88,25 +89,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
8889
continue
8990
}
9091

91-
notVendored := false
92-
notGenerated := false
92+
isVendored := optional.None[bool]()
93+
isGenerated := optional.None[bool]()
94+
isDocumentation := optional.None[bool]()
95+
isDetectable := optional.None[bool]()
9396

9497
if checker != nil {
9598
attrs, err := checker.CheckPath(f.Name())
9699
if err == nil {
97-
if vendored, has := attrs["linguist-vendored"]; has {
98-
if vendored == "set" || vendored == "true" {
99-
continue
100-
}
101-
notVendored = vendored == "false"
100+
isVendored = attributeToBool(attrs, "linguist-vendored")
101+
if isVendored.ValueOrDefault(false) {
102+
continue
103+
}
104+
105+
isGenerated = attributeToBool(attrs, "linguist-generated")
106+
if isGenerated.ValueOrDefault(false) {
107+
continue
102108
}
103-
if generated, has := attrs["linguist-generated"]; has {
104-
if generated == "set" || generated == "true" {
105-
continue
109+
110+
isDocumentation = attributeToBool(attrs, "linguist-documentation")
111+
if isDocumentation.ValueOrDefault(false) {
112+
continue
113+
}
114+
115+
isDetectable = attributeToBool(attrs, "linguist-detectable")
116+
if !isDetectable.ValueOrDefault(true) {
117+
continue
118+
}
119+
120+
hasLanguage := attributeToString(attrs, "linguist-language")
121+
if hasLanguage.Value() == "" {
122+
hasLanguage = attributeToString(attrs, "gitlab-language")
123+
if hasLanguage.Has() {
124+
language := hasLanguage.Value()
125+
if idx := strings.IndexByte(language, '?'); idx >= 0 {
126+
hasLanguage = optional.Some(language[:idx])
127+
}
106128
}
107-
notGenerated = generated == "false"
108129
}
109-
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
130+
if hasLanguage.Value() != "" {
131+
language := hasLanguage.Value()
132+
110133
// group languages, such as Pug -> HTML; SCSS -> CSS
111134
group := enry.GetLanguageGroup(language)
112135
if len(group) != 0 {
@@ -116,29 +139,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
116139
// this language will always be added to the size
117140
sizes[language] += f.Size()
118141
continue
119-
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
120-
// strip off a ? if present
121-
if idx := strings.IndexByte(language, '?'); idx >= 0 {
122-
language = language[:idx]
123-
}
124-
if len(language) != 0 {
125-
// group languages, such as Pug -> HTML; SCSS -> CSS
126-
group := enry.GetLanguageGroup(language)
127-
if len(group) != 0 {
128-
language = group
129-
}
130-
131-
// this language will always be added to the size
132-
sizes[language] += f.Size()
133-
continue
134-
}
135142
}
136-
137143
}
138144
}
139145

140-
if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
141-
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
146+
if (!isVendored.Has() && analyze.IsVendor(f.Name())) ||
147+
enry.IsDotFile(f.Name()) ||
148+
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
149+
enry.IsConfiguration(f.Name()) {
142150
continue
143151
}
144152

@@ -170,7 +178,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
170178
return nil, err
171179
}
172180
}
173-
if !notGenerated && enry.IsGenerated(f.Name(), content) {
181+
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
174182
continue
175183
}
176184

@@ -193,13 +201,12 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
193201
included = langType == enry.Programming || langType == enry.Markup
194202
includedLanguage[language] = included
195203
}
196-
if included {
204+
if included || isDetectable.ValueOrDefault(false) {
197205
sizes[language] += f.Size()
198206
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
199207
firstExcludedLanguage = language
200208
firstExcludedLanguageSize += f.Size()
201209
}
202-
continue
203210
}
204211

205212
// If there are no included languages add the first excluded language

0 commit comments

Comments
 (0)