Skip to content

Commit a435d64

Browse files
committed
Rework file highlight rendering and fix yaml copy-paste
1 parent 3310dd1 commit a435d64

File tree

3 files changed

+254
-141
lines changed

3 files changed

+254
-141
lines changed

modules/highlight/highlight.go

+93-53
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"bytes"
1111
"fmt"
1212
gohtml "html"
13+
"io"
1314
"path/filepath"
1415
"strings"
1516
"sync"
@@ -26,7 +27,13 @@ import (
2627
)
2728

2829
// don't index files larger than this many bytes for performance purposes
29-
const sizeLimit = 1000000
30+
const sizeLimit = 1024 * 1024
31+
32+
// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept
33+
// this option is here for 2 purposes:
34+
// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future
35+
// (2) make it clear to do tests: "&#10;" is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored
36+
var newLineInHTML = "&#10;"
3037

3138
var (
3239
// For custom user mapping
@@ -46,7 +53,6 @@ func NewContext() {
4653
highlightMapping[keys[i].Name()] = keys[i].Value()
4754
}
4855
}
49-
5056
// The size 512 is simply a conservative rule of thumb
5157
c, err := lru.New2Q(512)
5258
if err != nil {
@@ -60,7 +66,7 @@ func NewContext() {
6066
func Code(fileName, language, code string) string {
6167
NewContext()
6268

63-
// diff view newline will be passed as empty, change to literal \n so it can be copied
69+
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
6470
// preserve literal newline in blame view
6571
if code == "" || code == "\n" {
6672
return "\n"
@@ -128,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
128134
return code
129135
}
130136

131-
htmlw.Flush()
137+
_ = htmlw.Flush()
132138
// Chroma will add newlines for certain lexers in order to highlight them properly
133-
// Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output
139+
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
134140
return strings.TrimSuffix(htmlbuf.String(), "\n")
135141
}
136142

137-
// File returns a slice of chroma syntax highlighted lines of code
138-
func File(numLines int, fileName, language string, code []byte) []string {
143+
// File returns a slice of chroma syntax highlighted HTML lines of code
144+
func File(fileName, language string, code []byte) ([]string, error) {
139145
NewContext()
140146

141147
if len(code) > sizeLimit {
142-
return plainText(string(code), numLines)
148+
return PlainText(code), nil
143149
}
150+
144151
formatter := html.New(html.WithClasses(true),
145152
html.WithLineNumbers(false),
146153
html.PreventSurroundingPre(true),
147154
)
148155

149-
if formatter == nil {
150-
log.Error("Couldn't create chroma formatter")
151-
return plainText(string(code), numLines)
152-
}
153-
154-
htmlbuf := bytes.Buffer{}
155-
htmlw := bufio.NewWriter(&htmlbuf)
156+
htmlBuf := bytes.Buffer{}
157+
htmlWriter := bufio.NewWriter(&htmlBuf)
156158

157159
var lexer chroma.Lexer
158160

159161
// provided language overrides everything
160-
if len(language) > 0 {
162+
if language != "" {
161163
lexer = lexers.Get(language)
162164
}
163165

@@ -168,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string {
168170
}
169171

170172
if lexer == nil {
171-
language := analyze.GetCodeLanguage(fileName, code)
173+
guessLanguage := analyze.GetCodeLanguage(fileName, code)
172174

173-
lexer = lexers.Get(language)
175+
lexer = lexers.Get(guessLanguage)
174176
if lexer == nil {
175177
lexer = lexers.Match(fileName)
176178
if lexer == nil {
@@ -181,54 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string {
181183

182184
iterator, err := lexer.Tokenise(nil, string(code))
183185
if err != nil {
184-
log.Error("Can't tokenize code: %v", err)
185-
return plainText(string(code), numLines)
186+
return nil, fmt.Errorf("can't tokenize code: %w", err)
186187
}
187188

188-
err = formatter.Format(htmlw, styles.GitHub, iterator)
189+
err = formatter.Format(htmlWriter, styles.GitHub, iterator)
189190
if err != nil {
190-
log.Error("Can't format code: %v", err)
191-
return plainText(string(code), numLines)
191+
return nil, fmt.Errorf("can't format code: %w", err)
192192
}
193193

194-
htmlw.Flush()
195-
finalNewLine := false
196-
if len(code) > 0 {
197-
finalNewLine = code[len(code)-1] == '\n'
198-
}
194+
_ = htmlWriter.Flush()
195+
196+
m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
199197

200-
m := make([]string, 0, numLines)
201-
for _, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) {
202-
content := v
203-
// need to keep lines that are only \n so copy/paste works properly in browser
204-
if content == "" {
205-
content = "\n"
206-
} else if content == `</span><span class="w">` {
207-
content += "\n</span>"
208-
} else if content == `</span></span><span class="line"><span class="cl">` {
209-
content += "\n"
198+
htmlStr := htmlBuf.String()
199+
line := strings.Builder{}
200+
insideLine := 0 // every <span class="cl"> makes it increase one level, every closed <span class="cl"> makes it decrease one level
201+
tagStack := make([]string, 0, 4)
202+
for len(htmlStr) > 0 {
203+
pos1 := strings.IndexByte(htmlStr, '<')
204+
pos2 := strings.IndexByte(htmlStr, '>')
205+
if pos1 == -1 || pos2 == -1 || pos1 > pos2 {
206+
break
210207
}
211-
content = strings.TrimSuffix(content, `<span class="w">`)
212-
content = strings.TrimPrefix(content, `</span>`)
213-
m = append(m, content)
208+
tag := htmlStr[pos1 : pos2+1]
209+
if insideLine > 0 {
210+
line.WriteString(htmlStr[:pos1])
211+
}
212+
if tag[1] == '/' {
213+
if len(tagStack) == 0 {
214+
return nil, fmt.Errorf("can't find matched tag: %q", tag)
215+
}
216+
popped := tagStack[len(tagStack)-1]
217+
tagStack = tagStack[:len(tagStack)-1]
218+
if popped == `<span class="cl">` {
219+
insideLine--
220+
lineStr := line.String()
221+
if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' {
222+
lineStr = lineStr[:len(lineStr)-1] + newLineInHTML
223+
}
224+
m = append(m, lineStr)
225+
line = strings.Builder{}
226+
}
227+
if insideLine > 0 {
228+
line.WriteString(tag)
229+
}
230+
} else {
231+
tagStack = append(tagStack, tag)
232+
if insideLine > 0 {
233+
line.WriteString(tag)
234+
}
235+
if tag == `<span class="cl">` {
236+
insideLine++
237+
}
238+
}
239+
htmlStr = htmlStr[pos2+1:]
214240
}
215-
if finalNewLine {
216-
m = append(m, "<span class=\"w\">\n</span>")
241+
242+
if len(m) == 0 {
243+
m = append(m, "") // maybe we do not want to return 0 lines
217244
}
218245

219-
return m
246+
return m, nil
220247
}
221248

222-
// return unhiglighted map
223-
func plainText(code string, numLines int) []string {
224-
m := make([]string, 0, numLines)
225-
for _, v := range strings.SplitN(code, "\n", numLines) {
226-
content := v
227-
// need to keep lines that are only \n so copy/paste works properly in browser
228-
if content == "" {
229-
content = "\n"
249+
// PlainText returns non-highlighted HTML for code
250+
func PlainText(code []byte) []string {
251+
r := bufio.NewReader(bytes.NewReader(code))
252+
m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
253+
for {
254+
content, err := r.ReadString('\n')
255+
if err != nil && err != io.EOF {
256+
log.Error("failed to read string from buffer: %v", err)
257+
break
258+
}
259+
if content == "" && err == io.EOF {
260+
break
261+
}
262+
s := gohtml.EscapeString(content)
263+
if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' {
264+
s = s[:len(s)-1] + newLineInHTML
230265
}
231-
m = append(m, gohtml.EscapeString(content))
266+
m = append(m, s)
232267
}
268+
269+
if len(m) == 0 {
270+
m = append(m, "") // maybe we do not want to return 0 lines
271+
}
272+
233273
return m
234274
}

0 commit comments

Comments
 (0)