@@ -10,6 +10,7 @@ import (
10
10
"bytes"
11
11
"fmt"
12
12
gohtml "html"
13
+ "io"
13
14
"path/filepath"
14
15
"strings"
15
16
"sync"
@@ -26,7 +27,13 @@ import (
26
27
)
27
28
28
29
// don't highlight files larger than this many bytes for performance purposes; larger input is returned as plain text
29
- const sizeLimit = 1000000
30
+ const sizeLimit = 1024 * 1024
31
+
32
// newLineInHTML is the HTML entity used for a newline in HTML content; if it is empty, the original "\n" is kept.
// This option exists for two purposes:
// (1) make it easy to switch back to the original "\n" if a compatibility issue appears in the future
// (2) make tests unambiguous: "&#10;" is the real newline for rendering, while '\n' is ignorable/trim-able
var newLineInHTML = "&#10;"
30
37
31
38
var (
32
39
// For custom user mapping
@@ -46,7 +53,6 @@ func NewContext() {
46
53
highlightMapping [keys [i ].Name ()] = keys [i ].Value ()
47
54
}
48
55
}
49
-
50
56
// The size 512 is simply a conservative rule of thumb
51
57
c , err := lru .New2Q (512 )
52
58
if err != nil {
@@ -60,7 +66,7 @@ func NewContext() {
60
66
func Code (fileName , language , code string ) string {
61
67
NewContext ()
62
68
63
- // diff view newline will be passed as empty, change to literal \n so it can be copied
69
+ // diff view newline will be passed as empty, change to literal '\n' so it can be copied
64
70
// preserve literal newline in blame view
65
71
if code == "" || code == "\n " {
66
72
return "\n "
@@ -128,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
128
134
return code
129
135
}
130
136
131
- htmlw .Flush ()
137
+ _ = htmlw .Flush ()
132
138
// Chroma will add newlines for certain lexers in order to highlight them properly
133
- // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output
139
+ // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
134
140
return strings .TrimSuffix (htmlbuf .String (), "\n " )
135
141
}
136
142
137
- // File returns a slice of chroma syntax highlighted lines of code
138
- func File (numLines int , fileName , language string , code []byte ) []string {
143
+ // File returns a slice of chroma syntax highlighted HTML lines of code
144
+ func File (fileName , language string , code []byte ) ( []string , error ) {
139
145
NewContext ()
140
146
141
147
if len (code ) > sizeLimit {
142
- return plainText ( string ( code ), numLines )
148
+ return PlainText ( code ), nil
143
149
}
150
+
144
151
formatter := html .New (html .WithClasses (true ),
145
152
html .WithLineNumbers (false ),
146
153
html .PreventSurroundingPre (true ),
147
154
)
148
155
149
- if formatter == nil {
150
- log .Error ("Couldn't create chroma formatter" )
151
- return plainText (string (code ), numLines )
152
- }
153
-
154
- htmlbuf := bytes.Buffer {}
155
- htmlw := bufio .NewWriter (& htmlbuf )
156
+ htmlBuf := bytes.Buffer {}
157
+ htmlWriter := bufio .NewWriter (& htmlBuf )
156
158
157
159
var lexer chroma.Lexer
158
160
159
161
// provided language overrides everything
160
- if len ( language ) > 0 {
162
+ if language != "" {
161
163
lexer = lexers .Get (language )
162
164
}
163
165
@@ -168,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string {
168
170
}
169
171
170
172
if lexer == nil {
171
- language := analyze .GetCodeLanguage (fileName , code )
173
+ guessLanguage := analyze .GetCodeLanguage (fileName , code )
172
174
173
- lexer = lexers .Get (language )
175
+ lexer = lexers .Get (guessLanguage )
174
176
if lexer == nil {
175
177
lexer = lexers .Match (fileName )
176
178
if lexer == nil {
@@ -181,54 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string {
181
183
182
184
iterator , err := lexer .Tokenise (nil , string (code ))
183
185
if err != nil {
184
- log .Error ("Can't tokenize code: %v" , err )
185
- return plainText (string (code ), numLines )
186
+ return nil , fmt .Errorf ("can't tokenize code: %w" , err )
186
187
}
187
188
188
- err = formatter .Format (htmlw , styles .GitHub , iterator )
189
+ err = formatter .Format (htmlWriter , styles .GitHub , iterator )
189
190
if err != nil {
190
- log .Error ("Can't format code: %v" , err )
191
- return plainText (string (code ), numLines )
191
+ return nil , fmt .Errorf ("can't format code: %w" , err )
192
192
}
193
193
194
- htmlw .Flush ()
195
- finalNewLine := false
196
- if len (code ) > 0 {
197
- finalNewLine = code [len (code )- 1 ] == '\n'
198
- }
194
+ _ = htmlWriter .Flush ()
195
+
196
+ m := make ([]string , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
199
197
200
- m := make ([]string , 0 , numLines )
201
- for _ , v := range strings .SplitN (htmlbuf .String (), "\n " , numLines ) {
202
- content := v
203
- // need to keep lines that are only \n so copy/paste works properly in browser
204
- if content == "" {
205
- content = "\n "
206
- } else if content == `</span><span class="w">` {
207
- content += "\n </span>"
208
- } else if content == `</span></span><span class="line"><span class="cl">` {
209
- content += "\n "
198
+ htmlStr := htmlBuf .String ()
199
+ line := strings.Builder {}
200
+ insideLine := 0 // every <span class="cl"> makes it increase one level, every closed <span class="cl"> makes it decrease one level
201
+ tagStack := make ([]string , 0 , 4 )
202
+ for len (htmlStr ) > 0 {
203
+ pos1 := strings .IndexByte (htmlStr , '<' )
204
+ pos2 := strings .IndexByte (htmlStr , '>' )
205
+ if pos1 == - 1 || pos2 == - 1 || pos1 > pos2 {
206
+ break
210
207
}
211
- content = strings .TrimSuffix (content , `<span class="w">` )
212
- content = strings .TrimPrefix (content , `</span>` )
213
- m = append (m , content )
208
+ tag := htmlStr [pos1 : pos2 + 1 ]
209
+ if insideLine > 0 {
210
+ line .WriteString (htmlStr [:pos1 ])
211
+ }
212
+ if tag [1 ] == '/' {
213
+ if len (tagStack ) == 0 {
214
+ return nil , fmt .Errorf ("can't find matched tag: %q" , tag )
215
+ }
216
+ popped := tagStack [len (tagStack )- 1 ]
217
+ tagStack = tagStack [:len (tagStack )- 1 ]
218
+ if popped == `<span class="cl">` {
219
+ insideLine --
220
+ lineStr := line .String ()
221
+ if newLineInHTML != "" && lineStr != "" && lineStr [len (lineStr )- 1 ] == '\n' {
222
+ lineStr = lineStr [:len (lineStr )- 1 ] + newLineInHTML
223
+ }
224
+ m = append (m , lineStr )
225
+ line = strings.Builder {}
226
+ }
227
+ if insideLine > 0 {
228
+ line .WriteString (tag )
229
+ }
230
+ } else {
231
+ tagStack = append (tagStack , tag )
232
+ if insideLine > 0 {
233
+ line .WriteString (tag )
234
+ }
235
+ if tag == `<span class="cl">` {
236
+ insideLine ++
237
+ }
238
+ }
239
+ htmlStr = htmlStr [pos2 + 1 :]
214
240
}
215
- if finalNewLine {
216
- m = append (m , "<span class=\" w\" >\n </span>" )
241
+
242
+ if len (m ) == 0 {
243
+ m = append (m , "" ) // maybe we do not want to return 0 lines
217
244
}
218
245
219
- return m
246
+ return m , nil
220
247
}
221
248
222
- // return unhiglighted map
223
- func plainText (code string , numLines int ) []string {
224
- m := make ([]string , 0 , numLines )
225
- for _ , v := range strings .SplitN (code , "\n " , numLines ) {
226
- content := v
227
- // need to keep lines that are only \n so copy/paste works properly in browser
228
- if content == "" {
229
- content = "\n "
249
+ // PlainText returns non-highlighted HTML for code
250
+ func PlainText (code []byte ) []string {
251
+ r := bufio .NewReader (bytes .NewReader (code ))
252
+ m := make ([]string , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
253
+ for {
254
+ content , err := r .ReadString ('\n' )
255
+ if err != nil && err != io .EOF {
256
+ log .Error ("failed to read string from buffer: %v" , err )
257
+ break
258
+ }
259
+ if content == "" && err == io .EOF {
260
+ break
261
+ }
262
+ s := gohtml .EscapeString (content )
263
+ if newLineInHTML != "" && s != "" && s [len (s )- 1 ] == '\n' {
264
+ s = s [:len (s )- 1 ] + newLineInHTML
230
265
}
231
- m = append (m , gohtml . EscapeString ( content ) )
266
+ m = append (m , s )
232
267
}
268
+
269
+ if len (m ) == 0 {
270
+ m = append (m , "" ) // maybe we do not want to return 0 lines
271
+ }
272
+
233
273
return m
234
274
}
0 commit comments