Skip to content

Commit 8a0fbd7

Browse files
Meroviusgopherbot
authored andcommitted
encoding/xml: reject XML declaration after start of document
The XML specification requires an XML declaration, if present, to only appear at the very beginning of the document, not even preceded by whitespace. The parser currently accepts it at any part of the input. Rejecting whitespace at the beginning of the file might break too many users. This change instead only rejects an XML declaration preceded by a non-whitespace token *and* allows the Encoder to emit whitespace before an XML declaration. This means that a token stream produced by the Decoder can be passed to the Encoder without error, while we still don't emit clearly invalid XML. This might break programs depending on Decoder allowing arbitrary XML before the XML declaration. Fixes #65691. Change-Id: Ib1d4b3116aee63f40fd377f90595780b4befd1ee Reviewed-on: https://go-review.googlesource.com/c/go/+/564035 Auto-Submit: Ian Lance Taylor <iant@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Than McIntosh <thanm@google.com> Reviewed-by: Ian Lance Taylor <iant@google.com>
1 parent d892cb4 commit 8a0fbd7

File tree

4 files changed

+41
-4
lines changed

4 files changed

+41
-4
lines changed

src/encoding/xml/marshal.go

+24-4
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ var (
208208
// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token
209209
// in the stream.
210210
func (enc *Encoder) EncodeToken(t Token) error {
211-
212211
p := &enc.p
213212
switch t := t.(type) {
214213
case StartElement:
@@ -228,11 +227,10 @@ func (enc *Encoder) EncodeToken(t Token) error {
228227
p.WriteString("<!--")
229228
p.Write(t)
230229
p.WriteString("-->")
231-
return p.cachedWriteError()
232230
case ProcInst:
233231
// First token to be encoded which is also a ProcInst with target of xml
234232
// is the xml declaration. The only ProcInst where target of xml is allowed.
235-
if t.Target == "xml" && p.w.Buffered() != 0 {
233+
if t.Target == "xml" && p.wroteNonWS {
236234
return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded")
237235
}
238236
if !isNameString(t.Target) {
@@ -257,9 +255,30 @@ func (enc *Encoder) EncodeToken(t Token) error {
257255
p.WriteString(">")
258256
default:
259257
return fmt.Errorf("xml: EncodeToken of invalid token type")
258+
}
259+
if err := p.cachedWriteError(); err != nil || enc.p.wroteNonWS {
260+
return err
261+
}
262+
enc.p.wroteNonWS = !isWhitespace(t)
263+
return nil
264+
}
260265

266+
// isWhitespace reports whether t is a CharData token consisting entirely of
267+
// XML whitespace.
268+
func isWhitespace(t Token) bool {
269+
switch t := t.(type) {
270+
case CharData:
271+
for _, b := range t {
272+
switch b {
273+
case ' ', '\r', '\n', '\t':
274+
default:
275+
return false
276+
}
277+
}
278+
return true
279+
default:
280+
return false
261281
}
262-
return p.cachedWriteError()
263282
}
264283

265284
// isValidDirective reports whether dir is a valid directive text,
@@ -329,6 +348,7 @@ type printer struct {
329348
prefixes []string
330349
tags []Name
331350
closed bool
351+
wroteNonWS bool
332352
err error
333353
}
334354

src/encoding/xml/marshal_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -2356,6 +2356,9 @@ func TestProcInstEncodeToken(t *testing.T) {
23562356
var buf bytes.Buffer
23572357
enc := NewEncoder(&buf)
23582358

2359+
if err := enc.EncodeToken(CharData(" \n\r\t")); err != nil {
2360+
t.Fatal("enc.EncodeToken: expected to be able to encode whitespace as first token")
2361+
}
23592362
if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil {
23602363
t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err)
23612364
}

src/encoding/xml/xml.go

+12
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ type Decoder struct {
212212
line int
213213
linestart int64
214214
offset int64
215+
readNonWS bool
215216
unmarshalDepth int
216217
}
217218

@@ -559,6 +560,8 @@ func (d *Decoder) rawToken() (Token, error) {
559560
return EndElement{d.toClose}, nil
560561
}
561562

563+
readNonWS := d.readNonWS
564+
562565
b, ok := d.getc()
563566
if !ok {
564567
return nil, d.err
@@ -571,8 +574,12 @@ func (d *Decoder) rawToken() (Token, error) {
571574
if data == nil {
572575
return nil, d.err
573576
}
577+
if !d.readNonWS && !isWhitespace(CharData(data)) {
578+
d.readNonWS = true
579+
}
574580
return CharData(data), nil
575581
}
582+
d.readNonWS = true
576583

577584
if b, ok = d.mustgetc(); !ok {
578585
return nil, d.err
@@ -623,6 +630,11 @@ func (d *Decoder) rawToken() (Token, error) {
623630
data = data[0 : len(data)-2] // chop ?>
624631

625632
if target == "xml" {
633+
if readNonWS {
634+
d.err = errors.New("xml: XML declaration after start of document")
635+
return nil, d.err
636+
}
637+
626638
content := string(data)
627639
ver := procInst("version", content)
628640
if ver != "" && ver != "1.0" {

src/encoding/xml/xml_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -1350,10 +1350,12 @@ func TestParseErrors(t *testing.T) {
13501350

13511351
// Header-related errors.
13521352
{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
1353+
{`<foo><?xml version="1.0"?>`, `XML declaration after start of document`},
13531354

13541355
// Cases below are for "no errors".
13551356
{withDefaultHeader(`<?ok?>`), ``},
13561357
{withDefaultHeader(`<?ok version="ok"?>`), ``},
1358+
{` <?xml version="1.0"?>`, ``},
13571359
}
13581360

13591361
for _, test := range tests {

0 commit comments

Comments
 (0)