Skip to content

Commit 1e18ddc

Browse files
authored
Fix MXParser improve error reporting (#136) (#137)
- when parsing large char entities. - when mixing invalid encoding declarations and file encodings.
1 parent 48e444f commit 1e18ddc

File tree

13 files changed

+399
-8
lines changed

13 files changed

+399
-8
lines changed

src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

+43-8
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import java.io.EOFException;
1313
import java.io.IOException;
14+
import java.io.InputStreamReader;
1415
import java.io.Reader;
1516
import java.io.UnsupportedEncodingException;
1617

@@ -122,6 +123,8 @@ private String newStringIntern( char[] cbuf, int off, int len )
122123
// private String elValue[];
123124
private int elNamespaceCount[];
124125

126+
private String fileEncoding = "UTF8";
127+
125128
/**
126129
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
127130
* slot than current depth
@@ -659,6 +662,15 @@ public void setInput( Reader in )
659662
{
660663
reset();
661664
reader = in;
665+
666+
if ( reader instanceof InputStreamReader )
667+
{
668+
InputStreamReader isr = (InputStreamReader) reader;
669+
if ( isr.getEncoding() != null )
670+
{
671+
fileEncoding = isr.getEncoding().toUpperCase();
672+
}
673+
}
662674
}
663675

664676
@Override
@@ -1771,6 +1783,17 @@ private int parseProlog()
17711783
// skipping UNICODE Byte Order Mark (so-called BOM)
17721784
ch = more();
17731785
}
1786+
else if ( ch == '\uFFFD' )
1787+
{
1788+
// UTF-16 BOM in a UTF-8 encoded file?
1789+
// This is a hack...not the best way to check for BOM in UTF-16
1790+
ch = more();
1791+
if ( ch == '\uFFFD' )
1792+
{
1793+
throw new XmlPullParserException( "UTF-16 BOM in a UTF-8 encoded file is incompatible", this,
1794+
null );
1795+
}
1796+
}
17741797
}
17751798
seenMarkup = false;
17761799
boolean gotS = false;
@@ -2723,18 +2746,19 @@ else if ( ch >= 'A' && ch <= 'F' )
27232746
}
27242747
posEnd = pos - 1;
27252748

2726-
int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
2727-
boolean isValidCodePoint = isValidCodePoint( codePoint );
2728-
if ( isValidCodePoint )
2749+
boolean isValidCodePoint = true;
2750+
try
27292751
{
2730-
try
2752+
int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
2753+
isValidCodePoint = isValidCodePoint( codePoint );
2754+
if ( isValidCodePoint )
27312755
{
27322756
charRefOneCharBuf = Character.toChars( codePoint );
27332757
}
2734-
catch ( IllegalArgumentException e )
2735-
{
2736-
isValidCodePoint = false;
2737-
}
2758+
}
2759+
catch ( IllegalArgumentException e )
2760+
{
2761+
isValidCodePoint = false;
27382762
}
27392763

27402764
if ( !isValidCodePoint )
@@ -3328,6 +3352,17 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
33283352

33293353
// TODO reconcile with setInput encodingName
33303354
inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );
3355+
3356+
if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
3357+
{
3358+
throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
3359+
this, null );
3360+
}
3361+
else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
3362+
{
3363+
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
3364+
this, null );
3365+
}
33313366
}
33323367

33333368
ch = more();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
package org.codehaus.plexus.util.xml.pull;
2+
3+
import static org.junit.Assert.assertTrue;
4+
import static org.junit.Assert.fail;
5+
6+
import java.io.File;
7+
import java.io.FileInputStream;
8+
import java.io.FileReader;
9+
import java.io.IOException;
10+
import java.io.InputStreamReader;
11+
import java.io.Reader;
12+
import java.nio.charset.StandardCharsets;
13+
14+
import org.junit.Before;
15+
import org.junit.Test;
16+
17+
/**
18+
* Test class that executes a particular set of tests associated with a TESTCASES tag from the XML W3C Conformance Tests.
19+
* TESTCASES PROFILE: <pre>Bjoern Hoehrmann via HST 2013-09-18</pre>
20+
* XML test files base folder: <pre>xmlconf/eduni/misc/</pre>
21+
*
22+
* @author <a href="mailto:belingueres@gmail.com">Gabriel Belingueres</a>
23+
*/
24+
public class eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test
25+
{
26+
27+
final static File testResourcesDir = new File("src/test/resources/", "xmlconf/eduni/misc/");
28+
29+
MXParser parser;
30+
31+
@Before
32+
public void setUp()
33+
{
34+
parser = new MXParser();
35+
}
36+
37+
/**
38+
* Test ID: <pre>hst-bh-001</pre>
39+
* Test URI: <pre>001.xml</pre>
40+
* Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
41+
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
42+
* Version:
43+
*
44+
* @throws IOException if there is an I/O error
45+
*/
46+
@Test
47+
public void testhst_bh_001()
48+
throws IOException
49+
{
50+
try ( Reader reader = new FileReader( new File( testResourcesDir, "001.xml" ) ) )
51+
{
52+
parser.setInput( reader );
53+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
54+
;
55+
fail( "decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
56+
}
57+
catch ( XmlPullParserException e )
58+
{
59+
assertTrue( e.getMessage().contains( "character reference (with hex value FF000000F6) is invalid" ) );
60+
}
61+
}
62+
63+
/**
64+
* Test ID: <pre>hst-bh-002</pre>
65+
* Test URI: <pre>002.xml</pre>
66+
* Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
67+
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
68+
* Version:
69+
*
70+
* @throws IOException if there is an I/O error
71+
*/
72+
@Test
73+
public void testhst_bh_002()
74+
throws IOException
75+
{
76+
try ( Reader reader = new FileReader( new File( testResourcesDir, "002.xml" ) ) )
77+
{
78+
parser.setInput( reader );
79+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
80+
;
81+
fail( "hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
82+
}
83+
catch ( XmlPullParserException e )
84+
{
85+
assertTrue( e.getMessage().contains( "character reference (with decimal value 4294967542) is invalid" ) );
86+
}
87+
}
88+
89+
/**
90+
* Test ID: <pre>hst-bh-003</pre>
91+
* Test URI: <pre>003.xml</pre>
92+
* Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
93+
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
94+
* Version:
95+
*
96+
* @throws IOException if there is an I/O error
97+
*/
98+
@Test
99+
public void testhst_bh_003()
100+
throws IOException
101+
{
102+
try ( Reader reader = new FileReader( new File( testResourcesDir, "003.xml" ) ) )
103+
{
104+
parser.setInput( reader );
105+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
106+
;
107+
fail( "decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
108+
}
109+
catch ( XmlPullParserException e )
110+
{
111+
assertTrue( e.getMessage().contains( "character reference (with hex value FFFFFFFF000000F6) is invalid" ) );
112+
}
113+
}
114+
115+
/**
116+
* Test ID: <pre>hst-bh-004</pre>
117+
* Test URI: <pre>004.xml</pre>
118+
* Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
119+
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
120+
* Version:
121+
*
122+
* @throws IOException if there is an I/O error
123+
*/
124+
@Test
125+
public void testhst_bh_004()
126+
throws IOException
127+
{
128+
try ( Reader reader = new FileReader( new File( testResourcesDir, "004.xml" ) ) )
129+
{
130+
parser.setInput( reader );
131+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
132+
;
133+
fail( "hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
134+
}
135+
catch ( XmlPullParserException e )
136+
{
137+
assertTrue( e.getMessage().contains( "character reference (with decimal value 18446744073709551862) is invalid" ) );
138+
}
139+
}
140+
141+
/**
142+
* Test ID: <pre>hst-bh-005</pre>
143+
* Test URI: <pre>005.xml</pre>
144+
* Comment: <pre>xmlns:xml is an attribute as far as validation is concerned and must be declared</pre>
145+
* Sections: <pre>3.1 [41]</pre>
146+
* Version:
147+
*
148+
* @throws IOException if there is an I/O error
149+
*
150+
* NOTE: This test is SKIPPED as MXParser does not support DOCDECL parsing.
151+
*/
152+
// @Test
153+
public void testhst_bh_005()
154+
throws IOException
155+
{
156+
try ( Reader reader = new FileReader( new File( testResourcesDir, "005.xml" ) ) )
157+
{
158+
parser.setInput( reader );
159+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
160+
;
161+
fail( "xmlns:xml is an attribute as far as validation is concerned and must be declared" );
162+
}
163+
catch ( XmlPullParserException e )
164+
{
165+
assertTrue( true );
166+
}
167+
}
168+
169+
/**
170+
* Test ID: <pre>hst-bh-006</pre>
171+
* Test URI: <pre>006.xml</pre>
172+
* Comment: <pre>xmlns:foo is an attribute as far as validation is concerned and must be declared</pre>
173+
* Sections: <pre>3.1 [41]</pre>
174+
* Version:
175+
*
176+
* @throws IOException if there is an I/O error
177+
*
178+
* NOTE: This test is SKIPPED as MXParser does not support DOCDECL parsing.
179+
*/
180+
// @Test
181+
public void testhst_bh_006()
182+
throws IOException
183+
{
184+
try ( Reader reader = new FileReader( new File( testResourcesDir, "006.xml" ) ) )
185+
{
186+
parser.setInput( reader );
187+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
188+
;
189+
fail( "xmlns:foo is an attribute as far as validation is concerned and must be declared" );
190+
}
191+
catch ( XmlPullParserException e )
192+
{
193+
assertTrue( true );
194+
}
195+
}
196+
197+
/**
198+
* Test ID: <pre>hst-lhs-007</pre>
199+
* Test URI: <pre>007.xml</pre>
200+
* Comment: <pre>UTF-8 BOM plus xml decl of iso-8859-1 incompatible</pre>
201+
* Sections: <pre>4.3.3</pre>
202+
* Version:
203+
*
204+
* @throws IOException if there is an I/O error
205+
*/
206+
@Test
207+
public void testhst_lhs_007()
208+
throws IOException
209+
{
210+
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
211+
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
212+
{
213+
parser.setInput( reader );
214+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
215+
;
216+
fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
217+
}
218+
catch ( XmlPullParserException e )
219+
{
220+
assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) );
221+
}
222+
}
223+
224+
/**
225+
* Test ID: <pre>hst-lhs-008</pre>
226+
* Test URI: <pre>008.xml</pre>
227+
* Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible</pre>
228+
* Sections: <pre>4.3.3</pre>
229+
* Version:
230+
*
231+
* @throws IOException if there is an I/O error
232+
*/
233+
@Test
234+
public void testhst_lhs_008()
235+
throws IOException
236+
{
237+
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
238+
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
239+
{
240+
parser.setInput( reader );
241+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
242+
;
243+
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
244+
}
245+
catch ( XmlPullParserException e )
246+
{
247+
assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) );
248+
}
249+
}
250+
251+
/**
252+
* Test ID: <pre>hst-lhs-009</pre>
253+
* Test URI: <pre>009.xml</pre>
254+
* Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible</pre>
255+
* Sections: <pre>4.3.3</pre>
256+
* Version:
257+
*
258+
* @throws IOException if there is an I/O error
259+
*/
260+
@Test
261+
public void testhst_lhs_009()
262+
throws IOException
263+
{
264+
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
265+
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
266+
{
267+
parser.setInput( reader );
268+
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
269+
;
270+
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
271+
}
272+
catch ( XmlPullParserException e )
273+
{
274+
assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
275+
}
276+
}
277+
278+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE p [
2+
<!ELEMENT p (#PCDATA)>
3+
]>
4+
<p>Fa&#xFF000000F6;il</p> <!-- 32 bit integer overflow -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE p [
2+
<!ELEMENT p (#PCDATA)>
3+
]>
4+
<p>Fa&#4294967542;il</p> <!-- 32 bit integer overflow -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE p [
2+
<!ELEMENT p (#PCDATA)>
3+
]>
4+
<p>Fa&#xFFFFFFFF000000F6;il</p> <!-- 64 bit integer overflow -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE p [
2+
<!ELEMENT p (#PCDATA)>
3+
]>
4+
<p>Fa&#18446744073709551862;il</p> <!-- 64 bit integer overflow -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<!DOCTYPE x [ <!ELEMENT x EMPTY> ]>
2+
<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>

0 commit comments

Comments
 (0)