Skip to content

Commit 761ac42

Browse files
authored
Fixed MXParser not failing when encountering invalid characters in comments (#126) (#127)
* Fixed MXParser not failing when encountering invalid characters in comments (#126) * Forced tests testibm_not_wf_P02_ibm02n32xml and testibm_not_wf_P02_ibm02n33xml to open the XML file with UTF-8 encoding, since the Windows default encoding (cp1252) decodes the byte as a different character. Fixes #126
1 parent 60bee8a commit 761ac42

35 files changed

+1114
-1
lines changed

src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

+18-1
Original file line numberDiff line numberDiff line change
@@ -2865,6 +2865,19 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
28652865
}
28662866
}
28672867

2868+
/**
2869+
* Check if the provided code point is a valid XML Char, according to: <a href="https://www.w3.org/TR/REC-xml/#NT-Char">https://www.w3.org/TR/REC-xml/#NT-Char</a>
2870+
*
2871+
* @param codePoint the numeric value to check
2872+
* @return true if the code point is a valid XML character (matches the Char production), false otherwise.
2873+
*/
2874+
private static boolean isValidCodePoint( int codePoint )
2875+
{
2876+
// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
2877+
return codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD || ( 0x20 <= codePoint && codePoint <= 0xD7FF )
2878+
|| ( 0xE000 <= codePoint && codePoint <= 0xFFFD ) || ( 0x10000 <= codePoint && codePoint <= 0x10FFFF );
2879+
}
2880+
28682881
private char[] lookuEntityReplacement( int entityNameLen )
28692882
throws XmlPullParserException, IOException
28702883

@@ -2954,10 +2967,14 @@ else if ( ch == '>' )
29542967
}
29552968
seenDash = false;
29562969
}
2957-
else
2970+
else if (isValidCodePoint( ch ))
29582971
{
29592972
seenDash = false;
29602973
}
2974+
else
2975+
{
2976+
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
2977+
}
29612978
if ( normalizeIgnorableWS )
29622979
{
29632980
if ( ch == '\r' )

0 commit comments

Comments
 (0)