codehaus-plexus
diff --git a/‎src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+43-8 b/‎src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+43-8
diff --git a/‎src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
+278 b/‎src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
+278
diff --git a/‎src/test/resources/xmlconf/eduni/misc/001.xml
+4 b/‎src/test/resources/xmlconf/eduni/misc/001.xml
+4
diff --git a/‎src/test/resources/xmlconf/eduni/misc/002.xml
+4 b/‎src/test/resources/xmlconf/eduni/misc/002.xml
+4
diff --git a/‎src/test/resources/xmlconf/eduni/misc/003.xml
+4 b/‎src/test/resources/xmlconf/eduni/misc/003.xml
+4
diff --git a/‎src/test/resources/xmlconf/eduni/misc/004.xml
+4 b/‎src/test/resources/xmlconf/eduni/misc/004.xml
+4
diff --git a/‎src/test/resources/xmlconf/eduni/misc/005.xml
+2 b/‎src/test/resources/xmlconf/eduni/misc/005.xml
+2
@@ -11,6 +11,7 @@
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 
@@ -122,6 +123,8 @@ private String newStringIntern( char[] cbuf, int off, int len )
     // private String elValue[];
     private int elNamespaceCount[];
 
+    /**
+     * Upper-cased encoding name reported by the {@link InputStreamReader} passed to {@code setInput( Reader )}.
+     * Remains {@code null} until such a reader is supplied, so the check that compares this value with the
+     * encoding named in the XML declaration only fires when the byte encoding of the input is actually known.
+     * Readers that expose no encoding (for example a {@code StringReader}) must not be assumed to be UTF-8.
+     */
+    private String fileEncoding = null;
+
     /**
      * Make sure that we have enough space to keep element stack if passed size. It will always create one additional
      * slot then current depth
@@ -659,6 +662,15 @@ public void setInput( Reader in )
     {
         reset();
         reader = in;
+
+        if ( reader instanceof InputStreamReader )
+        {
+            InputStreamReader isr = (InputStreamReader) reader;
+            if ( isr.getEncoding() != null )
+            {
+                fileEncoding = isr.getEncoding().toUpperCase();
+            }
+        }
     }
 
     @Override
@@ -1771,6 +1783,17 @@ private int parseProlog()
                 // skipping UNICODE int Order Mark (so called BOM)
                 ch = more();
             }
+            else if ( ch == '\uFFFD' )
+            {
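+                // UTF-16 BOM in a UTF-8 encoded file?
+                // This is a hack: the BOM bytes are not valid UTF-8, so a UTF-8 decoder presumably delivers them
+                // as two consecutive U+FFFD replacement characters. Not the best way to check for a UTF-16 BOM.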
+                ch = more();
+                if ( ch == '\uFFFD' )
+                {
+                    throw new XmlPullParserException( "UTF-16 BOM in a UTF-8 encoded file is incompatible", this,
+                                                      null );
+                }
+            }
         }
         seenMarkup = false;
         boolean gotS = false;
@@ -2723,18 +2746,19 @@ else if ( ch >= 'A' && ch <= 'F' )
             }
             posEnd = pos - 1;
 
-            int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
-            boolean isValidCodePoint = isValidCodePoint( codePoint );
-            if ( isValidCodePoint )
+            boolean isValidCodePoint = true;
+            try
             {
-                try
+                int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
+                isValidCodePoint = isValidCodePoint( codePoint );
+                if ( isValidCodePoint )
                 {
                     charRefOneCharBuf = Character.toChars( codePoint );
                 }
-                catch ( IllegalArgumentException e )
-                {
-                    isValidCodePoint = false;
-                }
+            }
+            catch ( IllegalArgumentException e )
+            {
+                isValidCodePoint = false;
             }
 
             if ( !isValidCodePoint )
@@ -3328,6 +3352,17 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
 
             // TODO reconcile with setInput encodingName
             inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );
+
+            if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
+            {
+                throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
+                                                  this, null );
+            }
+            else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
+            {
+                throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
+                                                  this, null );
+            }
         }
 
         ch = more();
 
@@ -0,0 +1,278 @@
+package org.codehaus.plexus.util.xml.pull;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Test class that executes a particular set of tests associated to a TESTCASES tag from the XML W3C Conformance
+ * Tests.
+ * TESTCASES PROFILE: <pre>Bjoern Hoehrmann via HST 2013-09-18</pre>
+ * XML test files base folder: <pre>xmlconf/eduni/misc/</pre>
+ * <p>
+ * Every test feeds one resource file to a fresh {@link MXParser} and expects the parser to reject it with an
+ * {@link XmlPullParserException}.
+ *
+ * @author <a href="mailto:belingueres@gmail.com">Gabriel Belingueres</a>
+ */
+public class eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test
+{
+
+    final static File testResourcesDir = new File("src/test/resources/", "xmlconf/eduni/misc/");
+
+    MXParser parser;
+
+    @Before
+    public void setUp()
+    {
+        parser = new MXParser();
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-001</pre>
+     * Test URI: <pre>001.xml</pre>
+     * Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
+     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_bh_001()
+        throws IOException
+    {
+        assertNotWellFormed( "001.xml", StandardCharsets.UTF_8,
+                             "decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow",
+                             "character reference (with hex value FF000000F6) is invalid" );
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-002</pre>
+     * Test URI: <pre>002.xml</pre>
+     * Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
+     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_bh_002()
+        throws IOException
+    {
+        assertNotWellFormed( "002.xml", StandardCharsets.UTF_8,
+                             "hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow",
+                             "character reference (with decimal value 4294967542) is invalid" );
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-003</pre>
+     * Test URI: <pre>003.xml</pre>
+     * Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
+     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_bh_003()
+        throws IOException
+    {
+        assertNotWellFormed( "003.xml", StandardCharsets.UTF_8,
+                             "decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow",
+                             "character reference (with hex value FFFFFFFF000000F6) is invalid" );
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-004</pre>
+     * Test URI: <pre>004.xml</pre>
+     * Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
+     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_bh_004()
+        throws IOException
+    {
+        assertNotWellFormed( "004.xml", StandardCharsets.UTF_8,
+                             "hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow",
+                             "character reference (with decimal value 18446744073709551862) is invalid" );
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-005</pre>
+     * Test URI: <pre>005.xml</pre>
+     * Comment: <pre>xmlns:xml is an attribute as far as validation is concerned and must be declared</pre>
+     * Sections: <pre>3.1 [41]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     *
+     * NOTE: This test is SKIPPED as MXParser does not support DOCDECL parsing.
+     */
+    @Ignore( "MXParser does not support DOCDECL parsing" )
+    @Test
+    public void testhst_bh_005()
+        throws IOException
+    {
+        // Any XmlPullParserException counts as success here, hence no expected message.
+        assertNotWellFormed( "005.xml", StandardCharsets.UTF_8,
+                             "xmlns:xml is an attribute as far as validation is concerned and must be declared",
+                             null );
+    }
+
+    /**
+     * Test ID: <pre>hst-bh-006</pre>
+     * Test URI: <pre>006.xml</pre>
+     * Comment: <pre>xmlns:foo is an attribute as far as validation is concerned and must be declared</pre>
+     * Sections: <pre>3.1 [41]</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     *
+     * NOTE: This test is SKIPPED as MXParser does not support DOCDECL parsing.
+     */
+    @Ignore( "MXParser does not support DOCDECL parsing" )
+    @Test
+    public void testhst_bh_006()
+        throws IOException
+    {
+        // Any XmlPullParserException counts as success here, hence no expected message.
+        assertNotWellFormed( "006.xml", StandardCharsets.UTF_8,
+                             "xmlns:foo is an attribute as far as validation is concerned and must be declared",
+                             null );
+    }
+
+    /**
+     * Test ID: <pre>hst-lhs-007</pre>
+     * Test URI: <pre>007.xml</pre>
+     * Comment: <pre>UTF-8 BOM plus xml decl of iso-8859-1 incompatible</pre>
+     * Sections: <pre>4.3.3</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_lhs_007()
+        throws IOException
+    {
+        assertNotWellFormed( "007.xml", StandardCharsets.UTF_8,
+                             "UTF-8 BOM plus xml decl of iso-8859-1 incompatible",
+                             "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" );
+    }
+
+    /**
+     * Test ID: <pre>hst-lhs-008</pre>
+     * Test URI: <pre>008.xml</pre>
+     * Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible</pre>
+     * Sections: <pre>4.3.3</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_lhs_008()
+        throws IOException
+    {
+        assertNotWellFormed( "008.xml", StandardCharsets.UTF_16,
+                             "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible",
+                             "UTF-16 BOM plus xml decl of utf-8 is incompatible" );
+    }
+
+    /**
+     * Test ID: <pre>hst-lhs-009</pre>
+     * Test URI: <pre>009.xml</pre>
+     * Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible</pre>
+     * Sections: <pre>4.3.3</pre>
+     * Version:
+     *
+     * @throws IOException if there is an I/O error
+     */
+    @Test
+    public void testhst_lhs_009()
+        throws IOException
+    {
+        assertNotWellFormed( "009.xml", StandardCharsets.UTF_8,
+                             "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible",
+                             "UTF-16 BOM in a UTF-8 encoded file is incompatible" );
+    }
+
+    /**
+     * Parses a resource file up to the end of the document and asserts that the parser rejects it.
+     * <p>
+     * The file is always decoded through an {@link InputStreamReader} with an explicit charset, so the outcome does
+     * not depend on the platform default encoding.
+     *
+     * @param fileName name of the file below {@link #testResourcesDir}
+     * @param charset charset used to decode the file
+     * @param description message reported through {@code fail} when the document is unexpectedly accepted
+     * @param expectedMessage text the exception message must contain, or {@code null} to accept any
+     *            {@link XmlPullParserException}
+     * @throws IOException if there is an I/O error
+     */
+    private void assertNotWellFormed( String fileName, Charset charset, String description, String expectedMessage )
+        throws IOException
+    {
+        try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, fileName ) );
+                        Reader reader = new InputStreamReader( is, charset ) )
+        {
+            parser.setInput( reader );
+            while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+            {
+                // drain every token; only the expected failure matters
+            }
+            fail( description );
+        }
+        catch ( XmlPullParserException e )
+        {
+            if ( expectedMessage != null )
+            {
+                assertTrue( "Unexpected parser message: " + e.getMessage(),
+                            e.getMessage().contains( expectedMessage ) );
+            }
+        }
+    }
+
+}
@@ -0,0 +1,4 @@
+<!DOCTYPE p [
+<!ELEMENT p (#PCDATA)>
+]>
+<p>Fa&#xFF000000F6;il</p>          <!-- 32 bit integer overflow -->
@@ -0,0 +1,4 @@
+<!DOCTYPE p [
+<!ELEMENT p (#PCDATA)>
+]>
+<p>Fa&#4294967542;il</p>           <!-- 32 bit integer overflow -->
@@ -0,0 +1,4 @@
+<!DOCTYPE p [
+<!ELEMENT p (#PCDATA)>
+]>
+<p>Fa&#xFFFFFFFF000000F6;il</p>    <!-- 64 bit integer overflow -->
@@ -0,0 +1,4 @@
+<!DOCTYPE p [
+<!ELEMENT p (#PCDATA)>
+]>
+<p>Fa&#18446744073709551862;il</p> <!-- 64 bit integer overflow -->
@@ -0,0 +1,2 @@
+<!DOCTYPE x [ <!ELEMENT x EMPTY> ]>
+<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+<!DOCTYPE x [ <!ELEMENT x EMPTY> ]>`
	`2`	`+<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>`