Skip to content

Commit 7d23382

Browse files
committed
Fix MXParser fails to parse xml declaration properly (#138)
- Fix bugs. - Added tests. - Improved error messages.
1 parent b52d0e5 commit 7d23382

File tree

3 files changed

+68
-15
lines changed

3 files changed

+68
-15
lines changed

src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

+18-7
Original file line numberDiff line numberDiff line change
@@ -3296,6 +3296,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
32963296
}
32973297
xmlDeclVersion = newString( buf, versionStart, versionEnd - versionStart );
32983298

3299+
String lastParsedAttr = "version";
3300+
32993301
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
33003302
char ch = more();
33013303
char prevCh = ch;
@@ -3310,8 +3312,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
33103312
{
33113313
if ( !isS( prevCh ) )
33123314
{
3313-
throw new XmlPullParserException( "expected a space after version and not " + printable( ch ), this,
3314-
null );
3315+
throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not "
3316+
+ printable( ch ), this, null );
33153317
}
33163318
ch = more();
33173319
ch = requireInput( ch, NCODING );
@@ -3363,13 +3365,23 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF
33633365
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
33643366
this, null );
33653367
}
3368+
3369+
lastParsedAttr = "encoding";
3370+
3371+
ch = more();
3372+
prevCh = ch;
3373+
ch = skipS( ch );
33663374
}
33673375

3368-
ch = more();
3369-
ch = skipS( ch );
33703376
// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
33713377
if ( ch == 's' )
33723378
{
3379+
if ( !isS( prevCh ) )
3380+
{
3381+
throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not "
3382+
+ printable( ch ), this, null );
3383+
}
3384+
33733385
ch = more();
33743386
ch = requireInput( ch, TANDALONE );
33753387
ch = skipS( ch );
@@ -3382,11 +3394,10 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF
33823394
ch = skipS( ch );
33833395
if ( ch != '\'' && ch != '"' )
33843396
{
3385-
throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after encoding and not "
3397+
throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after standalone and not "
33863398
+ printable( ch ), this, null );
33873399
}
33883400
char quotChar = ch;
3389-
int standaloneStart = pos;
33903401
ch = more();
33913402
if ( ch == 'y' )
33923403
{
@@ -3411,9 +3422,9 @@ else if ( ch == 'n' )
34113422
+ printable( ch ), this, null );
34123423
}
34133424
ch = more();
3425+
ch = skipS( ch );
34143426
}
34153427

3416-
ch = skipS( ch );
34173428
if ( ch != '?' )
34183429
{
34193430
throw new XmlPullParserException( "expected ?> as last part of <?xml not " + printable( ch ), this, null );

src/test/java/org/codehaus/plexus/util/xml/pull/IBMXML10Tests_Test_IBMXMLConformanceTestSuite_not_wftests_Test_IBMXMLConformanceTestSuite_Production32_Test.java

+10-8
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public void testibm_not_wf_P32_ibm32n01xml()
5353
}
5454
catch ( XmlPullParserException e )
5555
{
56-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
56+
assertTrue( e.getMessage().contains( "expected a space after version and not s" ) );
5757
}
5858
}
5959

@@ -79,7 +79,7 @@ public void testibm_not_wf_P32_ibm32n02xml()
7979
}
8080
catch ( XmlPullParserException e )
8181
{
82-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
82+
assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) );
8383
}
8484
}
8585

@@ -131,7 +131,7 @@ public void testibm_not_wf_P32_ibm32n04xml()
131131
}
132132
catch ( XmlPullParserException e )
133133
{
134-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
134+
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) );
135135
}
136136
}
137137

@@ -158,7 +158,7 @@ public void testibm_not_wf_P32_ibm32n05xml()
158158
}
159159
catch ( XmlPullParserException e )
160160
{
161-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
161+
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) );
162162
}
163163
}
164164

@@ -184,7 +184,7 @@ public void testibm_not_wf_P32_ibm32n06xml()
184184
}
185185
catch ( XmlPullParserException e )
186186
{
187-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
187+
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) );
188188
}
189189
}
190190

@@ -210,7 +210,7 @@ public void testibm_not_wf_P32_ibm32n07xml()
210210
}
211211
catch ( XmlPullParserException e )
212212
{
213-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
213+
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) );
214214
}
215215
}
216216

@@ -236,7 +236,7 @@ public void testibm_not_wf_P32_ibm32n08xml()
236236
}
237237
catch ( XmlPullParserException e )
238238
{
239-
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
239+
assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) );
240240
}
241241
}
242242

@@ -248,8 +248,10 @@ public void testibm_not_wf_P32_ibm32n08xml()
248248
* Version:
249249
*
250250
* @throws IOException if there is an I/O error
251+
*
252+
* NOTE: This test is SKIPPED as MXParser does not support parsing inside DOCTYPEDECL.
251253
*/
252-
@Test
254+
// @Test
253255
public void testibm_not_wf_P32_ibm32n09xml()
254256
throws IOException
255257
{

src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java

+40
Original file line numberDiff line numberDiff line change
@@ -661,4 +661,44 @@ public void testMalformedXMLRootElement5()
661661
}
662662
}
663663

664+
@Test
665+
public void testXMLDeclVersionOnly()
666+
throws Exception
667+
{
668+
String input = "<?xml version='1.0'?><hello/>";
669+
670+
MXParser parser = new MXParser();
671+
parser.setInput( new StringReader( input ) );
672+
673+
try
674+
{
675+
assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() );
676+
assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
677+
assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
678+
}
679+
catch ( Exception e )
680+
{
681+
fail( "Should not throw Exception" );
682+
}
683+
}
684+
685+
@Test
686+
public void testXMLDeclVersionEncodingStandaloneNoSpace()
687+
throws Exception
688+
{
689+
String input = "<?xml version='1.0' encoding='ASCII'standalone='yes'?><hello/>";
690+
691+
MXParser parser = new MXParser();
692+
parser.setInput( new StringReader( input ) );
693+
694+
try
695+
{
696+
parser.nextToken();
697+
}
698+
catch ( XmlPullParserException e )
699+
{
700+
assertTrue( e.getMessage().contains( "expected a space after encoding and not s" ));
701+
}
702+
}
703+
664704
}

0 commit comments

Comments
 (0)