Skip to content

Commit 48e6445

Browse files
committed
Fixed regressions:
* #163 - new case: don't assume UTF-8 as the default encoding, so that parsing from a String works.
* #194 - incorrect getText() result after parsing the DOCDECL section.
1 parent 3896620 commit 48e6445

File tree

4 files changed

+352
-26
lines changed

4 files changed

+352
-26
lines changed

src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

+74-26
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
124124
// private String elValue[];
125125
private int elNamespaceCount[];
126126

127-
private String fileEncoding = "UTF8";
127+
private String fileEncoding = null;
128128

129129
/**
130130
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
587587
}
588588
}
589589

590-
/**
591-
* Unknown properties are <strong>always</strong> returned as false
590+
/**
591+
* Unknown properties are <strong>always</strong> returned as false
592592
*/
593593
@Override
594594
public boolean getFeature( String name )
@@ -2677,7 +2677,15 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
26772677

26782678
private char[] charRefOneCharBuf = new char[1];
26792679

2680-
private char[] parseEntityRef()
2680+
/**
2681+
* Parse an entity reference: either a character reference or one of the predefined named entities.
2682+
*
2683+
* @return -1 if a valid character reference or one of the predefined entity names was found
2684+
* (charRefOneCharBuf then contains the replacement char); otherwise, the length of the entity name found.
2685+
* @throws XmlPullParserException if invalid XML is detected.
2686+
* @throws IOException if an I/O error is found.
2687+
*/
2688+
private int parseCharOrPredefinedEntityRef()
26812689
throws XmlPullParserException, IOException
26822690
{
26832691
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
@@ -2777,12 +2785,12 @@ else if ( ch >= 'A' && ch <= 'F' )
27772785
{
27782786
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
27792787
}
2780-
return charRefOneCharBuf;
2788+
return -1;
27812789
}
27822790
else
27832791
{
27842792
// [68] EntityRef ::= '&' Name ';'
2785-
// scan anem until ;
2793+
// scan name until ;
27862794
if ( !isNameStartChar( ch ) )
27872795
{
27882796
throw new XmlPullParserException( "entity reference names can not start with character '"
@@ -2811,7 +2819,7 @@ else if ( ch >= 'A' && ch <= 'F' )
28112819
text = "<";
28122820
}
28132821
charRefOneCharBuf[0] = '<';
2814-
return charRefOneCharBuf;
2822+
return -1;
28152823
// if(paramPC || isParserTokenizing) {
28162824
// if(pcEnd >= pc.length) ensurePC();
28172825
// pc[pcEnd++] = '<';
@@ -2824,7 +2832,7 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
28242832
text = "&";
28252833
}
28262834
charRefOneCharBuf[0] = '&';
2827-
return charRefOneCharBuf;
2835+
return -1;
28282836
}
28292837
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
28302838
{
@@ -2833,7 +2841,7 @@ else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
28332841
text = ">";
28342842
}
28352843
charRefOneCharBuf[0] = '>';
2836-
return charRefOneCharBuf;
2844+
return -1;
28372845
}
28382846
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
28392847
&& buf[posStart + 3] == 's' )
@@ -2843,7 +2851,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
28432851
text = "'";
28442852
}
28452853
charRefOneCharBuf[0] = '\'';
2846-
return charRefOneCharBuf;
2854+
return -1;
28472855
}
28482856
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
28492857
&& buf[posStart + 3] == 't' )
@@ -2853,20 +2861,51 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
28532861
text = "\"";
28542862
}
28552863
charRefOneCharBuf[0] = '"';
2856-
return charRefOneCharBuf;
2857-
}
2858-
else
2859-
{
2860-
final char[] result = lookuEntityReplacement( len );
2861-
if ( result != null )
2862-
{
2863-
return result;
2864-
}
2864+
return -1;
28652865
}
2866-
if ( tokenize )
2867-
text = null;
2868-
return null;
2866+
return len; // name not found
2867+
}
2868+
}
2869+
2870+
/**
2871+
* Parse an entity reference inside the DOCDECL section.
2872+
*
2873+
* @throws XmlPullParserException if invalid XML is detected.
2874+
* @throws IOException if an I/O error is found.
2875+
*/
2876+
private void parseEntityRefInDocDecl()
2877+
throws XmlPullParserException, IOException
2878+
{
2879+
final int len = parseCharOrPredefinedEntityRef();
2880+
if ( len < 0 )
2881+
return;
2882+
if ( tokenize )
2883+
text = null;
2884+
}
2885+
2886+
/**
2887+
* Parse an entity reference inside a tag or attribute.
2888+
*
2889+
* @return the char array with the replaced character entity, the replaced custom entity, or null if no replacement
2890+
* could be found.
2891+
* @throws XmlPullParserException if invalid XML is detected.
2892+
* @throws IOException if an I/O error is found.
2893+
*/
2894+
private char[] parseEntityRef()
2895+
throws XmlPullParserException, IOException
2896+
{
2897+
final int len = parseCharOrPredefinedEntityRef();
2898+
if ( len < 0 )
2899+
return charRefOneCharBuf;
2900+
2901+
final char[] result = lookuEntityReplacement( len );
2902+
if ( result != null )
2903+
{
2904+
return result;
28692905
}
2906+
if ( tokenize )
2907+
text = null;
2908+
return null;
28702909
}
28712910

28722911
/**
@@ -2883,8 +2922,6 @@ private static boolean isValidCodePoint( int codePoint )
28832922
}
28842923

28852924
private char[] lookuEntityReplacement( int entityNameLen )
2886-
throws XmlPullParserException, IOException
2887-
28882925
{
28892926
if ( !allStringsInterned )
28902927
{
@@ -2977,7 +3014,7 @@ else if (isValidCodePoint( ch ))
29773014
}
29783015
else
29793016
{
2980-
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
3017+
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString((ch)) + " found in comment", this, null );
29813018
}
29823019
if ( normalizeIgnorableWS )
29833020
{
@@ -3484,7 +3521,7 @@ else if ( ch == '>' && bracketLevel == 0 )
34843521
break;
34853522
else if ( ch == '&' )
34863523
{
3487-
extractEntityRef();
3524+
extractEntityRefInDocDecl();
34883525
}
34893526
if ( normalizeIgnorableWS )
34903527
{
@@ -3538,6 +3575,17 @@ else if ( ch == '\n' )
35383575
posEnd = pos - 1;
35393576
}
35403577

3578+
private void extractEntityRefInDocDecl()
3579+
throws XmlPullParserException, IOException
3580+
{
3581+
// extractEntityRef
3582+
posEnd = pos - 1;
3583+
3584+
int prevPosStart = posStart;
3585+
parseEntityRefInDocDecl();
3586+
posStart = prevPosStart;
3587+
}
3588+
35413589
private void extractEntityRef()
35423590
throws XmlPullParserException, IOException
35433591
{

0 commit comments

Comments
 (0)