@@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
124
124
// private String elValue[];
125
125
private int elNamespaceCount [];
126
126
127
- private String fileEncoding = "UTF8" ;
127
+ private String fileEncoding = null ;
128
128
129
129
/**
130
130
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
587
587
}
588
588
}
589
589
590
- /**
591
- * Unknown properties are <strong>always</strong> returned as false
590
+ /**
591
+ * Unknown properties are <strong>always</strong> returned as false
592
592
*/
593
593
@ Override
594
594
public boolean getFeature ( String name )
@@ -1596,11 +1596,11 @@ else if ( ch == '&' )
1596
1596
}
1597
1597
final int oldStart = posStart + bufAbsoluteStart ;
1598
1598
final int oldEnd = posEnd + bufAbsoluteStart ;
1599
- final char [] resolvedEntity = parseEntityRef ();
1599
+ parseEntityRef ();
1600
1600
if ( tokenize )
1601
1601
return eventType = ENTITY_REF ;
1602
1602
// check if replacement text can be resolved !!!
1603
- if ( resolvedEntity == null )
1603
+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
1604
1604
{
1605
1605
if ( entityRefName == null )
1606
1606
{
@@ -1628,7 +1628,7 @@ else if ( ch == '&' )
1628
1628
}
1629
1629
// assert usePC == true;
1630
1630
// write into PC replacement text - do merge for replacement text!!!!
1631
- for ( char aResolvedEntity : resolvedEntity )
1631
+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
1632
1632
{
1633
1633
if ( pcEnd >= pc .length )
1634
1634
{
@@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
2675
2675
return ch ;
2676
2676
}
2677
2677
2678
- private char [] charRefOneCharBuf = new char [1 ];
2678
+ // state representing that no entity ref has been resolved
2679
+ private static final char [] BUF_NOT_RESOLVED = new char [0 ];
2680
+
2681
+ // predefined entity refs
2682
+ private static final char [] BUF_LT = new char [] { '<' };
2683
+ private static final char [] BUF_AMP = new char [] { '&' };
2684
+ private static final char [] BUF_GT = new char [] { '>' };
2685
+ private static final char [] BUF_APO = new char [] { '\'' };
2686
+ private static final char [] BUF_QUOT = new char [] { '"' };
2679
2687
2680
- private char [] parseEntityRef ()
2688
+ private char [] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED ;
2689
+
2690
+ /**
2691
+ * Parse an entity reference: either a character reference or one of the predefined named entities.
2692
+ *
2693
+ * @return for a character reference, the number of chars stored in resolvedEntityRefCharBuf; for a named
2694
+ * reference, the length of the entity name (resolvedEntityRefCharBuf holds the replacement chars for
2695
+ * a predefined name and stays BUF_NOT_RESOLVED otherwise).
2696
+ * @throws XmlPullParserException if invalid XML is detected.
2697
+ * @throws IOException if an I/O error is found.
2698
+ */
2699
+ private int parseCharOrPredefinedEntityRef ()
2681
2700
throws XmlPullParserException , IOException
2682
2701
{
2683
2702
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
@@ -2686,6 +2705,8 @@ private char[] parseEntityRef()
2686
2705
// ASSUMPTION just after &
2687
2706
entityRefName = null ;
2688
2707
posStart = pos ;
2708
+ int len = 0 ;
2709
+ resolvedEntityRefCharBuf = BUF_NOT_RESOLVED ;
2689
2710
char ch = more ();
2690
2711
if ( ch == '#' )
2691
2712
{
@@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' )
2750
2771
ch = more ();
2751
2772
}
2752
2773
}
2753
- posEnd = pos - 1 ;
2754
2774
2755
2775
boolean isValidCodePoint = true ;
2756
2776
try
@@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' )
2759
2779
isValidCodePoint = isValidCodePoint ( codePoint );
2760
2780
if ( isValidCodePoint )
2761
2781
{
2762
- charRefOneCharBuf = Character .toChars ( codePoint );
2782
+ resolvedEntityRefCharBuf = Character .toChars ( codePoint );
2763
2783
}
2764
2784
}
2765
2785
catch ( IllegalArgumentException e )
@@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' )
2775
2795
2776
2796
if ( tokenize )
2777
2797
{
2778
- text = newString ( charRefOneCharBuf , 0 , charRefOneCharBuf .length );
2798
+ text = newString ( resolvedEntityRefCharBuf , 0 , resolvedEntityRefCharBuf .length );
2779
2799
}
2780
- return charRefOneCharBuf ;
2800
+ len = resolvedEntityRefCharBuf . length ;
2781
2801
}
2782
2802
else
2783
2803
{
2784
2804
// [68] EntityRef ::= '&' Name ';'
2785
- // scan anem until ;
2805
+ // scan name until ;
2786
2806
if ( !isNameStartChar ( ch ) )
2787
2807
{
2788
2808
throw new XmlPullParserException ( "entity reference names can not start with character '"
@@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' )
2801
2821
+ printable ( ch ) + "'" , this , null );
2802
2822
}
2803
2823
}
2804
- posEnd = pos - 1 ;
2805
2824
// determine what name maps to
2806
- final int len = posEnd - posStart ;
2825
+ len = ( pos - 1 ) - posStart ;
2807
2826
if ( len == 2 && buf [posStart ] == 'l' && buf [posStart + 1 ] == 't' )
2808
2827
{
2809
2828
if ( tokenize )
2810
2829
{
2811
2830
text = "<" ;
2812
2831
}
2813
- charRefOneCharBuf [0 ] = '<' ;
2814
- return charRefOneCharBuf ;
2832
+ resolvedEntityRefCharBuf = BUF_LT ;
2815
2833
// if(paramPC || isParserTokenizing) {
2816
2834
// if(pcEnd >= pc.length) ensurePC();
2817
2835
// pc[pcEnd++] = '<';
@@ -2823,17 +2841,15 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
2823
2841
{
2824
2842
text = "&" ;
2825
2843
}
2826
- charRefOneCharBuf [0 ] = '&' ;
2827
- return charRefOneCharBuf ;
2844
+ resolvedEntityRefCharBuf = BUF_AMP ;
2828
2845
}
2829
2846
else if ( len == 2 && buf [posStart ] == 'g' && buf [posStart + 1 ] == 't' )
2830
2847
{
2831
2848
if ( tokenize )
2832
2849
{
2833
2850
text = ">" ;
2834
2851
}
2835
- charRefOneCharBuf [0 ] = '>' ;
2836
- return charRefOneCharBuf ;
2852
+ resolvedEntityRefCharBuf = BUF_GT ;
2837
2853
}
2838
2854
else if ( len == 4 && buf [posStart ] == 'a' && buf [posStart + 1 ] == 'p' && buf [posStart + 2 ] == 'o'
2839
2855
&& buf [posStart + 3 ] == 's' )
@@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
2842
2858
{
2843
2859
text = "'" ;
2844
2860
}
2845
- charRefOneCharBuf [0 ] = '\'' ;
2846
- return charRefOneCharBuf ;
2861
+ resolvedEntityRefCharBuf = BUF_APO ;
2847
2862
}
2848
2863
else if ( len == 4 && buf [posStart ] == 'q' && buf [posStart + 1 ] == 'u' && buf [posStart + 2 ] == 'o'
2849
2864
&& buf [posStart + 3 ] == 't' )
@@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
2852
2867
{
2853
2868
text = "\" " ;
2854
2869
}
2855
- charRefOneCharBuf [0 ] = '"' ;
2856
- return charRefOneCharBuf ;
2857
- }
2858
- else
2859
- {
2860
- final char [] result = lookuEntityReplacement ( len );
2861
- if ( result != null )
2862
- {
2863
- return result ;
2864
- }
2870
+ resolvedEntityRefCharBuf = BUF_QUOT ;
2865
2871
}
2866
- if ( tokenize )
2867
- text = null ;
2868
- return null ;
2869
2872
}
2873
+
2874
+ posEnd = pos ;
2875
+
2876
+ return len ;
2877
+ }
2878
+
2879
+ /**
2880
+ * Parse an entity reference inside the DOCDECL section.
2881
+ *
2882
+ * @throws XmlPullParserException if invalid XML is detected.
2883
+ * @throws IOException if an I/O error is found.
2884
+ */
2885
+ private void parseEntityRefInDocDecl ()
2886
+ throws XmlPullParserException , IOException
2887
+ {
2888
+ parseCharOrPredefinedEntityRef ();
2889
+ if (usePC ) {
2890
+ posStart --; // include in PC the starting '&' of the entity
2891
+ joinPC ();
2892
+ }
2893
+
2894
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2895
+ return ;
2896
+ if ( tokenize )
2897
+ text = null ;
2898
+ }
2899
+
2900
+ /**
2901
+ * Parse an entity reference inside a tag or attribute.
2902
+ *
2903
+ * @throws XmlPullParserException if invalid XML is detected.
2904
+ * @throws IOException if an I/O error is found.
2905
+ */
2906
+ private void parseEntityRef ()
2907
+ throws XmlPullParserException , IOException
2908
+ {
2909
+ final int len = parseCharOrPredefinedEntityRef ();
2910
+
2911
+ posEnd --; // don't involve the final ';' from the entity in the search
2912
+
2913
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) {
2914
+ return ;
2915
+ }
2916
+
2917
+ resolvedEntityRefCharBuf = lookuEntityReplacement ( len );
2918
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2919
+ {
2920
+ return ;
2921
+ }
2922
+ if ( tokenize )
2923
+ text = null ;
2870
2924
}
2871
2925
2872
2926
/**
2873
- * Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
2927
+ * Check if the provided parameter is a valid Char. According to
2928
+ * <a href="https://www.w3.org/TR/REC-xml/#NT-Char">https://www.w3.org/TR/REC-xml/#NT-Char</a>
2874
2929
*
2875
2930
* @param codePoint the numeric value to check
2876
2931
* @return true if it is a valid numeric character reference. False otherwise.
@@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint )
2883
2938
}
2884
2939
2885
2940
private char [] lookuEntityReplacement ( int entityNameLen )
2886
- throws XmlPullParserException , IOException
2887
-
2888
2941
{
2889
2942
if ( !allStringsInterned )
2890
2943
{
@@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen )
2919
2972
}
2920
2973
}
2921
2974
}
2922
- return null ;
2975
+ return BUF_NOT_RESOLVED ;
2923
2976
}
2924
2977
2925
2978
private void parseComment ()
@@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch ))
2977
3030
}
2978
3031
else
2979
3032
{
2980
- throw new XmlPullParserException ( "Illegal character 0x" + Integer .toHexString ((( int ) ch ) ) + " found in comment" , this , null );
3033
+ throw new XmlPullParserException ( "Illegal character 0x" + Integer .toHexString (ch ) + " found in comment" , this , null );
2981
3034
}
2982
3035
if ( normalizeIgnorableWS )
2983
3036
{
@@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 )
3484
3537
break ;
3485
3538
else if ( ch == '&' )
3486
3539
{
3487
- extractEntityRef ();
3540
+ extractEntityRefInDocDecl ();
3541
+ continue ;
3488
3542
}
3489
3543
if ( normalizeIgnorableWS )
3490
3544
{
@@ -3536,6 +3590,19 @@ else if ( ch == '\n' )
3536
3590
3537
3591
}
3538
3592
posEnd = pos - 1 ;
3593
+ text = null ;
3594
+ }
3595
+
3596
+ private void extractEntityRefInDocDecl ()
3597
+ throws XmlPullParserException , IOException
3598
+ {
3599
+ // extractEntityRef
3600
+ posEnd = pos - 1 ;
3601
+
3602
+ int prevPosStart = posStart ;
3603
+ parseEntityRefInDocDecl ();
3604
+
3605
+ posStart = prevPosStart ;
3539
3606
}
3540
3607
3541
3608
private void extractEntityRef ()
@@ -3559,9 +3626,9 @@ private void extractEntityRef()
3559
3626
}
3560
3627
// assert usePC == true;
3561
3628
3562
- final char [] resolvedEntity = parseEntityRef ();
3629
+ parseEntityRef ();
3563
3630
// check if replacement text can be resolved !!!
3564
- if ( resolvedEntity == null )
3631
+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
3565
3632
{
3566
3633
if ( entityRefName == null )
3567
3634
{
@@ -3571,7 +3638,7 @@ private void extractEntityRef()
3571
3638
+ "'" , this , null );
3572
3639
}
3573
3640
// write into PC replacement text - do merge for replacement text!!!!
3574
- for ( char aResolvedEntity : resolvedEntity )
3641
+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
3575
3642
{
3576
3643
if ( pcEnd >= pc .length )
3577
3644
{
0 commit comments