Skip to content

Commit b074384

Browse files
authored
Fix use of a mismatching unicode path extra field in zip unarchiving
The zip spec (§4.6.9) says that a unicode path extra field is only to be used when its CRC matches the current file name. Using it unconditionally, as introduced in a080e0a, is wrong. Commons.compress already does it right internally, no need to do anything here. The bug affected extracting until 2aec2ba as a side effect replaced use of fileInfo.getName() by ze.getName(); recently only direct use of FileInfo, e.g. in file selectors, was affected. Tests for proper use of unicode path extra fields in zip unarchiving. Two test files included, one with the EFS bit (language encoding flag) set, the other without. Only efsclear.zip is used in tests because the zip spec does not specify which prevails when both the bit is set and an extra field is present - plexus-archiver and all other unarchivers I've tried ignore the extra field in that case.
1 parent e3a2cb6 commit b074384

File tree

7 files changed

+143
-14
lines changed

7 files changed

+143
-14
lines changed

src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java

+1-14
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,7 @@ private static class ZipEntryFileInfo
102102

103103
public String getName()
104104
{
105-
try
106-
{
107-
final UnicodePathExtraField unicodePath =
108-
(UnicodePathExtraField) zipEntry.getExtraField( UnicodePathExtraField.UPATH_ID );
109-
110-
return unicodePath != null
111-
? new String( unicodePath.getUnicodeName(), "UTF-8" )
112-
: zipEntry.getName();
113-
114-
}
115-
catch ( final UnsupportedEncodingException e )
116-
{
117-
throw new AssertionError( e );
118-
}
105+
return zipEntry.getName();
119106
}
120107

121108
@Override

src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java

+19
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.InputStream;
66
import java.io.InputStreamReader;
77
import java.net.URL;
8+
import java.util.Arrays;
89
import java.util.HashMap;
910
import java.util.HashSet;
1011
import java.util.Iterator;
@@ -122,4 +123,22 @@ public void testSymlinkEntries()
122123
assertTrue( symLinks.isEmpty() );
123124
}
124125

126+
public void testUnarchiveUnicodePathExtra()
127+
throws Exception
128+
{
129+
PlexusIoZipFileResourceCollection prc = new PlexusIoZipFileResourceCollection();
130+
prc.setFile( getTestFile( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
131+
Set<String> names = new HashSet<>();
132+
final Iterator<PlexusIoResource> entries = prc.getEntries();
133+
while ( entries.hasNext() )
134+
{
135+
final PlexusIoResource next = entries.next();
136+
names.add(next.getName());
137+
}
138+
// a Unicode Path extra field should only be used when its CRC matches the header file name
139+
assertEquals( "should use good extra fields but not bad ones",
140+
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
141+
names );
142+
}
143+
125144
}

src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java

+61
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
package org.codehaus.plexus.archiver.zip;
22

33
import java.io.File;
4+
import java.io.IOException;
45
import java.lang.reflect.Method;
6+
import java.util.Arrays;
7+
import java.util.HashSet;
8+
import java.util.Set;
9+
510
import org.codehaus.plexus.PlexusTestCase;
611
import org.codehaus.plexus.archiver.Archiver;
712
import org.codehaus.plexus.archiver.UnArchiver;
13+
import org.codehaus.plexus.components.io.fileselectors.FileInfo;
814
import org.codehaus.plexus.components.io.fileselectors.FileSelector;
915
import org.codehaus.plexus.components.io.fileselectors.IncludeExcludeFileSelector;
1016
import org.codehaus.plexus.util.FileUtils;
@@ -88,6 +94,61 @@ public void testUnarchiveUtf8()
8894
assertTrue( new File( dest, "\u20acuro.txt" ).exists() );
8995
}
9096

97+
public void testUnarchiveUnicodePathExtra()
98+
throws Exception
99+
{
100+
File dest = new File( "target/output/unzip/unicodePathExtra" );
101+
dest.mkdirs();
102+
for ( String name : dest.list() )
103+
{
104+
new File( dest, name ).delete();
105+
}
106+
assertEquals( 0, dest.list().length );
107+
108+
final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
109+
unarchiver.setDestFile( dest );
110+
unarchiver.extract();
111+
// a Unicode Path extra field should only be used when its CRC matches the header file name
112+
assertEquals( "should use good extra fields but not bad ones",
113+
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
114+
new HashSet<>( Arrays.asList( dest.list() ) ) );
115+
}
116+
117+
public void testUnarchiveUnicodePathExtraSelector()
118+
throws Exception
119+
{
120+
File dest = new File( "target/output/unzip/unicodePathExtraSelector" );
121+
dest.mkdirs();
122+
for ( String name : dest.list() )
123+
{
124+
new File( dest, name ).delete();
125+
}
126+
assertEquals( 0, dest.list().length );
127+
128+
class CollectingSelector implements FileSelector
129+
{
130+
public Set<String> collection = new HashSet<>();
131+
@Override
132+
public boolean isSelected( FileInfo fileInfo ) throws IOException
133+
{
134+
collection.add( fileInfo.getName() );
135+
return false;
136+
}
137+
}
138+
CollectingSelector selector = new CollectingSelector();
139+
140+
final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
141+
unarchiver.setDestFile( dest );
142+
unarchiver.setFileSelectors( new FileSelector[] { selector } );
143+
unarchiver.extract();
144+
145+
assertEquals( "should not extract anything", 0, dest.list().length );
146+
// a Unicode Path extra field should only be used when its CRC matches the header file name
147+
assertEquals( "should use good extra fields but not bad ones",
148+
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
149+
selector.collection );
150+
}
151+
91152
private void runUnarchiver( String path, FileSelector[] selectors, boolean[] results )
92153
throws Exception
93154
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import java.io.File;
2+
import java.io.IOException;
3+
import java.nio.charset.Charset;
4+
5+
import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
6+
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
7+
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
8+
9+
public class GenerateZips {
10+
11+
private static void generate(String outfilename, boolean languageEncodingFlag) {
12+
try {
13+
ZipArchiveOutputStream zs = new ZipArchiveOutputStream(new File(outfilename));
14+
zs.setUseLanguageEncodingFlag(languageEncodingFlag);
15+
zs.setMethod(ZipArchiveOutputStream.STORED);
16+
ZipArchiveEntry ze = new ZipArchiveEntry("nameonly-name");
17+
ze.setTime(0);
18+
zs.putArchiveEntry(ze);
19+
//zs.write(nothing);
20+
zs.closeArchiveEntry();
21+
ze = new ZipArchiveEntry("goodextra-name");
22+
ze.setTime(0);
23+
ze.addExtraField(new UnicodePathExtraField("goodextra-extra", "goodextra-name".getBytes(Charset.forName("UTF-8"))));
24+
zs.putArchiveEntry(ze);
25+
//zs.write(nothing);
26+
zs.closeArchiveEntry();
27+
ze = new ZipArchiveEntry("badextra-name");
28+
ze.setTime(0);
29+
ze.addExtraField(new UnicodePathExtraField("badextra-extra", "bogus".getBytes(Charset.forName("UTF-8"))));
30+
zs.putArchiveEntry(ze);
31+
//zs.write(nothing);
32+
zs.closeArchiveEntry();
33+
zs.finish();
34+
zs.close();
35+
} catch (IOException e) {
36+
e.printStackTrace();
37+
}
38+
}
39+
40+
public static void main(String[] args) {
41+
generate("efsclear.zip", false);
42+
// with the flag set, decoders tend to not look at the extra field
43+
generate("efsset.zip", true);
44+
System.out.println("done");
45+
}
46+
47+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Test ZIP Files for Unicode Path Extra Field
2+
3+
These files are used to test for proper use of Unicode Path extra fields ([zip specification §4.6.9](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT)) when unarchiving zip archives.
4+
5+
Both contain three empty files, one without a Unicode Path extra field, one with a good extra field (CRC matches the header file name), one with a stale extra field (mismatched CRC). By using different values in the header file name and the Unicode path, it can be distinguished which one was used. A compliant unarchiver will use the names marked in bold.
6+
7+
File name in header | CRC | Unicode Path
8+
--------------------|-----------------------|--------------------
9+
**nameonly-name** |
10+
goodextra-name | CRC("goodextra-name") | **goodextra-extra**
11+
**badextra-name** | CRC("bogus") | badextra-extra
12+
13+
The difference between the two archives is whether the Language Encoding Flag (EFS) is set, which indicates that the header file names are already in UTF-8. The specification is not explicit about which one wins when both the flag is set and a Unicode Path extra field is present (it only says archivers shouldn’t do that). In practice, all unarchivers I have seen (including Apache Commons Compress used by Plexus-Archiver) ignore the extra field when the flag is set, which is why only _efsclear.zip_ is useful for testing.
14+
15+
The archives were created by the included _GenerateZips.java_ using Commons Compress.
484 Bytes
Binary file not shown.
484 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)