I am zipping a large number (about 300,000) of small text files using Java. I am running out of heap space during the operation and I think that this is because the zipping process uses a hashtable which becomes too large. It fails at about 148,000 entries. Here is the code and the error message:
package zippingDriver;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.*;
public class ZipTools {
public static final int BIG_STEP = 1000;
/**
* Zips a set of files to the specified directory
*
* @param zipPath The path to the zip file that will be created
* @param filesToZip An array of file objects that will be added to the zip file
* @param directoryName The directory in the zip file to add the files to
* @return
*/
public static boolean zipFiles(String zipPath, File[] filesToZip, String directoryName) {
System.out.println("zipFiles(" + zipPath + ", " + filesToZip + ", " + directoryName + ")");
// Create the file
File zipFile = new File(zipPath);
try {
zipFile.createNewFile();
} catch (IOException e) {
System.out.println("File already exists. Will not override the file.");
e.printStackTrace();
}
ZipOutputStream zipOut = null;
try {
zipOut = new ZipOutputStream(new FileOutputStream(zipFile));
// Set the compression ratio
zipOut.setLevel(Deflater.DEFAULT_COMPRESSION);
performZip(zipOut, zipPath, filesToZip, directoryName);
} catch (FileNotFoundException e) {
System.out.println("A File was not found (either the zip file or the files to " +
"place in the zip file).");
e.printStackTrace();
return false;
} catch (IOException e) {
e.printStackTrace();
}
// Close the ZipOutPutStream
try {
zipOut.close();
} catch (IOException e) {
e.printStackTrace();
}
return true;
}
private static void performZip(ZipOutputStream zipOut, String zipPath,
File[] filesToZip, String directoryName) throws IOException {
// Create buffer for writing the zip file
byte[] buffer = new byte[1024];
FileInputStream in = null;
ZipEntry currentZipEntry;
int length = filesToZip.length;
int i = 0;
for(i = 0; i < length; i += BIG_STEP) {
int limit = i + BIG_STEP;
if(limit > length)
limit = length;
System.out.println(i + "\t" + filesToZip.getName() + " : " + filesToZip[i].getParent());
for(int j = i; j < limit; j++) {
File curr = filesToZip[j];
//System.out.println(curr.getName() + " : " + curr.getParent());
// Zip the files inside a subdirectory if necessary
if(curr.isDirectory()) {
performZip(zipOut, zipPath, curr.listFiles(), directoryName + File.separator + curr.getName());
continue;
}
// Associate a file input stream for the current file
in = new FileInputStream(curr);
// Add ZIP entry to output stream.
if(directoryName != null)
currentZipEntry = new ZipEntry(directoryName + File.separator + curr.getName());
else
currentZipEntry = new ZipEntry(curr.getName());
zipOut.putNextEntry(currentZipEntry);
// Transfer bytes from the current file to the ZIP file
//out.write(buffer, 0, in.read(buffer));
int len;
while ((len = in.read(buffer)) > 0)
{
zipOut.write(buffer, 0, len);
}
// Close the current entry
zipOut.closeEntry();
// Close the current file input stream
in.close();
zipOut.flush();
}
System.gc();
System.runFinalization();
}
}
}
Error Message:
+Exception in thread "main" java.lang.OutOfMemoryError: Java heap space+
+at java.util.Hashtable.rehash(Unknown Source)+
+at java.util.Hashtable.put(Unknown Source)+
+at java.util.zip.ZipOutputStream.putNextEntry(Unknown Source)+
+at zippingDriver.ZipTools.performZip(ZipTools.java:107)+
+at zippingDriver.ZipTools.performZip(ZipTools.java:93)+
+at zippingDriver.ZipTools.zipFiles(ZipTools.java:42)+
+at zippingDriver.ZipDriver.main(ZipDriver.java:17)+
Thanks!