Issue:
Here is a test program that parses the same XML file 1000 times,
reusing the same SAXParser (see the new reset() method added in JAXP 1.3).
When the parser performs validation against a compiled XML schema instance,
the memory used never stops increasing, eventually causing an OutOfMemoryError.
Increasing the JVM max. heap size just delays the inevitable OutOfMemoryError!
Periodically forcing a garbage collection doesn't help either.
I also tried changing the XML or XSD file, but it doesn't seem to depend on those.
The ones I tested with come from the Xerces distribution and are pretty simple (personal.xml and personal.xsd).
Usage:
java ReusableParser personal.xml personal.xsd
Program output:
Schema validation enabled.
0) Memory (used/total): 442/1984 Kb - Max. 65088 Kb
100) Memory (used/total): 5508/8120 Kb - Max. 65088 Kb
200) Memory (used/total): 11081/16800 Kb - Max. 65088 Kb
300) Memory (used/total): 16391/19228 Kb - Max. 65088 Kb
400) Memory (used/total): 20988/28332 Kb - Max. 65088 Kb
500) Memory (used/total): 26940/37604 Kb - Max. 65088 Kb
600) Memory (used/total): 32453/46876 Kb - Max. 65088 Kb
700) Memory (used/total): 36752/56144 Kb - Max. 65088 Kb
800) Memory (used/total): 43187/65088 Kb - Max. 65088 Kb
900) Memory (used/total): 48417/65088 Kb - Max. 65088 Kb
1000) Memory (used/total): 53437/65088 Kb - Max. 65088 Kb
The problem doesn't occur if the SAXParserFactory "schema" property isn't set
(no schema validation is performed by the parser);
the memory consumption remains constant.
Usage:
java ReusableParser personal.xml
Program output:
Schema validation disabled.
0) Memory (used/total): 316/1984 Kb - Max. 65088 Kb
100) Memory (used/total): 502/1984 Kb - Max. 65088 Kb
200) Memory (used/total): 512/1984 Kb - Max. 65088 Kb
300) Memory (used/total): 499/1984 Kb - Max. 65088 Kb
400) Memory (used/total): 501/1984 Kb - Max. 65088 Kb
500) Memory (used/total): 489/1984 Kb - Max. 65088 Kb
600) Memory (used/total): 521/1984 Kb - Max. 65088 Kb
700) Memory (used/total): 509/1984 Kb - Max. 65088 Kb
800) Memory (used/total): 501/1984 Kb - Max. 65088 Kb
900) Memory (used/total): 488/1984 Kb - Max. 65088 Kb
1000) Memory (used/total): 464/1984 Kb - Max. 65088 Kb
Am I doing something wrong or is this a bug?
The only workaround that I've found is to recreate the SAXParser each time.
Thanks in advance.
Environment:
- JDK: java version "1.5.0_04"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_04-b05)
Java HotSpot(TM) Client VM (build 1.5.0_04-b05, mixed mode, sharing)
- OS: Windows XP Professional Version 2002 SP 1
ReusableParser.java
import java.io.File;
import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Test harness that parses the same XML document many times with a single,
 * reused {@link SAXParser} and prints heap statistics along the way.
 *
 * <p>When a schema file is supplied, the parser factory is configured with the
 * compiled {@link Schema} before the parser is created; otherwise no schema is
 * set on the factory. After every parse the parser is {@link SAXParser#reset()
 * reset} so that it can be reused for the next one.
 */
public class ReusableParser {

    /** A memory report is printed (and a GC requested) once every this many parses. */
    private static final int REPORT_INTERVAL = 100;

    /** Divisor that converts byte counts into the "Kb" figures that are printed. */
    private static final long BYTES_PER_KB = 1024;

    /** Number of parses performed by {@link #main(String[])}. */
    private static final int DEFAULT_PARSE_COUNT = 1000;

    private final SAXParserFactory parserFactory;
    private final DefaultHandler saxHandler;
    private final SAXParser saxParser;

    /**
     * Creates the parser that will be reused by {@link #parse(File, int)}.
     *
     * @param schemaFile the XML schema to validate against, or {@code null} to
     *                   leave the factory without a schema
     * @throws Exception if the schema cannot be loaded or the parser cannot be created
     */
    public ReusableParser(File schemaFile) throws Exception {
        this.parserFactory = SAXParserFactory.newInstance();
        // The parser is required to be namespace-aware for Schema validation.
        this.parserFactory.setNamespaceAware(true);
        if (schemaFile == null) {
            System.out.println("Schema validation disabled.");
        } else {
            System.out.println("Schema validation enabled.");
            this.parserFactory.setSchema(loadSchema(schemaFile));
        }
        this.saxParser = this.parserFactory.newSAXParser();
        // A handler with no overrides: the parsed content itself is not inspected.
        this.saxHandler = new DefaultHandler();
    }

    /**
     * Compiles the given file into a W3C XML Schema instance.
     *
     * @param schemaFile the schema document to load
     * @return the compiled schema
     * @throws SAXException if the schema factory fails to load the schema
     */
    private static Schema loadSchema(File schemaFile) throws SAXException {
        // Get a SchemaFactory for the W3C XML Schema constraint language.
        final SchemaFactory schemaFactory =
                SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        return schemaFactory.newSchema(schemaFile);
    }

    /**
     * Parses {@code file} {@code times} times with the same parser instance.
     *
     * <p>Memory statistics are printed on iteration 0 and then every
     * {@value #REPORT_INTERVAL} iterations, and once more after the loop.
     * Each in-loop report is followed by a garbage-collection request, so the
     * printed figures are sampled <em>before</em> that request.
     *
     * @param file  the XML document to parse
     * @param times how many times to parse it; the loop body does not run for
     *              values below 1, but the final report is still printed
     * @throws Exception if parsing fails
     */
    public void parse(File file, int times) throws Exception {
        for (int i = 0; i < times; i++) {
            saxParser.parse(file, saxHandler);
            // reset() is what allows the same SAXParser instance to be reused.
            saxParser.reset();
            if (i % REPORT_INTERVAL == 0) {
                // Monitor memory consumption.
                printMemoryStatistics(i);
                // Request a garbage collection and give other threads a chance to run.
                System.gc();
                // yield() is static and always acts on the current thread, so it is
                // invoked on the class rather than through Thread.currentThread().
                Thread.yield();
            }
        }
        printMemoryStatistics(times);
    }

    /**
     * Prints one line of the form
     * {@code <itemNb>) Memory (used/total): <used>/<total> Kb - Max. <max> Kb}
     * using the figures reported by {@link Runtime}.
     *
     * @param itemNb the label printed at the start of the line (the iteration number)
     */
    private static void printMemoryStatistics(int itemNb) {
        Runtime rt = Runtime.getRuntime();
        long freeMem = rt.freeMemory();
        long totalMem = rt.totalMemory();
        long usedMem = totalMem - freeMem;
        StringBuilder builder = new StringBuilder()
                .append(itemNb).append(") ")
                .append("Memory (used/total): ")
                .append(usedMem / BYTES_PER_KB).append("/")
                .append(totalMem / BYTES_PER_KB).append(" Kb - ")
                .append("Max. ").append(rt.maxMemory() / BYTES_PER_KB).append(" Kb");
        System.out.println(builder.toString());
    }

    /**
     * Entry point: {@code java ReusableParser <XML file> [<XSD file>]}.
     *
     * @param args the XML file to parse and, optionally, the schema to validate against
     * @throws Exception if the parser cannot be set up or parsing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Arguments: <XML file> [<XSD file>]");
            return;
        }
        File xmlFile = new File(args[0]);
        File schemaFile = null;
        if (args.length > 1) {
            schemaFile = new File(args[1]);
        }
        ReusableParser parser = new ReusableParser(schemaFile);
        parser.parse(xmlFile, DEFAULT_PARSE_COUNT);
    }
}
personal.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample instance document: a <personnel> root containing six <person>
     records. The root names personal.xsd as its schema through
     xsi:noNamespaceSchemaLocation. -->
<personnel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation='personal.xsd'>
<!-- The manager: its <link> lists the ids of all five workers below. -->
<person id="Big.Boss" >
<name><family>Boss</family> <given>Big</given></name>
<email>chief@foo.com</email>
<link subordinates="one.worker two.worker three.worker four.worker five.worker"/>
</person>
<!-- The workers: each <link> points back to the manager's id via the
     manager attribute. -->
<person id="one.worker">
<name><family>Worker</family> <given>One</given></name>
<email>one@foo.com</email>
<link manager="Big.Boss"/>
</person>
<person id="two.worker">
<name><family>Worker</family> <given>Two</given></name>
<email>two@foo.com</email>
<link manager="Big.Boss"/>
</person>
<person id="three.worker">
<name><family>Worker</family> <given>Three</given></name>
<email>three@foo.com</email>
<link manager="Big.Boss"/>
</person>
<person id="four.worker">
<name><family>Worker</family> <given>Four</given></name>
<email>four@foo.com</email>
<link manager="Big.Boss"/>
</person>
<person id="five.worker">
<name><family>Worker</family> <given>Five</given></name>
<email>five@foo.com</email>
<link manager="Big.Boss"/>
</person>
</personnel>
personal.xsd
<?xml version="1.0" encoding="UTF-8"?>
<!-- W3C XML Schema for the personnel sample document. No targetNamespace is
     declared, so all elements are in no namespace. -->
<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>
<!-- Root element: a sequence of one or more <person> elements. -->
<xs:element name="personnel">
<xs:complexType>
<xs:sequence>
<xs:element ref="person" minOccurs='1' maxOccurs='unbounded'/>
</xs:sequence>
</xs:complexType>
<!-- Identity constraint: the (given, family) name pair must be unique
     across the <person> children. -->
<xs:unique name="unique1">
<xs:selector xpath="person"/>
<xs:field xpath="name/given"/>
<xs:field xpath="name/family"/>
</xs:unique>
<!-- Identity constraint: each <person> is keyed by its id attribute. -->
<xs:key name='empid'>
<xs:selector xpath="person"/>
<xs:field xpath="@id"/>
</xs:key>
<!-- Identity constraint: a person's link/@manager value must match an
     existing 'empid' key. -->
<xs:keyref name="keyref1" refer='empid'>
<xs:selector xpath="person"/>
<xs:field xpath="link/@manager"/>
</xs:keyref>
</xs:element>
<!-- A person: one <name>, then any number of <email> and <url> elements,
     then at most one <link>. -->
<xs:element name="person">
<xs:complexType>
<xs:sequence>
<xs:element ref="name"/>
<xs:element ref="email" minOccurs='0' maxOccurs='unbounded'/>
<xs:element ref="url" minOccurs='0' maxOccurs='unbounded'/>
<xs:element ref="link" minOccurs='0' maxOccurs='1'/>
</xs:sequence>
<!-- id is the only required attribute. -->
<xs:attribute name="id" type="xs:ID" use='required'/>
<xs:attribute name="note" type="xs:string"/>
<!-- contr is restricted to the strings "true" / "false" and defaults to "false". -->
<xs:attribute name="contr" default="false">
<xs:simpleType>
<xs:restriction base = "xs:string">
<xs:enumeration value="true"/>
<xs:enumeration value="false"/>
</xs:restriction>
</xs:simpleType>
</xs:attribute>
<xs:attribute name="salary" type="xs:integer"/>
</xs:complexType>
</xs:element>
<!-- A name holds both <family> and <given>; xs:all allows them in either order. -->
<xs:element name="name">
<xs:complexType>
<xs:all>
<xs:element ref="family"/>
<xs:element ref="given"/>
</xs:all>
</xs:complexType>
</xs:element>
<!-- Simple string-valued leaf elements. -->
<xs:element name="family" type='xs:string'/>
<xs:element name="given" type='xs:string'/>
<xs:element name="email" type='xs:string'/>
<!-- An empty element carrying an href attribute that defaults to "http://". -->
<xs:element name="url">
<xs:complexType>
<xs:attribute name="href" type="xs:string" default="http://"/>
</xs:complexType>
</xs:element>
<!-- An empty element relating people: manager is a single IDREF,
     subordinates is a list of IDREFs. -->
<xs:element name="link">
<xs:complexType>
<xs:attribute name="manager" type="xs:IDREF"/>
<xs:attribute name="subordinates" type="xs:IDREFS"/>
</xs:complexType>
</xs:element>
<!-- Notation declaration named 'gif'. -->
<xs:notation name='gif' public='-//APP/Photoshop/4.0' system='photoshop.exe'/>
</xs:schema>