Skip to Main Content

Java EE (Java Enterprise Edition) General Discussion

Announcement

For appeals, questions and feedback about Oracle Forums, please email oracle-forums-moderators_us@oracle.com. Technical questions should be asked in the appropriate category. Thank you!

JAXP 1.3 - SAXParser reuse + Schema validation - Memory leak

843834Jul 29 2005
Issue:

Here is a test program that parses the same XML file 1000 times,
reusing the same SAXParser (see the new reset() method added in JAXP 1.3).

When the parser performs validation against a compiled XML schema instance,
the memory used never stops increasing, eventually causing an OutOfMemoryError.

Increasing the JVM max. heap size just delays the inevitable OutOfMemoryError!
Periodically forcing a garbage collection doesn't help either.
I also tried changing the XML or XSD file, but it doesn't seem to depend on those.
The ones I tested with come from the Xerces distribution and are pretty simple (personal.xml and personal.xsd).

Usage: java ReusableParser personal.xml personal.xsd

Program output:

Schema validation enabled.
0) Memory (used/total): 442/1984 Kb - Max. 65088 Kb
100) Memory (used/total): 5508/8120 Kb - Max. 65088 Kb
200) Memory (used/total): 11081/16800 Kb - Max. 65088 Kb
300) Memory (used/total): 16391/19228 Kb - Max. 65088 Kb
400) Memory (used/total): 20988/28332 Kb - Max. 65088 Kb
500) Memory (used/total): 26940/37604 Kb - Max. 65088 Kb
600) Memory (used/total): 32453/46876 Kb - Max. 65088 Kb
700) Memory (used/total): 36752/56144 Kb - Max. 65088 Kb
800) Memory (used/total): 43187/65088 Kb - Max. 65088 Kb
900) Memory (used/total): 48417/65088 Kb - Max. 65088 Kb
1000) Memory (used/total): 53437/65088 Kb - Max. 65088 Kb

The problem doesn't occur if the SAXParserFactory "schema" property isn't set
(no schema validation is performed by the parser);
the memory consumption remains constant.

Usage: java ReusableParser personal.xml

Program output:

Schema validation disabled.
0) Memory (used/total): 316/1984 Kb - Max. 65088 Kb
100) Memory (used/total): 502/1984 Kb - Max. 65088 Kb
200) Memory (used/total): 512/1984 Kb - Max. 65088 Kb
300) Memory (used/total): 499/1984 Kb - Max. 65088 Kb
400) Memory (used/total): 501/1984 Kb - Max. 65088 Kb
500) Memory (used/total): 489/1984 Kb - Max. 65088 Kb
600) Memory (used/total): 521/1984 Kb - Max. 65088 Kb
700) Memory (used/total): 509/1984 Kb - Max. 65088 Kb
800) Memory (used/total): 501/1984 Kb - Max. 65088 Kb
900) Memory (used/total): 488/1984 Kb - Max. 65088 Kb
1000) Memory (used/total): 464/1984 Kb - Max. 65088 Kb

Am I doing something wrong or is this a bug?
The only workaround that I've found is to recreate the SAXParser each time.
Thanks in advance.

Environment:

- JDK: java version "1.5.0_04"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_04-b05)
Java HotSpot(TM) Client VM (build 1.5.0_04-b05, mixed mode, sharing)

- OS: Windows XP Professional Version 2002 SP 1

ReusableParser.java
import java.io.File;
import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Test harness that parses the same XML file repeatedly with a single,
 * reused {@link SAXParser} and prints JVM heap statistics while doing so.
 *
 * <p>The parser is created once in the constructor and {@link SAXParser#reset()}
 * is called after every parse. When a schema file is supplied, the compiled
 * {@link Schema} is set on the factory before the parser is created; otherwise
 * no schema is set.
 *
 * <p>Usage: {@code java ReusableParser <XML file> [<XSD file>]}
 */
public class ReusableParser {

    /** A memory report is printed every this many iterations. */
    private static final int REPORT_INTERVAL = 100;

    /** Divisor used to convert the byte counts reported by {@link Runtime} to Kb. */
    private static final int BYTES_PER_KB = 1024;

    private final SAXParserFactory parserFactory;
    private final DefaultHandler saxHandler;
    /** The single parser instance reused for every parse. */
    private final SAXParser saxParser;

    /**
     * Creates the parser factory, optionally attaches a schema to it, and
     * creates the one parser that {@link #parse(File, int)} will reuse.
     *
     * @param schemaFile the XML Schema file to validate against, or
     *                   {@code null} to create the parser without a schema
     * @throws Exception if the schema cannot be loaded or the parser cannot
     *                   be created
     */
    public ReusableParser(File schemaFile) throws Exception {
        this.parserFactory = SAXParserFactory.newInstance();

        // The parser is required to be namespace-aware for Schema validation.
        this.parserFactory.setNamespaceAware(true);

        if (schemaFile == null) {
            System.out.println("Schema validation disabled.");
        } else {
            System.out.println("Schema validation enabled.");
            this.parserFactory.setSchema(loadSchema(schemaFile));
        }
        this.saxParser = this.parserFactory.newSAXParser();
        this.saxHandler = new DefaultHandler();
    }

    /**
     * Compiles the given file into a {@link Schema} using a factory for the
     * W3C XML Schema constraint language.
     *
     * @param schemaFile the XSD file to load
     * @return the compiled schema
     * @throws SAXException if the schema cannot be loaded
     */
    private Schema loadSchema(File schemaFile) throws SAXException {
        final SchemaFactory schemaFactory =
                SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        return schemaFactory.newSchema(schemaFile);
    }

    /**
     * Parses {@code file} the requested number of times with the same parser,
     * resetting the parser after each parse.
     *
     * <p>Every {@value #REPORT_INTERVAL} iterations (starting with the first)
     * the memory statistics are printed and a garbage collection is then
     * requested. A final report is printed once the loop has finished.
     *
     * @param file  the XML file to parse
     * @param times how many times to parse it
     * @throws Exception if parsing fails
     */
    public void parse(File file, int times) throws Exception {
        for (int i = 0; i < times; i++) {
            this.saxParser.parse(file, this.saxHandler);
            this.saxParser.reset();

            if ((i % REPORT_INTERVAL) == 0) {
                // Monitor memory consumption (reported before the GC request below).
                printMemoryStatistics(i);

                // Request a garbage collection.
                System.gc();

                // yield() is static and always affects the calling thread, so
                // call it on the class rather than through currentThread().
                Thread.yield();
            }
        }
        printMemoryStatistics(times);
    }

    /**
     * Prints one line with the used, total and maximum heap sizes in Kb,
     * prefixed with the given iteration number.
     *
     * @param itemNb the iteration number shown at the start of the line
     */
    private void printMemoryStatistics(int itemNb) {
        Runtime rt = Runtime.getRuntime();
        long freeMem = rt.freeMemory();
        long totalMem = rt.totalMemory();
        long usedMem = totalMem - freeMem;

        StringBuilder builder = new StringBuilder()
            .append(itemNb).append(") ")
            .append("Memory (used/total): ")
            .append(usedMem / BYTES_PER_KB).append("/")
            .append(totalMem / BYTES_PER_KB).append(" Kb - ")
            .append("Max. ").append(rt.maxMemory() / BYTES_PER_KB).append(" Kb");
        System.out.println(builder.toString());
    }

    /**
     * Command-line entry point: parses the given XML file 1000 times,
     * validating against the XSD file when one is given.
     *
     * @param args {@code <XML file> [<XSD file>]}
     * @throws Exception if the parser cannot be set up or parsing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Arguments: <XML file> [<XSD file>]");
            return;
        }
        File xmlFile = new File(args[0]);
        File schemaFile = null;

        if (args.length > 1) {
            schemaFile = new File(args[1]);
        }
        ReusableParser parser = new ReusableParser(schemaFile);
        parser.parse(xmlFile, 1000);
    }

}
personal.xml
<?xml version="1.0" encoding="UTF-8"?>
<personnel xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	   xsi:noNamespaceSchemaLocation='personal.xsd'>

  <person id="Big.Boss" >
    <name><family>Boss</family> <given>Big</given></name>
    <email>chief@foo.com</email>
    <link subordinates="one.worker two.worker three.worker four.worker five.worker"/>
  </person>

  <person id="one.worker">
    <name><family>Worker</family> <given>One</given></name>
    <email>one@foo.com</email>
    <link manager="Big.Boss"/>
  </person>

  <person id="two.worker">
    <name><family>Worker</family> <given>Two</given></name>
    <email>two@foo.com</email>
    <link manager="Big.Boss"/>
  </person>

  <person id="three.worker">
    <name><family>Worker</family> <given>Three</given></name>
    <email>three@foo.com</email>
    <link manager="Big.Boss"/>
  </person>

  <person id="four.worker">
    <name><family>Worker</family> <given>Four</given></name>
    <email>four@foo.com</email>
    <link manager="Big.Boss"/>
  </person>

  <person id="five.worker">
    <name><family>Worker</family> <given>Five</given></name>
    <email>five@foo.com</email>
    <link manager="Big.Boss"/>
  </person>

</personnel>
personal.xsd
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>

 <xs:element name="personnel">
  <xs:complexType>
   <xs:sequence>
     <xs:element ref="person" minOccurs='1' maxOccurs='unbounded'/>
   </xs:sequence>
  </xs:complexType>

  <xs:unique name="unique1">
   <xs:selector xpath="person"/>
   <xs:field xpath="name/given"/>
   <xs:field xpath="name/family"/>
  </xs:unique>
  <xs:key name='empid'>
   <xs:selector xpath="person"/>
   <xs:field xpath="@id"/>
  </xs:key>
  <xs:keyref name="keyref1" refer='empid'>
   <xs:selector xpath="person"/> 
   <xs:field xpath="link/@manager"/>  
  </xs:keyref>

 </xs:element>

 <xs:element name="person">
  <xs:complexType>
   <xs:sequence>
     <xs:element ref="name"/>
     <xs:element ref="email" minOccurs='0' maxOccurs='unbounded'/>
     <xs:element ref="url"   minOccurs='0' maxOccurs='unbounded'/>
     <xs:element ref="link"  minOccurs='0' maxOccurs='1'/>
   </xs:sequence>
   <xs:attribute name="id"  type="xs:ID" use='required'/>
   <xs:attribute name="note" type="xs:string"/>
   <xs:attribute name="contr" default="false">
    <xs:simpleType>
     <xs:restriction base = "xs:string">
       <xs:enumeration value="true"/>
       <xs:enumeration value="false"/>
     </xs:restriction>
    </xs:simpleType>
   </xs:attribute>
   <xs:attribute name="salary" type="xs:integer"/>
  </xs:complexType>
 </xs:element>

 <xs:element name="name">
  <xs:complexType>
   <xs:all>
    <xs:element ref="family"/>
    <xs:element ref="given"/>
   </xs:all>
  </xs:complexType>
 </xs:element>

 <xs:element name="family" type='xs:string'/>

 <xs:element name="given" type='xs:string'/>

 <xs:element name="email" type='xs:string'/>

 <xs:element name="url">
  <xs:complexType>
   <xs:attribute name="href" type="xs:string" default="http://"/>
  </xs:complexType>
 </xs:element>

 <xs:element name="link">
  <xs:complexType>
   <xs:attribute name="manager" type="xs:IDREF"/>
   <xs:attribute name="subordinates" type="xs:IDREFS"/>
  </xs:complexType>
 </xs:element>

 <xs:notation name='gif' public='-//APP/Photoshop/4.0' system='photoshop.exe'/>

</xs:schema>
Comments
Locked Post
New comments cannot be posted to this locked post.
Post Details
Locked on Aug 26 2005
Added on Jul 29 2005
0 comments
118 views