Parsing large XML fails with: Exception in thread "main" java.lang.OutOfMemoryError
762333Mar 29 2010 — edited Mar 29 2010Hi,
I want to parse a large XML file, which might be up to 2GB. At present I have a 330MB XML file that I am trying to validate and parse with the Oracle SAX parser (xmlparserv2.jar). For the 330MB file I set the Java heap to as much as 1256MB, but I still get an OutOfMemoryError.
Below is the command which I was trying to execute from command prompt
C:\newcvs\ddex_POC\jar> java -Xms1024m -Xmx1256m -classpath D:\lib\xmlparserv2.jar;C:\newcvs\ddex_POC\jar\javaParsers.jar;D:\lib\xml.jar;D:\jar\xschema.jar sample.SchemaValidator
When I use the Sun parser or StAX, no heap memory error is thrown. I get the error only with the Oracle SAX parser.
Is the Oracle SAX parser suitable for large files?
The full program is attached below. Please let me know if I am doing something wrong. I want to validate and parse the XML.
package sample;
import net.ddex.xml._2009.dsr_main._311.DealForRecordCompanies;
import net.ddex.xml._2009.dsr_main._311.DetailedDeal;
import net.ddex.xml._2009.dsr_main._311.ReleaseTransactionsToRecordCompany;
import net.ddex.xml._2009.dsr_main._311.SalesDataToRecordCompany;
import net.ddex.xml._2009.dsr_main._311.SalesReportToRecordCompany;
import net.ddex.xml._2009.dsr_main._311.SalesReportToRecordCompanyMessage;
import net.ddex.xml._2009.dsr_main._311.SalesToRecordCompanyByCommercialModel;
import net.ddex.xml._2009.dsr_main._311.SalesToRecordCompanyByTerritory;
import net.ddex.xml._2009.dsr_main._311.SalesTransactionToRecordCompany;
import net.ddex.xml._2009.dsr_main._311.TotalSalesByReleaseType;
import net.ddex.xml._20091221.ddexc.AmountByUseAndDistributionChannelType;
import net.ddex.xml._20091221.ddexc.CommercialModelType;
import net.ddex.xml._20091221.ddexc.DSP;
import net.ddex.xml._20091221.ddexc.DistributionChannelType;
import net.ddex.xml._20091221.ddexc.ICPN;
import net.ddex.xml._20091221.ddexc.MessageHeader;
import net.ddex.xml._20091221.ddexc.MessageNotificationPeriod;
import net.ddex.xml._20091221.ddexc.MessagingParty;
import net.ddex.xml._20091221.ddexc.Name;
import net.ddex.xml._20091221.ddexc.PartyId;
import net.ddex.xml._20091221.ddexc.PartyName;
import net.ddex.xml._20091221.ddexc.ReleaseId;
import net.ddex.xml._20091221.ddexc.ReleaseType;
import net.ddex.xml._20091221.ddexc.UseType;
import net.ddex.xml._20091221.iso4217a.CurrencyCode;
import oracle.xml.schemavalidator.XSDValidator;
import oracle.xml.parser.schema.XSDException;
import oracle.xml.parser.schema.XMLSchema;
import oracle.xml.parser.schema.XSDBuilder;
import oracle.xml.parser.v2.SAXParser;
import oracle.xml.parser.v2.XMLError;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
public class SchemaValidator{
public void validateSchema(String XmlDocumentUrl, String schemaUrl)
{
try {
long startingHeapSize = Runtime.getRuntime().totalMemory();
long startTime = System.currentTimeMillis();
XSDValidator xsdValidator=new XSDValidator();
XSDBuilder builder = new XSDBuilder();
File file=new File(schemaUrl);
File xmlFile=new File(XmlDocumentUrl);
XMLSchema schemadoc = (XMLSchema)builder.build(file.toURL());
xsdValidator.setSchema(schemadoc);
Validator handler=new Validator();
XMLError xmlError=new XMLError();
xmlError.setErrorHandler(handler);
xsdValidator.setError(xmlError);
xsdValidator.validate(xmlFile.toURL());
if(handler.validationError==true){
System.err.println("XML Document has Error:" handler.validationError""+handler.saxParseException.getMessage());
} else {
System.out.println("XML Document is valid");
SAXParser saxParser = new SAXParser();
XMLHandler xMLHandler = new XMLHandler();
saxParser.setContentHandler(xMLHandler);
InputStream inputStream = new FileInputStream(new File(XmlDocumentUrl));
InputSource input =new InputSource(inputStream);
saxParser.parse(input);
System.err.println("Total execution time in Milliseconds:"+ (System.currentTimeMillis()- startTime));
System.out.println("Starting Heap size: " + startingHeapSize);
System.out.println("Ending Heap size: " + Runtime.getRuntime().totalMemory());
System.out.println("Total Heap Size Used = " + (Runtime.getRuntime().totalMemory() - startingHeapSize) );
}
} catch(java.io.IOException ioe)
{
System.out.println("IOException "+ioe.getMessage());
}catch (SAXException e) {
System.out.println("SAXException "+e.getMessage());
}
catch (XSDException e) {
System.out.println("SAXException "+e.getMessage());
}
}
private class Validator extends DefaultHandler
{
public boolean validationError = false;
public SAXParseException saxParseException=null;
public void error(SAXParseException exception) throws SAXException
{
validationError = true;
saxParseException=exception;
}
public void fatalError(SAXParseException exception) throws SAXException
{
validationError = true;
saxParseException=exception;
}
public void warning(SAXParseException exception) throws SAXException
{
}
}
public static void main(String[] argv){
SchemaValidator schemaValidator = new SchemaValidator();
schemaValidator.validateAndParse();
}
public void validateAndParse() {
String SchemaUrl="xsd/dsr-main.xsd";
String XmlDocumentUrl="d:/ddex/DDEx/DDEX DSR/Hand-0261 - Sample 7.4 (Main Profile).xml";
//String XmlDocumentUrl="ddexSamplexml.xml";
SchemaValidator validator=new SchemaValidator();
validator.validateSchema(XmlDocumentUrl, SchemaUrl);
}
}
class XMLHandler extends DefaultHandler {
Locator locator;
private String tempVal= null;
private MessageHeader header;
private MessagingParty messagingParty;
private PartyId partyId;
private MessageNotificationPeriod messageNotificationPeriod;
private SalesReportToRecordCompanyMessage salesReportToRecordCompanyMessage;
private SalesReportToRecordCompany salesReportToRecordCompany;
private DSP dsp;
private PartyName partyName;
private SalesToRecordCompanyByCommercialModel salesByCommercialModel;
private List<SalesReportToRecordCompany> listOfSalesReport;
private List<SalesToRecordCompanyByCommercialModel> listOfSalesByCommercialOrder;
private SalesToRecordCompanyByCommercialModel salesToRecordCompanyByCommercialModel;
private CommercialModelType commercialModelType;
private List<CommercialModelType> listOfCommercialModelType;
private List<SalesToRecordCompanyByTerritory> listOfSalesByTerritory;
private SalesToRecordCompanyByTerritory salesToRecordCompanyByTerritory;
private List<String> territoryCodes;
private List<ReleaseTransactionsToRecordCompany> listOfReleaseTransaction;
private ReleaseTransactionsToRecordCompany releaseTransactionsToRecordCompany ;
private List<ReleaseId> listOfReleaseIds;
private ReleaseId releaseId;
private ICPN icpn;
private List<SalesTransactionToRecordCompany> listOfSalesTransaction;
private SalesTransactionToRecordCompany salesTransactionToRecordCompany;
private UseType userType;
private List<UseType> listOfuseType;
private List<DistributionChannelType> listOfDistributionChannelType;
private DistributionChannelType distributionChannelType;
private DealForRecordCompanies dealForRecordCompanies;
private DetailedDeal detailedDeal;
private List<SalesDataToRecordCompany> listOfSalesDataToRecordCompany;
private SalesDataToRecordCompany salesDataToRecordCompany;
private List<TotalSalesByReleaseType> listOfTotalSalesByReleaseType;
private TotalSalesByReleaseType totalSalesByReleaseType;
private List<ReleaseType> listOfReleaseType;
private ReleaseType releaseType;
private AmountByUseAndDistributionChannelType amountByUseAndDistributionChannelType;
private List<UseType> listOfUseType;
private UseType useType;
public void XMLDefaultHandler() {
}
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
if(qName.equalsIgnoreCase("dsr:SalesReportToRecordCompanyMessage")) {
salesReportToRecordCompanyMessage = new SalesReportToRecordCompanyMessage();
}else if(qName.equalsIgnoreCase("MessageHeader")) {
header = new MessageHeader();
}else if(qName.equalsIgnoreCase("MessageSender")) {
messagingParty = new MessagingParty();
}else if(qName.equalsIgnoreCase("MessageRecipient")) {
messagingParty = new MessagingParty();
}else if(qName.equalsIgnoreCase("MessageNotificationPeriod")) {
messageNotificationPeriod = new MessageNotificationPeriod();
}else if(qName.equalsIgnoreCase("SalesReport")) {
listOfSalesReport = salesReportToRecordCompanyMessage.getSalesReport();
salesReportToRecordCompany = new SalesReportToRecordCompany();
}else if(qName.equalsIgnoreCase("DSP")) {
dsp = new DSP();
salesToRecordCompanyByTerritory = new SalesToRecordCompanyByTerritory();
}else if(qName.equalsIgnoreCase("PartyName")) {
partyName = new PartyName();
}else if(qName.equalsIgnoreCase("SalesByCommercialModel")) {
listOfSalesByCommercialOrder = salesReportToRecordCompany.getSalesByCommercialModel();
salesToRecordCompanyByCommercialModel = new SalesToRecordCompanyByCommercialModel();
}else if(qName.equalsIgnoreCase("CommercialModelType")) {
listOfCommercialModelType = salesToRecordCompanyByCommercialModel.getCommercialModelType();
commercialModelType = new CommercialModelType();
}else if(qName.equalsIgnoreCase("SalesByTerritory")) {
listOfSalesByTerritory = salesToRecordCompanyByCommercialModel.getSalesByTerritory();
}else if(qName.equalsIgnoreCase("TerritoryCode")) {
territoryCodes = salesToRecordCompanyByTerritory.getTerritoryCode();
}else if(qName.equalsIgnoreCase("ReleaseTransactions")) {
listOfReleaseTransaction = salesToRecordCompanyByTerritory.getReleaseTransactions();
releaseTransactionsToRecordCompany = new ReleaseTransactionsToRecordCompany();
}else if(qName.equalsIgnoreCase("DspGrossRevenue")) {
amountByUseAndDistributionChannelType = new AmountByUseAndDistributionChannelType();
} else if(qName.equalsIgnoreCase("ReleaseId")) {
listOfReleaseIds = releaseTransactionsToRecordCompany.getReleaseId();
releaseId = new ReleaseId();
}else if(qName.equalsIgnoreCase("ICPN")) {
icpn = new ICPN();
}else if(qName.equalsIgnoreCase("SalesTransaction")) {
listOfSalesTransaction = releaseTransactionsToRecordCompany.getSalesTransaction();
salesTransactionToRecordCompany = new SalesTransactionToRecordCompany();
}else if(qName.equalsIgnoreCase("UseType")) {
listOfuseType = salesTransactionToRecordCompany.getUseType();
userType = new UseType();
}else if(qName.equalsIgnoreCase("DistributionChannelType")) {
listOfDistributionChannelType = salesTransactionToRecordCompany.getDistributionChannelType();
distributionChannelType = new DistributionChannelType();
} else if(qName.equalsIgnoreCase("Deal")) {
dealForRecordCompanies = new DealForRecordCompanies();
} else if(qName.equalsIgnoreCase("DetailedDeal")) {
detailedDeal = new DetailedDeal();
} else if(qName.equalsIgnoreCase("SalesData")) {
listOfSalesDataToRecordCompany = salesTransactionToRecordCompany.getSalesData();
salesDataToRecordCompany = new SalesDataToRecordCompany();
} else if(qName.equalsIgnoreCase("TotalSalesByReleaseType")) {
listOfTotalSalesByReleaseType = salesReportToRecordCompanyMessage.getTotalSalesByReleaseType();
totalSalesByReleaseType = new TotalSalesByReleaseType();
} else if(qName.equalsIgnoreCase("ReleaseType")) {
listOfReleaseType = totalSalesByReleaseType.getReleaseType();
releaseType = new ReleaseType();
} else if(qName.equalsIgnoreCase("UseType")) {
listOfUseType = amountByUseAndDistributionChannelType.getUseType();
useType = new UseType();
}
for (int i=0; i<atts.getLength(); i++)
{
qName = atts.getQName(i);
localName = atts.getLocalName(i);
uri = atts.getURI(i);
// System.out.println(" ATTRIBUTE Qualified Name :" + qName);
// System.out.println(" ATTRIBUTE Local Name :" + localName);
// System.out.println(" ATTRIBUTE Namespace :" + uri);
// You can get the type and value of the attributes either
// by index or by the Qualified Name.
String type = atts.getType(qName);
String value = atts.getValue(qName);
// System.out.println(" ATTRIBUTE Type :" + type);
// System.out.println(" ATTRIBUTE Value :" + value);//
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException
{
if(qName.equalsIgnoreCase("MessageThreadId")) {
header.setMessageThreadId(tempVal);
}else if(qName.equalsIgnoreCase("MessageHeader")) {
salesReportToRecordCompanyMessage.setMessageHeader(header);
}else if(qName.equalsIgnoreCase("MessageId")) {
header.setMessageId(tempVal);
}else if(qName.equalsIgnoreCase("partyId")) {
partyId = new PartyId();
partyId.setValue(tempVal);
messagingParty.setPartyId(partyId);
}else if(qName.equalsIgnoreCase("MessageSender")) {
header.setMessageSender(messagingParty);
}else if(qName.equalsIgnoreCase("MessageRecipient")) {
header.setMessageRecipient(messagingParty);
}else if(qName.equalsIgnoreCase("MessageCreatedDateTime")) {
header.setMessageCreatedDateTime(new XMLGregorianCalendarImpl(tempVal));
}else if (qName.equalsIgnoreCase("StartDate")) {
messageNotificationPeriod.setStartDate(new XMLGregorianCalendarImpl(tempVal));
}else if (qName.equalsIgnoreCase("EndDate")) {
messageNotificationPeriod.setEndDate(new XMLGregorianCalendarImpl(tempVal));
}else if (qName.equalsIgnoreCase("SalesReport")) {
listOfSalesReport.add(salesReportToRecordCompany);
} else if(qName.equalsIgnoreCase("FullName")) {
Name name = new Name();
name.setValue(tempVal);
partyName.setFullName(name);
}else if(qName.equalsIgnoreCase("TerritoryCode")) {
dsp.setTerritoryCode(tempVal);
territoryCodes.add(tempVal);
}else if(qName.equalsIgnoreCase("DSP")) {
dsp.setPartyId(partyId);
dsp.setPartyName(partyName);
salesReportToRecordCompany.setDSP(dsp);
}else if(qName.equalsIgnoreCase("CommercialModelType")) {
commercialModelType.setValue(net.ddex.xml._20091221.ddex.CommercialModelType.fromValue(tempVal));
listOfCommercialModelType.add(commercialModelType);
}else if(qName.equalsIgnoreCase("CurrencyOfAccounting")) {
salesToRecordCompanyByCommercialModel.setCurrencyOfAccounting(CurrencyCode.fromValue(tempVal));
}else if(qName.equalsIgnoreCase("SalesByTerritory")) {
listOfSalesByTerritory.add(salesToRecordCompanyByTerritory);
}else if(qName.equalsIgnoreCase("SalesByCommercialModel")) {
listOfSalesByCommercialOrder.add(salesToRecordCompanyByCommercialModel);
}else if(qName.equalsIgnoreCase("ReleaseTransactions")) {
listOfReleaseTransaction.add(releaseTransactionsToRecordCompany);
}else if(qName.equalsIgnoreCase("ReleaseId")) {
icpn.setIsEan(true);
icpn.setValue(tempVal);
releaseId.setICPN(icpn);
listOfReleaseIds .add(releaseId);
}else if(qName.equalsIgnoreCase("SalesTransaction")) {
listOfSalesTransaction.add(salesTransactionToRecordCompany);
}else if(qName.equalsIgnoreCase("UseType")) {
userType.setValue(net.ddex.xml._20091221.ddex.UseType.fromValue(tempVal));
listOfuseType.add(userType);
} else if(qName.equalsIgnoreCase("DistributionChannelType")) {
distributionChannelType.setValue(net.ddex.xml._20091221.ddex.DistributionChannelType.fromValue(tempVal));
listOfDistributionChannelType.add(distributionChannelType);
} else if(qName.equalsIgnoreCase("Deal")) {
salesTransactionToRecordCompany.setDeal(dealForRecordCompanies);
} else if(qName.equalsIgnoreCase("DetailedDeal")) {
dealForRecordCompanies.setDetailedDeal(detailedDeal);
}else if(qName.equalsIgnoreCase("AgreedUnitPriceExcSalesTax")) {
detailedDeal.setAgreedUnitPriceExcSalesTax(new BigDecimal(tempVal));
}else if(qName.equalsIgnoreCase("AmountPayable")) {
detailedDeal.setAmountPayable(new BigDecimal(tempVal));
} else if(qName.equalsIgnoreCase("SalesData")) {
listOfSalesDataToRecordCompany.add(salesDataToRecordCompany);
} else if(qName.equalsIgnoreCase("NumberOfConsumerSalesGross")) {
salesDataToRecordCompany.setNumberOfConsumerSalesGross(new BigInteger(tempVal));
} else if(qName.equalsIgnoreCase("NumberOfFreeUnitsToConsumers")) {
salesDataToRecordCompany.setNumberOfFreeUnitsToConsumers(new BigInteger(tempVal));
} else if(qName.equalsIgnoreCase("DataToBeForwarded")) {
salesTransactionToRecordCompany.setDataToBeForwarded(new Boolean(tempVal));
} else if(qName.equalsIgnoreCase("ReleaseType")) {
releaseType.setValue(net.ddex.xml._20091221.ddex.ReleaseType.fromValue(tempVal));
listOfReleaseType.add(releaseType);
} else if(qName.equalsIgnoreCase("UnitsSoldTotal")) {
totalSalesByReleaseType.setUnitsSoldTotal(new BigInteger(tempVal));
}else if(qName.equalsIgnoreCase("TotalSalesByReleaseType")) {
listOfTotalSalesByReleaseType.add(totalSalesByReleaseType);
} else if(qName.equalsIgnoreCase("dsr:SalesReportToRecordCompanyMessage")) {
} else if(qName.equalsIgnoreCase("MessageNotificationPeriod")) {
salesReportToRecordCompanyMessage.setMessageNotificationPeriod(messageNotificationPeriod);
} else if(qName.equalsIgnoreCase("Amount")) {
amountByUseAndDistributionChannelType.setAmount(new BigDecimal(tempVal));
}else if(qName.equalsIgnoreCase("DspGrossRevenue")) {
salesToRecordCompanyByTerritory.setDspGrossRevenue(amountByUseAndDistributionChannelType);
}
}
public void characters(char[] cbuf, int start, int len)
{
tempVal = new String(cbuf,start,len);
}
public void startDocument()
{
// System.out.println("StartDocument");
}
public void endDocument() throws SAXException
{
// System.out.println("EndDocument");
}
public void processingInstruction(String target, String data) throws SAXException{
System.out.println("ProcessingInstruction:"+target+" "+data);
}
public void ignorableWhitespace(char[] cbuf, int start, int len)
{
System.out.println("IgnorableWhiteSpace");
}
//////////////////////////////////////////////////////////////////////
// (6) Sample implementation of the EntityResolver interface.
//////////////////////////////////////////////////////////////////////
public InputSource resolveEntity (String publicId, String systemId)
throws SAXException
{
System.out.println("ResolveEntity:"+publicId+" "+systemId);
System.out.println("Locator:"+locator.getPublicId()+" "+
locator.getSystemId()+
" "+locator.getLineNumber()+" "+locator.getColumnNumber());
return null;
}
//////////////////////////////////////////////////////////////////////
// (7) Sample implementation of the DTDHandler interface.
//////////////////////////////////////////////////////////////////////
public void notationDecl (String name, String publicId, String systemId)
{
System.out.println("NotationDecl:"+name+" "+publicId+" "+systemId);
}
public void unparsedEntityDecl (String name, String publicId,
String systemId, String notationName)
{
System.out.println("UnparsedEntityDecl:"+name + " "+publicId+" "+
systemId+" "+notationName);
}
}
Edited by: user12842292 on Mar 29, 2010 6:47 AM
Edited by: user12842292 on Mar 29, 2010 6:48 AM