Skip to Main Content

Java EE (Java Enterprise Edition) General Discussion

Announcement

For appeals, questions and feedback about Oracle Forums, please email oracle-forums-moderators_us@oracle.com. Technical questions should be asked in the appropriate category. Thank you!

JAXB behaviour when unmarshalling documents with an encoding declaration

843834Apr 19 2005
The following is against JWSDP-1.5, 1.4.2_07-b05 on XP Pro SP 2...

When I unmarshall a document using JAXB 1.0, and obtain Strings from the resulting objects, I would expect the Strings to be encoded with the character encoding specified by the text declaration. Is this a correct assumption?

Unfortunately, from the sample code (given below) I am not experiencing this behaviour. I've included the code, sample output, and the RELAX NG schema used.

Any help very welcome.

ian
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.List;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;

import org.xml.sax.InputSource;

import com.chellomedia.transcoding.Category;
import com.chellomedia.transcoding.ObjectFactory;
import com.chellomedia.transcoding.Schedule;

public class TranscoderExemplar {
	private static final String CATEGORY_NAME = "\u00bfMa\u00f1ana?";
	
	public static void main(String[] args) throws Exception {
		try {
			ByteArrayOutputStream os = generateSampleDocument("US-ASCII");
			inspectDocument("US-ASCII", os);
			os = generateSampleDocument("ISO-8859-1");
			inspectDocument("ISO-8859-1", os);
			os = generateSampleDocument("UTF-8");
			inspectDocument("UTF-8", os);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (JAXBException e) {
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	private static void inspectDocument(String encoding, ByteArrayOutputStream os) throws Exception {
		byte[] b = os.toByteArray();
		System.out.println(encoding + " document byte length: " + b.length);
		
		InputSource is = new InputSource(new ByteArrayInputStream(b));
		is.setEncoding(encoding);
		JAXBContext context = JAXBContext.newInstance("com.chellomedia.transcoding");
		Schedule schedule = (Schedule)context.createUnmarshaller().unmarshal(is);
		context.createValidator().validate(schedule);
		
		List categories = schedule.getCategories();
		for (int i = 0, n = categories.size(); i < n; i++) {
			Category c = (Category)categories.get(i);
			String name = c.getName();
			
			System.out.println(encoding + " name string length: " + name.length());
			System.out.println(encoding + " name byte length: " + name.getBytes().length);
		}
	}
	
	private static ByteArrayOutputStream generateSampleDocument(String encoding) throws JAXBException,
		FileNotFoundException 
	{
        ObjectFactory of = new ObjectFactory();
		Schedule schedule = of.createSchedule();
		List categories = schedule.getCategories();
		Category category = of.createCategory();
		category.setId(1);
		category.setName(CATEGORY_NAME);
		categories.add(category);

		Marshaller m = of.createMarshaller();
		m.setProperty("jaxb.encoding", encoding);
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

		ByteArrayOutputStream os = new ByteArrayOutputStream();
		m.marshal(schedule, os);
		FileOutputStream fos = new FileOutputStream(encoding + "-representation.xml");
		m.marshal(schedule, fos);
		
		return os;
	}
}
...and the schema
<grammar xmlns="http://relaxng.org/ns/structure/1.0"
         datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes"
         xmlns:jaxb="http://java.sun.com/xml/ns/jaxb"
         xmlns:xjc="http://java.sun.com/xml/ns/jaxb/xjc"
         jaxb:extensionBindingPrefixes="xjc"
         jaxb:version="1.0">
  
  <jaxb:schemaBindings>
    <jaxb:package name="com.chellomedia.transcoding"/>
  </jaxb:schemaBindings>
  
  <start>
    <ref name="Schedule"/>
  </start>

  <define name="Schedule">
    <element name="schedule">
      <ref name="Categories"/>
      <zeroOrMore>
        <ref name="Event"/>
      </zeroOrMore>
    </element>
  </define>

  <define name="Categories">
    <element name="categories">
      <zeroOrMore>
        <ref name="Category"/>
      </zeroOrMore>
    </element>
  </define>

  <define name="Category">
    <element name="category">
      <attribute name="id"><data type="int"/></attribute>
      <attribute name="name"/>
      <zeroOrMore>
        <ref name="Subcategory"/>
      </zeroOrMore>
    </element>
  </define>

  <define name="Subcategory">
    <element name="subcategory">
      <attribute name="id"/>
      <attribute name="name"/>
    </element>
  </define>

  <define name="Event">
    <element name="event">
      <attribute name="id"/>
      <attribute name="title"/>
      <attribute name="description"/>
      <attribute name="category"/>
      <attribute name="subcategory"/>
    </element>
  </define>
</grammar>
...with sample output
US-ASCII document byte length: 171
US-ASCII name string length: 8
US-ASCII name byte length: 8
ISO-8859-1 document byte length: 163
ISO-8859-1 name string length: 8
ISO-8859-1 name byte length: 8
UTF-8 document byte length: 160
UTF-8 name string length: 8
UTF-8 name byte length: 8
Comments
Locked Post
New comments cannot be posted to this locked post.
Post Details
Locked on May 17 2005
Added on Apr 19 2005
0 comments
630 views