Sunday, 10 November 2013

Read UTF-8 XML file using SAX Parser


In this tutorial we are going to see how to read an XML file that contains UTF-8 characters using the SAX parser.
1. Create a project called JavaXML.
2. Create a package called com.javatutorialscorner.xml.sax under JavaXML.
3. Create a Java class called ReadUTF8XMLFile.java under the com.javatutorialscorner.xml.sax package.
ReadUTF8XMLFile.java
package com.javatutorialscorner.xml.sax;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Reads an XML file with a SAX parser, decoding the file explicitly as UTF-8,
 * and prints every element start/end plus the text of the
 * {@code firstname}, {@code lastname} and {@code petname} elements.
 */
public class ReadUTF8XMLFile {

    /** File that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_XML_PATH = "C:\\jtc\\student.xml";

    /**
     * Parses the XML file and writes the trace to standard output.
     * Parser and I/O failures are reported on standard error via their stack trace.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to read.
     *             When absent, {@code C:\jtc\student.xml} is used.
     */
    public static void main(String[] args) {
        String xmlPath = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
        try {
            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
            SAXParser parser = saxParserFactory.newSAXParser();
            DefaultHandler defaultHandler = new DefaultHandler() {
                /** qName of the tracked element currently open, or null when none is open. */
                private String trackedElement;
                /** Console label that belongs to {@link #trackedElement}. */
                private String trackedLabel;
                /** Text collected so far for the tracked element. */
                private final StringBuilder text = new StringBuilder();

                @Override
                public void startElement(String uri, String localName,
                        String qName, Attributes attribute) throws SAXException {
                    System.out.println("Start Element : " + qName);
                    String label = labelFor(qName);
                    if (label != null) {
                        trackedElement = qName;
                        trackedLabel = label;
                        text.setLength(0);
                    }
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName) throws SAXException {
                    // Print the value only once the element is complete: the parser may
                    // deliver one element's text through several characters() calls, and
                    // an empty element triggers no characters() call at all.
                    if (trackedElement != null && trackedElement.equals(qName)) {
                        System.out.println(trackedLabel + text);
                        trackedElement = null;
                        trackedLabel = null;
                        text.setLength(0);
                    }
                    System.out.println("End Element : " + qName);
                }

                @Override
                public void characters(char ch[], int start, int length)
                        throws SAXException {
                    // Text outside a tracked element (e.g. indentation) is ignored.
                    if (trackedElement != null) {
                        text.append(ch, start, length);
                    }
                }

                /** Returns the console label for a tracked element name, or null otherwise. */
                private String labelFor(String qName) {
                    if (qName.equalsIgnoreCase("FIRSTNAME")) {
                        return "First Name : ";
                    }
                    if (qName.equalsIgnoreCase("LASTNAME")) {
                        return "Last Name : ";
                    }
                    if (qName.equalsIgnoreCase("PETNAME")) {
                        return "Pet Name : ";
                    }
                    return null;
                }
            };
            File xmlFile = new File(xmlPath);
            // try-with-resources closes the file even when parsing fails.
            try (InputStream inputStream = new FileInputStream(xmlFile);
                    Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
                // Handing the parser a Reader makes it consume characters that were
                // already decoded as UTF-8.
                InputSource inputSource = new InputSource(reader);
                parser.parse(inputSource, defaultHandler);
            }
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

}

If you parse an XML file that contains non-ASCII (UTF-8) characters, the parser may throw an error. To avoid this error, wrap the file's InputStream in a UTF-8 Reader and pass it to the parser through an InputSource, using the following lines of code.

// Open the XML file as a raw byte stream.
File xmlFile = new File("C:\\jtc\\student.xml");
InputStream inputStream = new FileInputStream(xmlFile);
// Decode the bytes explicitly as UTF-8 instead of relying on a default charset.
Reader reader = new InputStreamReader(inputStream, "UTF-8");
// Wrap the character stream so it can be handed to the SAX parser.
InputSource inputSource = new InputSource(reader);
// Parse the decoded characters, reporting events to the handler defined earlier.
parser.parse(inputSource, defaultHandler);

student.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<college>
<student>
<firstname>Shiva</firstname>
<lastname>J</lastname>
<petname>§</petname>
</student>
</college>

Now you can run the program and see the following output in the console.

Start Element : college

Start Element : student

Start Element : firstname

First Name : Shiva

End Element : firstname

Start Element : lastname

Last Name : J

End Element : lastname

Start Element : petname

Pet Name : §

End Element : petname

End Element : student

End Element : college

0 comments

Post a Comment