解析XML——简单直接的来。_JAVA_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > JAVA > 解析XML——简单直接的来。

解析XML——简单直接的来。

 2011/10/11 8:12:18  deepnighttwo  http://deepnighttwo.iteye.com  我要评论(0)
  • 摘要:==================闲扯的话================对于现在越来越轻量级,越来越讲究速度和接近用户的应用来说,xml确实有点复杂了。解析起来不仅耗内存,而且很复杂。这就好像花了几千块钱买了个MSOffice,但是80%的feature都用不着,还白白的耗着CPU和内存。个人觉得,设置文件用XML其实挺好,因为设置文件一般并不太大,而且要求可读性强,还有很多乱七八糟的需求,可以利用XML的力量。昨天搞chrome的设置,发现chrome的设置文件也是使用的json
  • 标签:XML 解析

?

================== 闲扯的话================

对于现在越来越轻量级,越来越讲究速度和接近用户的应用来说,XML确实有点复杂了。解析起来不仅耗内存,而且很复杂。这就好像花了几千块钱买了个MS Office,但是80%的feature都用不着,还白白地耗着CPU和内存。

个人觉得,设置文件用XML 其实挺好,因为设置文件一般并不太大,而且要求可读性强,还有很多乱七八糟的需求,可以利用XML 的力量。昨天搞chrome 的设置,发现chrome 的设置文件也是使用的json ,读起来也是轻松愉快。

前阵子做了个程序,需要解析豆瓣API 调用返回的XML 。真想说一句。。。豆瓣你别用XML 了。。。至少,提供个json 版的API 调用吧。

(以上谨代表个人观点)

=================== 正文=================

解析豆瓣返回的XML,实在是不想用DOM这个重量级的玩意。DOM这个玩意,说它强大好还是说它官僚好呢。我倾向于使用SAX解析。但是现在面临的一个问题是,我需要根据XML节点的名字和属性值(一个或者多个)来决定当前的值是不是我想要的。这就麻烦一点点。第一反应是考虑XPath。后来觉得不如自己做一个得了,权当是按需定制一个轻量级的XPath。

首先定义XMLSearchUnit 类,这个类的实例用来描述一个需要在XML 中搜索的值,值可以是xml 节点的值,或者是节点的属性。

package com.deepnighttwo.resourceresolver.douban.resolver.utils;

import java.util.HashMap;
import java.util.Map;

import org.xml.sax.Attributes;

/**
? *
? * Represent a search task. Target could be value of a node or attribute of the
? * node.
? *
? *
@author mzang
? */

public class XMLSearchUnit {

???
// attribute values to be matched during search
??? private Map<String, String> attributeMatchValidation = new HashMap<String, String>();

???
// if target is an attribute, then set this member to be the attribute name.
??? // if it is null or empty, then means the target is node value.
??? private String expectedAttr;

? ??
// xml path, format is: /node_name/node_name/...
??? private String xmlPath;

???
public XMLSearchUnit(String xmlPath) {
???????
this .xmlPath = xmlPath;
??? }

???
/**
???? * if current node meets the search conditions or not. Meets means the path
???? * is correct and the attribute value is matched.
???? *
???? *
@param path
???? *
@param attributes
???? *
@return
???? */

???
public boolean match(String path, Attributes attributes) {
???????
if (xmlPath.equals(path) == false ) {
???????????
return false ;
??????? }

???????
for (String key : attributeMatchValidation.keySet()) {
??????????? String exp = attributeMatchValidation.get(key);
??????????? String compare = attributes.getValue(key);
???????????
if (exp.equalsIgnoreCase(compare) == false ) {
?????? ?????????
return false ;
??????????? }
??????? }
???????
return true ;
??? }

???
public Map<String, String> getAttributeMatchValidation() {
???????
return attributeMatchValidation;
??? }

???
public void addAttributeValidation(String key, String value) {
?? ????? attributeMatchValidation.put(key, value);
??? }

???
public String getXmlPath() {
???????
return xmlPath;
??? }

???
public void setAttributeMatchValidation(
??????????? Map<String, String> attributeMatchValidation) {
???????
this .attributeMatchValidation = attributeMatchValidation;
??? }

???
public String getExpectedAttr() {
???????
return expectedAttr;
??? }

???
/**
???? * if target is node value, then set expectedAttr to null. if target is an
???? * attribute value, set it to be the attribute name.
???? *
???? *
@param expectedAttr
???? */

???
public void setExpectedAttr(String expectedAttr) {
???????
this .expectedAttr = expectedAttr;
??? }

???
/**
???? * hash code can be cached if all properties are not be be changed.
???? */

??? @Override
???
public int hashCode() {
???????
final int prime = 31;
???????
int result = 1;
??????? result = prime
??????????????? * result
??????????????? + ((attributeMatchValidation ==
null ) ? 0
??????????????????????? : attributeMatchValidation.hashCode());
??????? result = prime * result
??????????????? + ((expectedAttr ==
null ) ? 0 : expectedAttr.hashCode());
??????? result = prime * result + ((xmlPath ==
null ) ? 0 : xmlPath.hashCode());
???????
return result;
??? }

??? @Override
???
public boolean equals(Object obj) {
???????
if ( this == obj)
???????????
return true ;
???????
if (obj == null )
???????????
return false ;
???????
if (getClass() != obj.getClass())
???????????
return false ;
??????? XMLSearchUnit other = (XMLSearchUnit) obj;
???????
if (attributeMatchValidation == null ) {
???????????
if (other.attributeMatchValidation != null )
???????????????
return false ;
??????? }
else if (!attributeMatchValidation
??????????????? .equals(other.attributeMatchValidation))
???????????
return false ;
???????
if (expectedAttr == null ) {
???????????
if (other.expectedAttr != null )
???????????????
return false ;
??????? }
else if (!expectedAttr.equals(other.expectedAttr))
???????????
return false ;
???????
if (xmlPath == null ) {
???????????
if (other.xmlPath != null )
???????????????
return false ;
??????? }
else if (!xmlPath.equals(other.xmlPath))
???????????
return false ;
???????
return true ;
??? }

}

这个类比较简单。就是用一个HashMap保存待匹配的attribute键值对,用一个字符串表示期待的attribute name,用一个字符串表示期待的node path。

然后就是如何在SAX解析里用到这个类的实例去搜索了。

package com.deepnighttwo.resourceresolver.douban.resolver.utils;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
? *
? * SAXP parser working with XMLSearchUnit.
? *
? *
@author mzang
? */


public class DoubanSearchParser extends DefaultHandler {

???
// create and initial search units
??? public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit(
??????????? "/feed/entry/id");

???
public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit(
??????????? "/entry/summary");

???
public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit(
??????????? "/entry/title");

???
public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit(
??????????? "/entry/db:attribute");

???
public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit(
??????????? "/entry/gd:rating");

???
public static final XMLSearchUnit DETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit(
? ?????????? "/entry/gd:rating");

???
public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit(
??????????? "/feed/entry/link");

???
static {
??????? DETAILS_LINK_URL_PATH.addAttributeValidation("rel", "alternate");
??????? DETAILS_LINK_URL_PATH.setExpectedAttr("href");

??????? DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang", "zh_CN");
??????? DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name", "aka");

??????? DETAILS_RATINGE_PATH.setExpectedAttr("average");

? ?????? DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");

??? }

???
// a map to store the XMLSearchUnit and value
??? private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>();

???
// a counter of search unit. if it is 0, then all search unit finds a match
??? // value and the result of the XML will be skipped.
??? private int count = 0;

???
private StringBuilder path = new StringBuilder();

???
private static final String pathSeparater = "/";

???
private XMLSearchUnit[] searchUnits;

??? List<XMLSearchUnit> foundItems =
new ArrayList<XMLSearchUnit>();

???
/**
???? * constructor, accept XML input stream, 0 or more search unit instances.
???? *
???? *
@param input
???? *
@param expectedPath
???? *
@return
???? */

?? ?
public Map<XMLSearchUnit, String> parseResults(InputStream input,
??????????? XMLSearchUnit... expectedPath) {
???????
for (XMLSearchUnit search : expectedPath) {
??????????? results.put(search,
null );
??????? }

??????? searchUnits = expectedPath;

???? ??? count = expectedPath.length;

??????? XMLReader xmlReader =
null ;
???????
try {
??????????? SAXParserFactory spfactory = SAXParserFactory.newInstance();
??????????? spfactory.setValidating(
false );
??????????? SAXParser saxParser = spfactory.newSAXParser();
??????????? xmlReader = saxParser.getXMLReader();
??????????? xmlReader.setContentHandler(
this );
??????????? xmlReader.parse(
new InputSource(input));
??????? }
catch (Exception e) {
??????????? System.err.println(e);
??????????? System.exit(1);
?????? ? }
???????
return results;
??? }

???
private void addToPath(String addPath) {
??????? path.append(pathSeparater).append(addPath.toLowerCase());
??? }

???
private void popPath() {
???????
int index = path.lastIndexOf(pathSeparater);
???????
// String removedPath = path.substring(index);
??????? path.delete(index, path.length());
??? }

??? @Override
???
public void startElement(String uri, String localName, String qName,
??????????? Attributes attributes)
throws SAXException {
??????? foundItems.clear();
? ??????
if (count == 0) {
???????????
return ;
??????? }

???????
// update path
??????? addToPath(qName);

??????? List<XMLSearchUnit> foundAttrItems =
null ;

???????
// check if current node matches search units. if it is a node value
??????? // search, then store it in a member variable named foundItems because
??????? // the value of the node is known only when reaches the end of the
??????? // node.but for attribute search, it value is known here. So then are
??????? // put in a local variable list named foundAttrItems.
??????? for (XMLSearchUnit unit : searchUnits) {
???????????
if (unit.match(path.toString(), attributes) == true ) {

???????????????
if (unit.getExpectedAttr() == null ) {
??????????????????? foundItems.add(unit);
??????????????? }
else {
? ??????????????????
if (foundAttrItems == null ) {
??????????????????????? foundAttrItems =
new ArrayList<XMLSearchUnit>();
??????????????????? }
??????????????????? foundAttrItems.add(unit);
??????????????? }
??????????? }
??????? }
???????
// if no attribute match, return.
??????? if (foundAttrItems == null ) {
???????????
return ;
??????? }

???????
// fill search unit value using attribute value. update count.
??????? for (XMLSearchUnit attrUnit : foundAttrItems) {
??????????? String attrValue = attributes.getValue(attrUnit.getExpectedAttr());
???????????
if (results.get(attrUnit) == null ) {
??????????????? count--;
??????????? }
??????????? results.put(attrUnit, attrValue);
??????????? count--;
??????? }
??? }

???
/**
???? * if current node matches, the the node value is useful, store it.
???? */

??? @Override
???
public void characters( char [] ch, int start, int length)
???????????
throws SAXException {
???????
if (count == 0) {
???????????
return ;
??????? }
???????
if (foundItems.size() == 0) {
?????????? ?
return ;
??????? }

???????
for (XMLSearchUnit unit : foundItems) {
??????????? String content =
new String(ch, start, length);
???????????
if (results.get(unit) == null ) {
??????????????? count--;
??????????? }
??????????? results.put(unit, content);
??? ???? }
??? }

??? @Override
???
public void endElement(String uri, String localName, String qName)
???????????
throws SAXException {
??????? foundItems.clear();
???????
if (count == 0) {
???????????
return ;
??????? }
??????? popPath();
??? }
}

?

发表评论
用户名: 匿名