001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2009 SonarSource SA
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.api.utils;
021
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
043
044 /**
045 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
046 *
047 * @since 1.10
048 */
049 public class XpathParser {
050
051 private Element root = null;
052 private Document doc = null;
053 private DocumentBuilder builder;
054 private XPath xpath;
055 private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();
056
057 public XpathParser() {
058 DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
059 try {
060 bf.setFeature("http://apache.org/xml/features/validation/schema", false);
061 bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
062 bf.setFeature("http://xml.org/sax/features/validation", false);
063 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
064 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
065 bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
066 } catch (ParserConfigurationException e) {
067 Logger log = LoggerFactory.getLogger(this.getClass().getName());
068 log.error("Error occured during features set up.", e);
069 }
070 try {
071 bf.setNamespaceAware(false);
072 bf.setValidating(false);
073 builder = bf.newDocumentBuilder();
074 } catch (ParserConfigurationException e) {
075 throw new XmlParserException("can not instance a XML parser", e);
076 }
077 }
078
079 public void parse(File file) {
080 if (file == null || !file.exists()) {
081 throw new XmlParserException("File not found : " + file);
082 }
083
084 BufferedReader buffer = null;
085 try {
086 buffer = new BufferedReader(new FileReader(file));
087 parse(buffer);
088
089 } catch (IOException e) {
090 throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
091
092 } finally {
093 IOUtils.closeQuietly(buffer);
094 }
095 }
096
097 public void parse(InputStream stream) {
098 BufferedReader buffer = null;
099 try {
100 buffer = new BufferedReader(new InputStreamReader(stream));
101 parse(buffer);
102
103 } catch (IOException e) {
104 throw new XmlParserException("can not parse the stream", e);
105
106 } finally {
107 IOUtils.closeQuietly(buffer);
108 }
109 }
110
111 private void parse(BufferedReader buffer) throws IOException {
112 parse(IOUtils.toString(buffer));
113 }
114
115 public void parse(String xml) {
116 try {
117 xml = fixUnicodeChar(xml);
118 doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
119 XPathFactory factory = XPathFactory.newInstance();
120 xpath = factory.newXPath();
121
122 } catch (SAXException e) {
123 throw new XmlParserException("can not parse xml : " + xml, e);
124 } catch (IOException e) {
125 throw new XmlParserException("can not parse xml : " + xml, e);
126 }
127 }
128
129 public Element getRoot() {
130 if (root == null && doc != null) {
131 root = doc.getDocumentElement();
132 }
133 return root;
134 }
135
136 public Document getDocument() {
137 return doc;
138 }
139
140 public Element getChildElement(Element base, String elementName) {
141 NodeList childrens = base.getElementsByTagName(elementName);
142 for (int i = 0; i < childrens.getLength(); i++) {
143 Node nde = childrens.item(i);
144 if (nde.getNodeType() == Node.ELEMENT_NODE) {
145 return (Element) nde;
146 }
147 }
148 return null;
149 }
150
151 public Element getChildElement(String elementName) {
152 NodeList childrens = getRoot().getElementsByTagName(elementName);
153 for (int i = 0; i < childrens.getLength(); i++) {
154 Node nde = childrens.item(i);
155 if (nde.getNodeType() == Node.ELEMENT_NODE) {
156 return (Element) nde;
157 }
158 }
159 return null;
160 }
161
162 public List<Element> getChildElements(String elementName) {
163 List<Element> rtrVal = new ArrayList<Element>();
164 NodeList childrens = getRoot().getElementsByTagName(elementName);
165 for (int i = 0; i < childrens.getLength(); i++) {
166 Node nde = childrens.item(i);
167 if (nde.getNodeType() == Node.ELEMENT_NODE) {
168 rtrVal.add((Element) nde);
169 }
170 }
171 return rtrVal;
172 }
173
174 public List<Element> getChildElements(Element base, String elementName) {
175 List<Element> rtrVal = new ArrayList<Element>();
176 NodeList childrens = base.getElementsByTagName(elementName);
177 for (int i = 0; i < childrens.getLength(); i++) {
178 Node nde = childrens.item(i);
179 if (nde.getNodeType() == Node.ELEMENT_NODE) {
180 rtrVal.add((Element) nde);
181 }
182 }
183 return rtrVal;
184 }
185
186 public String getChildElementValue(Element base, String elementName) {
187 NodeList childrens = base.getElementsByTagName(elementName);
188 for (int i = 0; i < childrens.getLength(); i++) {
189 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
190 return childrens.item(i).getFirstChild().getNodeValue();
191 }
192 }
193 return null;
194 }
195
196 public String getElementValue(Node base) {
197 if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
198 return base.getNextSibling().getNodeValue();
199 } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
200 return base.getFirstChild().getNodeValue();
201 }
202 return null;
203 }
204
205 public String getChildElementValue(String elementName) {
206 NodeList childrens = getRoot().getElementsByTagName(elementName);
207 for (int i = 0; i < childrens.getLength(); i++) {
208 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
209 return childrens.item(i).getFirstChild().getNodeValue();
210 }
211 }
212 return null;
213 }
214
215 public Object executeXPath(Node node, QName qname, String xPathExpression) {
216 XPathExpression expr = compiledExprs.get(xPathExpression);
217 try {
218 if (expr == null) {
219 expr = xpath.compile(xPathExpression);
220 compiledExprs.put(xPathExpression, expr);
221 }
222 return expr.evaluate(node, qname);
223
224 } catch (XPathExpressionException e) {
225 throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
226 }
227 }
228
229 public String executeXPath(String xPathExpression) {
230 return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
231 }
232
233 public String executeXPath(Node node, String xPathExpression) {
234 return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
235 }
236
237 public NodeList executeXPathNodeList(String xPathExpression) {
238 return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
239 }
240
241 public NodeList executeXPathNodeList(Node node, String xPathExpression) {
242 return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
243 }
244
245 public Node executeXPathNode(Node node, String xPathExpression) {
246 return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
247 }
248
249 /**
250 * Fix the error occured when parsing a string containing unicode character
251 * Example : &u20ac; will be replaced by €
252 */
253 protected String fixUnicodeChar(String text) {
254 String unicode = "&u";
255 StringBuilder replace = new StringBuilder(text);
256 if (text.indexOf(unicode) >= 0) {
257 Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
258 Matcher m = p.matcher(replace.toString());
259 int nbFind = 0;
260 while (m.find()) {
261 // Add one index each time because we add one character each time (&u -> &#x)
262 replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
263 nbFind++;
264 }
265 }
266 return replace.toString();
267 }
268 }