001 /*
002 * SonarQube, open source software quality management tool.
003 * Copyright (C) 2008-2014 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * SonarQube is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * SonarQube is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
019 */
020 package org.sonar.api.utils;
021
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
054
055 /**
056 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
057 *
058 * @since 1.10
059 */
060 public class XpathParser {
061
062 private static final String CAN_NOT_PARSE_XML = "can not parse xml : ";
063 private Element root = null;
064 private Document doc = null;
065 private DocumentBuilder builder;
066 private XPath xpath;
067 private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();
068
069 public XpathParser() {
070 DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
071 try {
072 bf.setFeature("http://apache.org/xml/features/validation/schema", false);
073 bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
074 bf.setFeature("http://xml.org/sax/features/validation", false);
075 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
076 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
077 bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
078 } catch (ParserConfigurationException e) {
079 Logger log = LoggerFactory.getLogger(this.getClass().getName());
080 log.error("Error occured during features set up.", e);
081 }
082 try {
083 bf.setNamespaceAware(false);
084 bf.setValidating(false);
085 builder = bf.newDocumentBuilder();
086 } catch (ParserConfigurationException e) {
087 throw new XmlParserException("can not create a XML parser", e);
088 }
089 }
090
091 public void parse(File file) {
092 if (file == null || !file.exists()) {
093 throw new XmlParserException("File not found : " + file);
094 }
095
096 BufferedReader buffer = null;
097 try {
098 buffer = new BufferedReader(new FileReader(file));
099 parse(buffer);
100
101 } catch (IOException e) {
102 throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
103
104 } finally {
105 IOUtils.closeQuietly(buffer);
106 }
107 }
108
109 public void parse(InputStream stream) {
110 BufferedReader buffer = null;
111 try {
112 buffer = new BufferedReader(new InputStreamReader(stream));
113 parse(buffer);
114
115 } catch (IOException e) {
116 throw new XmlParserException("can not parse the stream", e);
117
118 } finally {
119 IOUtils.closeQuietly(buffer);
120 }
121 }
122
123 private void parse(BufferedReader buffer) throws IOException {
124 parse(IOUtils.toString(buffer));
125 }
126
127 public void parse(String xml) {
128 try {
129 xml = fixUnicodeChar(xml);
130 doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
131 XPathFactory factory = XPathFactory.newInstance();
132 xpath = factory.newXPath();
133
134 } catch (SAXException e) {
135 throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
136 } catch (IOException e) {
137 throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
138 }
139 }
140
141 public Element getRoot() {
142 if (root == null && doc != null) {
143 root = doc.getDocumentElement();
144 }
145 return root;
146 }
147
148 public Document getDocument() {
149 return doc;
150 }
151
152 public Element getChildElement(Element base, String elementName) {
153 NodeList childrens = base.getElementsByTagName(elementName);
154 for (int i = 0; i < childrens.getLength(); i++) {
155 Node nde = childrens.item(i);
156 if (nde.getNodeType() == Node.ELEMENT_NODE) {
157 return (Element) nde;
158 }
159 }
160 return null;
161 }
162
163 public Element getChildElement(String elementName) {
164 NodeList childrens = getRoot().getElementsByTagName(elementName);
165 for (int i = 0; i < childrens.getLength(); i++) {
166 Node nde = childrens.item(i);
167 if (nde.getNodeType() == Node.ELEMENT_NODE) {
168 return (Element) nde;
169 }
170 }
171 return null;
172 }
173
174 public List<Element> getChildElements(String elementName) {
175 List<Element> rtrVal = new ArrayList<Element>();
176 NodeList childrens = getRoot().getElementsByTagName(elementName);
177 for (int i = 0; i < childrens.getLength(); i++) {
178 Node nde = childrens.item(i);
179 if (nde.getNodeType() == Node.ELEMENT_NODE) {
180 rtrVal.add((Element) nde);
181 }
182 }
183 return rtrVal;
184 }
185
186 public List<Element> getChildElements(Element base, String elementName) {
187 List<Element> rtrVal = new ArrayList<Element>();
188 NodeList childrens = base.getElementsByTagName(elementName);
189 for (int i = 0; i < childrens.getLength(); i++) {
190 Node nde = childrens.item(i);
191 if (nde.getNodeType() == Node.ELEMENT_NODE) {
192 rtrVal.add((Element) nde);
193 }
194 }
195 return rtrVal;
196 }
197
198 public String getChildElementValue(Element base, String elementName) {
199 NodeList childrens = base.getElementsByTagName(elementName);
200 for (int i = 0; i < childrens.getLength(); i++) {
201 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
202 return childrens.item(i).getFirstChild().getNodeValue();
203 }
204 }
205 return null;
206 }
207
208 public String getElementValue(Node base) {
209 if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
210 return base.getNextSibling().getNodeValue();
211 } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
212 return base.getFirstChild().getNodeValue();
213 }
214 return null;
215 }
216
217 public String getChildElementValue(String elementName) {
218 NodeList childrens = getRoot().getElementsByTagName(elementName);
219 for (int i = 0; i < childrens.getLength(); i++) {
220 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
221 return childrens.item(i).getFirstChild().getNodeValue();
222 }
223 }
224 return null;
225 }
226
227 public Object executeXPath(Node node, QName qname, String xPathExpression) {
228 XPathExpression expr = compiledExprs.get(xPathExpression);
229 try {
230 if (expr == null) {
231 expr = xpath.compile(xPathExpression);
232 compiledExprs.put(xPathExpression, expr);
233 }
234 return expr.evaluate(node, qname);
235
236 } catch (XPathExpressionException e) {
237 throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
238 }
239 }
240
241 public String executeXPath(String xPathExpression) {
242 return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
243 }
244
245 public String executeXPath(Node node, String xPathExpression) {
246 return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
247 }
248
249 public NodeList executeXPathNodeList(String xPathExpression) {
250 return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
251 }
252
253 public NodeList executeXPathNodeList(Node node, String xPathExpression) {
254 return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
255 }
256
257 public Node executeXPathNode(Node node, String xPathExpression) {
258 return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
259 }
260
261 /**
262 * Fix the error occured when parsing a string containing unicode character
263 * Example : &u20ac; will be replaced by €
264 */
265 protected String fixUnicodeChar(String text) {
266 String unicode = "&u";
267 StringBuilder replace = new StringBuilder(text);
268 if (text.indexOf(unicode) >= 0) {
269 Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
270 Matcher m = p.matcher(replace.toString());
271 int nbFind = 0;
272 while (m.find()) {
273 // Add one index each time because we add one character each time (&u -> &#x)
274 replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
275 nbFind++;
276 }
277 }
278 return replace.toString();
279 }
280 }