001/*
002 * SonarQube
003 * Copyright (C) 2009-2016 SonarSource SA
004 * mailto:contact AT sonarsource DOT com
005 *
006 * This program is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * This program is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019 */
020package org.sonar.api.utils;
021
022import java.io.BufferedReader;
023import java.io.ByteArrayInputStream;
024import java.io.File;
025import java.io.FileInputStream;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.InputStreamReader;
029import java.nio.charset.StandardCharsets;
030import java.util.ArrayList;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Map;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036import javax.annotation.Nullable;
037import javax.xml.namespace.QName;
038import javax.xml.parsers.DocumentBuilder;
039import javax.xml.parsers.DocumentBuilderFactory;
040import javax.xml.parsers.ParserConfigurationException;
041import javax.xml.xpath.XPath;
042import javax.xml.xpath.XPathConstants;
043import javax.xml.xpath.XPathExpression;
044import javax.xml.xpath.XPathExpressionException;
045import javax.xml.xpath.XPathFactory;
046import org.apache.commons.io.IOUtils;
047import org.sonar.api.utils.log.Logger;
048import org.sonar.api.utils.log.Loggers;
049import org.w3c.dom.Document;
050import org.w3c.dom.Element;
051import org.w3c.dom.Node;
052import org.w3c.dom.NodeList;
053import org.xml.sax.SAXException;
054
055/**
056 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
057 *
058 * @since 1.10
059 */
060public class XpathParser {
061
062  private static final String CAN_NOT_PARSE_XML = "can not parse xml : ";
063  private Element root = null;
064  private Document doc = null;
065  private DocumentBuilder builder;
066  private XPath xpath;
067  private Map<String, XPathExpression> compiledExprs = new HashMap<>();
068
069  public XpathParser() {
070    DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
071    try {
072      bf.setFeature("http://apache.org/xml/features/validation/schema", false);
073      bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
074      bf.setFeature("http://xml.org/sax/features/validation", false);
075      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
076      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
077      bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
078    } catch (ParserConfigurationException e) {
079      Logger log = Loggers.get(this.getClass().getName());
080      log.error("Error occured during features set up.", e);
081    }
082    try {
083      bf.setNamespaceAware(false);
084      bf.setValidating(false);
085      builder = bf.newDocumentBuilder();
086    } catch (ParserConfigurationException e) {
087      throw new XmlParserException("can not create a XML parser", e);
088    }
089  }
090
091  public void parse(@Nullable File file) {
092    if (file == null || !file.exists()) {
093      throw new XmlParserException("File not found : " + file);
094    }
095
096    BufferedReader buffer = null;
097    try {
098      buffer = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
099      parse(buffer);
100
101    } catch (IOException e) {
102      throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
103
104    } finally {
105      IOUtils.closeQuietly(buffer);
106    }
107  }
108
109  public void parse(InputStream stream) {
110    BufferedReader buffer = null;
111    try {
112      buffer = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
113      parse(buffer);
114
115    } catch (IOException e) {
116      throw new XmlParserException("can not parse the stream", e);
117
118    } finally {
119      IOUtils.closeQuietly(buffer);
120    }
121  }
122
123  private void parse(BufferedReader buffer) throws IOException {
124    parse(IOUtils.toString(buffer));
125  }
126
127  public void parse(String xml) {
128    try {
129      String fixedXml = fixUnicodeChar(xml);
130      doc = builder.parse(new ByteArrayInputStream(fixedXml.getBytes(StandardCharsets.UTF_8)));
131      XPathFactory factory = XPathFactory.newInstance();
132      xpath = factory.newXPath();
133
134    } catch (IOException | SAXException e) {
135      throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
136    }
137  }
138
139  public Element getRoot() {
140    if (root == null && doc != null) {
141      root = doc.getDocumentElement();
142    }
143    return root;
144  }
145
146  public Document getDocument() {
147    return doc;
148  }
149
150  public Element getChildElement(Element base, String elementName) {
151    NodeList childrens = base.getElementsByTagName(elementName);
152    for (int i = 0; i < childrens.getLength(); i++) {
153      Node nde = childrens.item(i);
154      if (nde.getNodeType() == Node.ELEMENT_NODE) {
155        return (Element) nde;
156      }
157    }
158    return null;
159  }
160
161  public Element getChildElement(String elementName) {
162    NodeList childrens = getRoot().getElementsByTagName(elementName);
163    for (int i = 0; i < childrens.getLength(); i++) {
164      Node nde = childrens.item(i);
165      if (nde.getNodeType() == Node.ELEMENT_NODE) {
166        return (Element) nde;
167      }
168    }
169    return null;
170  }
171
172  public List<Element> getChildElements(String elementName) {
173    List<Element> rtrVal = new ArrayList<>();
174    NodeList childrens = getRoot().getElementsByTagName(elementName);
175    for (int i = 0; i < childrens.getLength(); i++) {
176      Node nde = childrens.item(i);
177      if (nde.getNodeType() == Node.ELEMENT_NODE) {
178        rtrVal.add((Element) nde);
179      }
180    }
181    return rtrVal;
182  }
183
184  public List<Element> getChildElements(Element base, String elementName) {
185    List<Element> rtrVal = new ArrayList<>();
186    NodeList childrens = base.getElementsByTagName(elementName);
187    for (int i = 0; i < childrens.getLength(); i++) {
188      Node nde = childrens.item(i);
189      if (nde.getNodeType() == Node.ELEMENT_NODE) {
190        rtrVal.add((Element) nde);
191      }
192    }
193    return rtrVal;
194  }
195
196  public String getChildElementValue(Element base, String elementName) {
197    NodeList childrens = base.getElementsByTagName(elementName);
198    for (int i = 0; i < childrens.getLength(); i++) {
199      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
200        return childrens.item(i).getFirstChild().getNodeValue();
201      }
202    }
203    return null;
204  }
205
206  public String getElementValue(Node base) {
207    if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
208      return base.getNextSibling().getNodeValue();
209    } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
210      return base.getFirstChild().getNodeValue();
211    }
212    return null;
213  }
214
215  public String getChildElementValue(String elementName) {
216    NodeList childrens = getRoot().getElementsByTagName(elementName);
217    for (int i = 0; i < childrens.getLength(); i++) {
218      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
219        return childrens.item(i).getFirstChild().getNodeValue();
220      }
221    }
222    return null;
223  }
224
225  public Object executeXPath(Node node, QName qname, String xPathExpression) {
226    XPathExpression expr = compiledExprs.get(xPathExpression);
227    try {
228      if (expr == null) {
229        expr = xpath.compile(xPathExpression);
230        compiledExprs.put(xPathExpression, expr);
231      }
232      return expr.evaluate(node, qname);
233
234    } catch (XPathExpressionException e) {
235      throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
236    }
237  }
238
239  public String executeXPath(String xPathExpression) {
240    return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
241  }
242
243  public String executeXPath(Node node, String xPathExpression) {
244    return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
245  }
246
247  public NodeList executeXPathNodeList(String xPathExpression) {
248    return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
249  }
250
251  public NodeList executeXPathNodeList(Node node, String xPathExpression) {
252    return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
253  }
254
255  public Node executeXPathNode(Node node, String xPathExpression) {
256    return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
257  }
258
259  /**
260   * Fix the error occured when parsing a string containing unicode character
261   * Example : {@code &u20ac;} will be replaced by {@code &#x20ac;}
262   */
263  protected String fixUnicodeChar(String text) {
264    String unicode = "&u";
265    StringBuilder replace = new StringBuilder(text);
266    if (text.indexOf(unicode) >= 0) {
267      Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
268      Matcher m = p.matcher(replace.toString());
269      int nbFind = 0;
270      while (m.find()) {
271        // Add one index each time because we add one character each time (&u -> &#x)
272        replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
273        nbFind++;
274      }
275    }
276    return replace.toString();
277  }
278}