001/*
002 * SonarQube, open source software quality management tool.
003 * Copyright (C) 2008-2014 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * SonarQube is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * SonarQube is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019 */
020package org.sonar.api.utils;
021
022import org.apache.commons.io.Charsets;
023import org.apache.commons.io.IOUtils;
024import org.sonar.api.utils.log.Logger;
025import org.sonar.api.utils.log.Loggers;
026import org.w3c.dom.Document;
027import org.w3c.dom.Element;
028import org.w3c.dom.Node;
029import org.w3c.dom.NodeList;
030import org.xml.sax.SAXException;
031
032import javax.annotation.Nullable;
033import javax.xml.namespace.QName;
034import javax.xml.parsers.DocumentBuilder;
035import javax.xml.parsers.DocumentBuilderFactory;
036import javax.xml.parsers.ParserConfigurationException;
037import javax.xml.xpath.XPath;
038import javax.xml.xpath.XPathConstants;
039import javax.xml.xpath.XPathExpression;
040import javax.xml.xpath.XPathExpressionException;
041import javax.xml.xpath.XPathFactory;
042
043import java.io.BufferedReader;
044import java.io.ByteArrayInputStream;
045import java.io.File;
046import java.io.FileInputStream;
047import java.io.IOException;
048import java.io.InputStream;
049import java.io.InputStreamReader;
050import java.util.ArrayList;
051import java.util.HashMap;
052import java.util.List;
053import java.util.Map;
054import java.util.regex.Matcher;
055import java.util.regex.Pattern;
056
057/**
058 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
059 *
060 * @since 1.10
061 */
062public class XpathParser {
063
064  private static final String CAN_NOT_PARSE_XML = "can not parse xml : ";
065  private Element root = null;
066  private Document doc = null;
067  private DocumentBuilder builder;
068  private XPath xpath;
069  private Map<String, XPathExpression> compiledExprs = new HashMap<>();
070
071  public XpathParser() {
072    DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
073    try {
074      bf.setFeature("http://apache.org/xml/features/validation/schema", false);
075      bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
076      bf.setFeature("http://xml.org/sax/features/validation", false);
077      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
078      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
079      bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
080    } catch (ParserConfigurationException e) {
081      Logger log = Loggers.get(this.getClass().getName());
082      log.error("Error occured during features set up.", e);
083    }
084    try {
085      bf.setNamespaceAware(false);
086      bf.setValidating(false);
087      builder = bf.newDocumentBuilder();
088    } catch (ParserConfigurationException e) {
089      throw new XmlParserException("can not create a XML parser", e);
090    }
091  }
092
093  public void parse(@Nullable File file) {
094    if (file == null || !file.exists()) {
095      throw new XmlParserException("File not found : " + file);
096    }
097
098    BufferedReader buffer = null;
099    try {
100      buffer = new BufferedReader(new InputStreamReader(new FileInputStream(file), Charsets.UTF_8));
101      parse(buffer);
102
103    } catch (IOException e) {
104      throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
105
106    } finally {
107      IOUtils.closeQuietly(buffer);
108    }
109  }
110
111  public void parse(InputStream stream) {
112    BufferedReader buffer = null;
113    try {
114      buffer = new BufferedReader(new InputStreamReader(stream, Charsets.UTF_8));
115      parse(buffer);
116
117    } catch (IOException e) {
118      throw new XmlParserException("can not parse the stream", e);
119
120    } finally {
121      IOUtils.closeQuietly(buffer);
122    }
123  }
124
125  private void parse(BufferedReader buffer) throws IOException {
126    parse(IOUtils.toString(buffer));
127  }
128
129  public void parse(String xml) {
130    try {
131      String fixedXml = fixUnicodeChar(xml);
132      doc = builder.parse(new ByteArrayInputStream(fixedXml.getBytes(Charsets.UTF_8)));
133      XPathFactory factory = XPathFactory.newInstance();
134      xpath = factory.newXPath();
135
136    } catch (IOException | SAXException e) {
137      throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
138    }
139  }
140
141  public Element getRoot() {
142    if (root == null && doc != null) {
143      root = doc.getDocumentElement();
144    }
145    return root;
146  }
147
148  public Document getDocument() {
149    return doc;
150  }
151
152  public Element getChildElement(Element base, String elementName) {
153    NodeList childrens = base.getElementsByTagName(elementName);
154    for (int i = 0; i < childrens.getLength(); i++) {
155      Node nde = childrens.item(i);
156      if (nde.getNodeType() == Node.ELEMENT_NODE) {
157        return (Element) nde;
158      }
159    }
160    return null;
161  }
162
163  public Element getChildElement(String elementName) {
164    NodeList childrens = getRoot().getElementsByTagName(elementName);
165    for (int i = 0; i < childrens.getLength(); i++) {
166      Node nde = childrens.item(i);
167      if (nde.getNodeType() == Node.ELEMENT_NODE) {
168        return (Element) nde;
169      }
170    }
171    return null;
172  }
173
174  public List<Element> getChildElements(String elementName) {
175    List<Element> rtrVal = new ArrayList<Element>();
176    NodeList childrens = getRoot().getElementsByTagName(elementName);
177    for (int i = 0; i < childrens.getLength(); i++) {
178      Node nde = childrens.item(i);
179      if (nde.getNodeType() == Node.ELEMENT_NODE) {
180        rtrVal.add((Element) nde);
181      }
182    }
183    return rtrVal;
184  }
185
186  public List<Element> getChildElements(Element base, String elementName) {
187    List<Element> rtrVal = new ArrayList<Element>();
188    NodeList childrens = base.getElementsByTagName(elementName);
189    for (int i = 0; i < childrens.getLength(); i++) {
190      Node nde = childrens.item(i);
191      if (nde.getNodeType() == Node.ELEMENT_NODE) {
192        rtrVal.add((Element) nde);
193      }
194    }
195    return rtrVal;
196  }
197
198  public String getChildElementValue(Element base, String elementName) {
199    NodeList childrens = base.getElementsByTagName(elementName);
200    for (int i = 0; i < childrens.getLength(); i++) {
201      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
202        return childrens.item(i).getFirstChild().getNodeValue();
203      }
204    }
205    return null;
206  }
207
208  public String getElementValue(Node base) {
209    if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
210      return base.getNextSibling().getNodeValue();
211    } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
212      return base.getFirstChild().getNodeValue();
213    }
214    return null;
215  }
216
217  public String getChildElementValue(String elementName) {
218    NodeList childrens = getRoot().getElementsByTagName(elementName);
219    for (int i = 0; i < childrens.getLength(); i++) {
220      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
221        return childrens.item(i).getFirstChild().getNodeValue();
222      }
223    }
224    return null;
225  }
226
227  public Object executeXPath(Node node, QName qname, String xPathExpression) {
228    XPathExpression expr = compiledExprs.get(xPathExpression);
229    try {
230      if (expr == null) {
231        expr = xpath.compile(xPathExpression);
232        compiledExprs.put(xPathExpression, expr);
233      }
234      return expr.evaluate(node, qname);
235
236    } catch (XPathExpressionException e) {
237      throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
238    }
239  }
240
241  public String executeXPath(String xPathExpression) {
242    return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
243  }
244
245  public String executeXPath(Node node, String xPathExpression) {
246    return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
247  }
248
249  public NodeList executeXPathNodeList(String xPathExpression) {
250    return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
251  }
252
253  public NodeList executeXPathNodeList(Node node, String xPathExpression) {
254    return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
255  }
256
257  public Node executeXPathNode(Node node, String xPathExpression) {
258    return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
259  }
260
261  /**
262   * Fix the error occured when parsing a string containing unicode character
263   * Example : &u20ac; will be replaced by &#x20ac;
264   */
265  protected String fixUnicodeChar(String text) {
266    String unicode = "&u";
267    StringBuilder replace = new StringBuilder(text);
268    if (text.indexOf(unicode) >= 0) {
269      Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
270      Matcher m = p.matcher(replace.toString());
271      int nbFind = 0;
272      while (m.find()) {
273        // Add one index each time because we add one character each time (&u -> &#x)
274        replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
275        nbFind++;
276      }
277    }
278    return replace.toString();
279  }
280}