001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.api.utils;
021
022import org.apache.commons.io.IOUtils;
023import org.slf4j.Logger;
024import org.slf4j.LoggerFactory;
025import org.w3c.dom.Document;
026import org.w3c.dom.Element;
027import org.w3c.dom.Node;
028import org.w3c.dom.NodeList;
029import org.xml.sax.SAXException;
030
031import java.io.*;
032import java.util.ArrayList;
033import java.util.HashMap;
034import java.util.List;
035import java.util.Map;
036import java.util.regex.Matcher;
037import java.util.regex.Pattern;
038import javax.xml.namespace.QName;
039import javax.xml.parsers.DocumentBuilder;
040import javax.xml.parsers.DocumentBuilderFactory;
041import javax.xml.parsers.ParserConfigurationException;
042import javax.xml.xpath.*;
043
044/**
045 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
046 *
047 * @since 1.10
048 */
049public class XpathParser {
050
051  private Element root = null;
052  private Document doc = null;
053  private DocumentBuilder builder;
054  private XPath xpath;
055  private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();
056
057  public XpathParser() {
058    DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
059    try {
060      bf.setFeature("http://apache.org/xml/features/validation/schema", false);
061      bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
062      bf.setFeature("http://xml.org/sax/features/validation", false);
063      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
064      bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
065      bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
066    } catch (ParserConfigurationException e) {
067      Logger log = LoggerFactory.getLogger(this.getClass().getName());
068      log.error("Error occured during features set up.", e);
069    }
070    try {
071      bf.setNamespaceAware(false);
072      bf.setValidating(false);
073      builder = bf.newDocumentBuilder();
074    } catch (ParserConfigurationException e) {
075      throw new XmlParserException("can not create a XML parser", e);
076    }
077  }
078
079  public void parse(File file) {
080    if (file == null || !file.exists()) {
081      throw new XmlParserException("File not found : " + file);
082    }
083
084    BufferedReader buffer = null;
085    try {
086      buffer = new BufferedReader(new FileReader(file));
087      parse(buffer);
088
089    } catch (IOException e) {
090      throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
091
092    } finally {
093      IOUtils.closeQuietly(buffer);
094    }
095  }
096
097  public void parse(InputStream stream) {
098    BufferedReader buffer = null;
099    try {
100      buffer = new BufferedReader(new InputStreamReader(stream));
101      parse(buffer);
102
103    } catch (IOException e) {
104      throw new XmlParserException("can not parse the stream", e);
105
106    } finally {
107      IOUtils.closeQuietly(buffer);
108    }
109  }
110
111  private void parse(BufferedReader buffer) throws IOException {
112    parse(IOUtils.toString(buffer));
113  }
114
115  public void parse(String xml) {
116    try {
117      xml = fixUnicodeChar(xml);
118      doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
119      XPathFactory factory = XPathFactory.newInstance();
120      xpath = factory.newXPath();
121
122    } catch (SAXException e) {
123      throw new XmlParserException("can not parse xml : " + xml, e);
124    } catch (IOException e) {
125      throw new XmlParserException("can not parse xml : " + xml, e);
126    }
127  }
128
129  public Element getRoot() {
130    if (root == null && doc != null) {
131      root = doc.getDocumentElement();
132    }
133    return root;
134  }
135
136  public Document getDocument() {
137    return doc;
138  }
139
140  public Element getChildElement(Element base, String elementName) {
141    NodeList childrens = base.getElementsByTagName(elementName);
142    for (int i = 0; i < childrens.getLength(); i++) {
143      Node nde = childrens.item(i);
144      if (nde.getNodeType() == Node.ELEMENT_NODE) {
145        return (Element) nde;
146      }
147    }
148    return null;
149  }
150
151  public Element getChildElement(String elementName) {
152    NodeList childrens = getRoot().getElementsByTagName(elementName);
153    for (int i = 0; i < childrens.getLength(); i++) {
154      Node nde = childrens.item(i);
155      if (nde.getNodeType() == Node.ELEMENT_NODE) {
156        return (Element) nde;
157      }
158    }
159    return null;
160  }
161
162  public List<Element> getChildElements(String elementName) {
163    List<Element> rtrVal = new ArrayList<Element>();
164    NodeList childrens = getRoot().getElementsByTagName(elementName);
165    for (int i = 0; i < childrens.getLength(); i++) {
166      Node nde = childrens.item(i);
167      if (nde.getNodeType() == Node.ELEMENT_NODE) {
168        rtrVal.add((Element) nde);
169      }
170    }
171    return rtrVal;
172  }
173
174  public List<Element> getChildElements(Element base, String elementName) {
175    List<Element> rtrVal = new ArrayList<Element>();
176    NodeList childrens = base.getElementsByTagName(elementName);
177    for (int i = 0; i < childrens.getLength(); i++) {
178      Node nde = childrens.item(i);
179      if (nde.getNodeType() == Node.ELEMENT_NODE) {
180        rtrVal.add((Element) nde);
181      }
182    }
183    return rtrVal;
184  }
185
186  public String getChildElementValue(Element base, String elementName) {
187    NodeList childrens = base.getElementsByTagName(elementName);
188    for (int i = 0; i < childrens.getLength(); i++) {
189      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
190        return childrens.item(i).getFirstChild().getNodeValue();
191      }
192    }
193    return null;
194  }
195
196  public String getElementValue(Node base) {
197    if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
198      return base.getNextSibling().getNodeValue();
199    } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
200      return base.getFirstChild().getNodeValue();
201    }
202    return null;
203  }
204
205  public String getChildElementValue(String elementName) {
206    NodeList childrens = getRoot().getElementsByTagName(elementName);
207    for (int i = 0; i < childrens.getLength(); i++) {
208      if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
209        return childrens.item(i).getFirstChild().getNodeValue();
210      }
211    }
212    return null;
213  }
214
215  public Object executeXPath(Node node, QName qname, String xPathExpression) {
216    XPathExpression expr = compiledExprs.get(xPathExpression);
217    try {
218      if (expr == null) {
219        expr = xpath.compile(xPathExpression);
220        compiledExprs.put(xPathExpression, expr);
221      }
222      return expr.evaluate(node, qname);
223
224    } catch (XPathExpressionException e) {
225      throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
226    }
227  }
228
229  public String executeXPath(String xPathExpression) {
230    return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
231  }
232
233  public String executeXPath(Node node, String xPathExpression) {
234    return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
235  }
236
237  public NodeList executeXPathNodeList(String xPathExpression) {
238    return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
239  }
240
241  public NodeList executeXPathNodeList(Node node, String xPathExpression) {
242    return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
243  }
244
245  public Node executeXPathNode(Node node, String xPathExpression) {
246    return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
247  }
248
249  /**
250   * Fix the error occured when parsing a string containing unicode character
251   * Example : &u20ac; will be replaced by &#x20ac;
252   */
253  protected String fixUnicodeChar(String text) {
254    String unicode = "&u";
255    StringBuilder replace = new StringBuilder(text);
256    if (text.indexOf(unicode) >= 0) {
257      Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
258      Matcher m = p.matcher(replace.toString());
259      int nbFind = 0;
260      while (m.find()) {
261        // Add one index each time because we add one character each time (&u -> &#x)
262        replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
263        nbFind++;
264      }
265    }
266    return replace.toString();
267  }
268}