001    /*
002     * SonarQube, open source software quality management tool.
003     * Copyright (C) 2008-2014 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * SonarQube is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * SonarQube is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public License
017     * along with this program; if not, write to the Free Software Foundation,
018     * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019     */
020    package org.sonar.api.utils;
021    
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
054    
055    /**
056     * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
057     *
058     * @since 1.10
059     */
060    public class XpathParser {
061    
062      private static final String CAN_NOT_PARSE_XML = "can not parse xml : ";
063      private Element root = null;
064      private Document doc = null;
065      private DocumentBuilder builder;
066      private XPath xpath;
067      private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();
068    
069      public XpathParser() {
070        DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
071        try {
072          bf.setFeature("http://apache.org/xml/features/validation/schema", false);
073          bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
074          bf.setFeature("http://xml.org/sax/features/validation", false);
075          bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
076          bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
077          bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
078        } catch (ParserConfigurationException e) {
079          Logger log = LoggerFactory.getLogger(this.getClass().getName());
080          log.error("Error occured during features set up.", e);
081        }
082        try {
083          bf.setNamespaceAware(false);
084          bf.setValidating(false);
085          builder = bf.newDocumentBuilder();
086        } catch (ParserConfigurationException e) {
087          throw new XmlParserException("can not create a XML parser", e);
088        }
089      }
090    
091      public void parse(File file) {
092        if (file == null || !file.exists()) {
093          throw new XmlParserException("File not found : " + file);
094        }
095    
096        BufferedReader buffer = null;
097        try {
098          buffer = new BufferedReader(new FileReader(file));
099          parse(buffer);
100    
101        } catch (IOException e) {
102          throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
103    
104        } finally {
105          IOUtils.closeQuietly(buffer);
106        }
107      }
108    
109      public void parse(InputStream stream) {
110        BufferedReader buffer = null;
111        try {
112          buffer = new BufferedReader(new InputStreamReader(stream));
113          parse(buffer);
114    
115        } catch (IOException e) {
116          throw new XmlParserException("can not parse the stream", e);
117    
118        } finally {
119          IOUtils.closeQuietly(buffer);
120        }
121      }
122    
123      private void parse(BufferedReader buffer) throws IOException {
124        parse(IOUtils.toString(buffer));
125      }
126    
127      public void parse(String xml) {
128        try {
129          xml = fixUnicodeChar(xml);
130          doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
131          XPathFactory factory = XPathFactory.newInstance();
132          xpath = factory.newXPath();
133    
134        } catch (SAXException e) {
135          throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
136        } catch (IOException e) {
137          throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e);
138        }
139      }
140    
141      public Element getRoot() {
142        if (root == null && doc != null) {
143          root = doc.getDocumentElement();
144        }
145        return root;
146      }
147    
148      public Document getDocument() {
149        return doc;
150      }
151    
152      public Element getChildElement(Element base, String elementName) {
153        NodeList childrens = base.getElementsByTagName(elementName);
154        for (int i = 0; i < childrens.getLength(); i++) {
155          Node nde = childrens.item(i);
156          if (nde.getNodeType() == Node.ELEMENT_NODE) {
157            return (Element) nde;
158          }
159        }
160        return null;
161      }
162    
163      public Element getChildElement(String elementName) {
164        NodeList childrens = getRoot().getElementsByTagName(elementName);
165        for (int i = 0; i < childrens.getLength(); i++) {
166          Node nde = childrens.item(i);
167          if (nde.getNodeType() == Node.ELEMENT_NODE) {
168            return (Element) nde;
169          }
170        }
171        return null;
172      }
173    
174      public List<Element> getChildElements(String elementName) {
175        List<Element> rtrVal = new ArrayList<Element>();
176        NodeList childrens = getRoot().getElementsByTagName(elementName);
177        for (int i = 0; i < childrens.getLength(); i++) {
178          Node nde = childrens.item(i);
179          if (nde.getNodeType() == Node.ELEMENT_NODE) {
180            rtrVal.add((Element) nde);
181          }
182        }
183        return rtrVal;
184      }
185    
186      public List<Element> getChildElements(Element base, String elementName) {
187        List<Element> rtrVal = new ArrayList<Element>();
188        NodeList childrens = base.getElementsByTagName(elementName);
189        for (int i = 0; i < childrens.getLength(); i++) {
190          Node nde = childrens.item(i);
191          if (nde.getNodeType() == Node.ELEMENT_NODE) {
192            rtrVal.add((Element) nde);
193          }
194        }
195        return rtrVal;
196      }
197    
198      public String getChildElementValue(Element base, String elementName) {
199        NodeList childrens = base.getElementsByTagName(elementName);
200        for (int i = 0; i < childrens.getLength(); i++) {
201          if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
202            return childrens.item(i).getFirstChild().getNodeValue();
203          }
204        }
205        return null;
206      }
207    
208      public String getElementValue(Node base) {
209        if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
210          return base.getNextSibling().getNodeValue();
211        } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
212          return base.getFirstChild().getNodeValue();
213        }
214        return null;
215      }
216    
217      public String getChildElementValue(String elementName) {
218        NodeList childrens = getRoot().getElementsByTagName(elementName);
219        for (int i = 0; i < childrens.getLength(); i++) {
220          if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
221            return childrens.item(i).getFirstChild().getNodeValue();
222          }
223        }
224        return null;
225      }
226    
227      public Object executeXPath(Node node, QName qname, String xPathExpression) {
228        XPathExpression expr = compiledExprs.get(xPathExpression);
229        try {
230          if (expr == null) {
231            expr = xpath.compile(xPathExpression);
232            compiledExprs.put(xPathExpression, expr);
233          }
234          return expr.evaluate(node, qname);
235    
236        } catch (XPathExpressionException e) {
237          throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
238        }
239      }
240    
241      public String executeXPath(String xPathExpression) {
242        return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
243      }
244    
245      public String executeXPath(Node node, String xPathExpression) {
246        return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
247      }
248    
249      public NodeList executeXPathNodeList(String xPathExpression) {
250        return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
251      }
252    
253      public NodeList executeXPathNodeList(Node node, String xPathExpression) {
254        return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
255      }
256    
257      public Node executeXPathNode(Node node, String xPathExpression) {
258        return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
259      }
260    
261      /**
262       * Fix the error occured when parsing a string containing unicode character
263       * Example : &u20ac; will be replaced by €
264       */
265      protected String fixUnicodeChar(String text) {
266        String unicode = "&u";
267        StringBuilder replace = new StringBuilder(text);
268        if (text.indexOf(unicode) >= 0) {
269          Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
270          Matcher m = p.matcher(replace.toString());
271          int nbFind = 0;
272          while (m.find()) {
273            // Add one index each time because we add one character each time (&u -> &#x)
274            replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
275            nbFind++;
276          }
277        }
278        return replace.toString();
279      }
280    }