001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2009 SonarSource SA
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.api.utils;
021    
022    import org.apache.commons.io.IOUtils;
023    import org.slf4j.Logger;
024    import org.slf4j.LoggerFactory;
025    import org.w3c.dom.Document;
026    import org.w3c.dom.Element;
027    import org.w3c.dom.Node;
028    import org.w3c.dom.NodeList;
029    import org.xml.sax.SAXException;
030    
031    import java.io.*;
032    import java.util.ArrayList;
033    import java.util.HashMap;
034    import java.util.List;
035    import java.util.Map;
036    import java.util.regex.Matcher;
037    import java.util.regex.Pattern;
038    import javax.xml.namespace.QName;
039    import javax.xml.parsers.DocumentBuilder;
040    import javax.xml.parsers.DocumentBuilderFactory;
041    import javax.xml.parsers.ParserConfigurationException;
042    import javax.xml.xpath.*;
043    
044    /**
045     * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
046     *
047     * @since 1.10
048     */
049    public class XpathParser {
050    
051      private Element root = null;
052      private Document doc = null;
053      private DocumentBuilder builder;
054      private XPath xpath;
055      private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();
056    
057      public XpathParser() {
058        DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
059        try {
060          bf.setFeature("http://apache.org/xml/features/validation/schema", false);
061          bf.setFeature("http://xml.org/sax/features/external-general-entities", false);
062          bf.setFeature("http://xml.org/sax/features/validation", false);
063          bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
064          bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
065          bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
066        } catch (ParserConfigurationException e) {
067          Logger log = LoggerFactory.getLogger(this.getClass().getName());
068          log.error("Error occured during features set up.", e);
069        }
070        try {
071          bf.setNamespaceAware(false);
072          bf.setValidating(false);
073          builder = bf.newDocumentBuilder();
074        } catch (ParserConfigurationException e) {
075          throw new XmlParserException("can not create a XML parser", e);
076        }
077      }
078    
079      public void parse(File file) {
080        if (file == null || !file.exists()) {
081          throw new XmlParserException("File not found : " + file);
082        }
083    
084        BufferedReader buffer = null;
085        try {
086          buffer = new BufferedReader(new FileReader(file));
087          parse(buffer);
088    
089        } catch (IOException e) {
090          throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
091    
092        } finally {
093          IOUtils.closeQuietly(buffer);
094        }
095      }
096    
097      public void parse(InputStream stream) {
098        BufferedReader buffer = null;
099        try {
100          buffer = new BufferedReader(new InputStreamReader(stream));
101          parse(buffer);
102    
103        } catch (IOException e) {
104          throw new XmlParserException("can not parse the stream", e);
105    
106        } finally {
107          IOUtils.closeQuietly(buffer);
108        }
109      }
110    
111      private void parse(BufferedReader buffer) throws IOException {
112        parse(IOUtils.toString(buffer));
113      }
114    
115      public void parse(String xml) {
116        try {
117          xml = fixUnicodeChar(xml);
118          doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
119          XPathFactory factory = XPathFactory.newInstance();
120          xpath = factory.newXPath();
121    
122        } catch (SAXException e) {
123          throw new XmlParserException("can not parse xml : " + xml, e);
124        } catch (IOException e) {
125          throw new XmlParserException("can not parse xml : " + xml, e);
126        }
127      }
128    
129      public Element getRoot() {
130        if (root == null && doc != null) {
131          root = doc.getDocumentElement();
132        }
133        return root;
134      }
135    
136      public Document getDocument() {
137        return doc;
138      }
139    
140      public Element getChildElement(Element base, String elementName) {
141        NodeList childrens = base.getElementsByTagName(elementName);
142        for (int i = 0; i < childrens.getLength(); i++) {
143          Node nde = childrens.item(i);
144          if (nde.getNodeType() == Node.ELEMENT_NODE) {
145            return (Element) nde;
146          }
147        }
148        return null;
149      }
150    
151      public Element getChildElement(String elementName) {
152        NodeList childrens = getRoot().getElementsByTagName(elementName);
153        for (int i = 0; i < childrens.getLength(); i++) {
154          Node nde = childrens.item(i);
155          if (nde.getNodeType() == Node.ELEMENT_NODE) {
156            return (Element) nde;
157          }
158        }
159        return null;
160      }
161    
162      public List<Element> getChildElements(String elementName) {
163        List<Element> rtrVal = new ArrayList<Element>();
164        NodeList childrens = getRoot().getElementsByTagName(elementName);
165        for (int i = 0; i < childrens.getLength(); i++) {
166          Node nde = childrens.item(i);
167          if (nde.getNodeType() == Node.ELEMENT_NODE) {
168            rtrVal.add((Element) nde);
169          }
170        }
171        return rtrVal;
172      }
173    
174      public List<Element> getChildElements(Element base, String elementName) {
175        List<Element> rtrVal = new ArrayList<Element>();
176        NodeList childrens = base.getElementsByTagName(elementName);
177        for (int i = 0; i < childrens.getLength(); i++) {
178          Node nde = childrens.item(i);
179          if (nde.getNodeType() == Node.ELEMENT_NODE) {
180            rtrVal.add((Element) nde);
181          }
182        }
183        return rtrVal;
184      }
185    
186      public String getChildElementValue(Element base, String elementName) {
187        NodeList childrens = base.getElementsByTagName(elementName);
188        for (int i = 0; i < childrens.getLength(); i++) {
189          if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
190            return childrens.item(i).getFirstChild().getNodeValue();
191          }
192        }
193        return null;
194      }
195    
196      public String getElementValue(Node base) {
197        if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) {
198          return base.getNextSibling().getNodeValue();
199        } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) {
200          return base.getFirstChild().getNodeValue();
201        }
202        return null;
203      }
204    
205      public String getChildElementValue(String elementName) {
206        NodeList childrens = getRoot().getElementsByTagName(elementName);
207        for (int i = 0; i < childrens.getLength(); i++) {
208          if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) {
209            return childrens.item(i).getFirstChild().getNodeValue();
210          }
211        }
212        return null;
213      }
214    
215      public Object executeXPath(Node node, QName qname, String xPathExpression) {
216        XPathExpression expr = compiledExprs.get(xPathExpression);
217        try {
218          if (expr == null) {
219            expr = xpath.compile(xPathExpression);
220            compiledExprs.put(xPathExpression, expr);
221          }
222          return expr.evaluate(node, qname);
223    
224        } catch (XPathExpressionException e) {
225          throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
226        }
227      }
228    
229      public String executeXPath(String xPathExpression) {
230        return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
231      }
232    
233      public String executeXPath(Node node, String xPathExpression) {
234        return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
235      }
236    
237      public NodeList executeXPathNodeList(String xPathExpression) {
238        return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
239      }
240    
241      public NodeList executeXPathNodeList(Node node, String xPathExpression) {
242        return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
243      }
244    
245      public Node executeXPathNode(Node node, String xPathExpression) {
246        return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
247      }
248    
249      /**
250       * Fix the error occured when parsing a string containing unicode character
251       * Example : &u20ac; will be replaced by &#x20ac;
252       */
253      protected String fixUnicodeChar(String text) {
254        String unicode = "&u";
255        StringBuilder replace = new StringBuilder(text);
256        if (text.indexOf(unicode) >= 0) {
257          Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});");
258          Matcher m = p.matcher(replace.toString());
259          int nbFind = 0;
260          while (m.find()) {
261            // Add one index each time because we add one character each time (&u -> &#x)
262            replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";");
263            nbFind++;
264          }
265        }
266        return replace.toString();
267      }
268    }