/*
 * SonarQube, open source software quality management tool.
 * Copyright (C) 2008-2014 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * SonarQube is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * SonarQube is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package org.sonar.api.utils;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files.
 * <p>
 * The whole document is loaded in memory as a DOM tree by one of the {@code parse} methods, then
 * queried with the {@code getChild*} helpers or with the {@code executeXPath*} methods. Compiled
 * XPath expressions are cached per instance. Instances keep mutable state (current document,
 * expression cache) without any synchronization.
 *
 * @since 1.10
 */
public class XpathParser {

  private static final String CAN_NOT_PARSE_XML = "can not parse xml : ";

  /** Cheap marker used to skip the regular expression when no pseudo-entity can be present. */
  private static final String UNICODE_MARKER = "&u";

  /** Matches the non-standard {@code &uXXXX;} pseudo-entities (1 to 4 hexadecimal digits). */
  private static final Pattern UNICODE_ESCAPE = Pattern.compile("&u([0-9a-fA-F]{1,4});");

  /** Parser features switched off: no validation, no external DTD, no external entities. */
  private static final String[] DISABLED_FEATURES = {
    "http://apache.org/xml/features/validation/schema",
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://xml.org/sax/features/validation",
    "http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
  };

  private static final String ALLOW_JAVA_ENCODINGS = "http://apache.org/xml/features/allow-java-encodings";

  private Element root = null;
  private Document doc = null;
  private DocumentBuilder builder;
  private XPath xpath;
  private final Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>();

  /**
   * Creates a non-validating, non namespace-aware parser.
   * <p>
   * Each parser feature is configured independently, so that a feature not supported by the
   * underlying implementation is only logged and does not prevent the remaining ones
   * (notably the external entity / external DTD switches) from being applied.
   *
   * @throws XmlParserException if the underlying {@link DocumentBuilder} can not be created
   */
  public XpathParser() {
    DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance();
    for (String feature : DISABLED_FEATURES) {
      setFeatureQuietly(bf, feature, false);
    }
    setFeatureQuietly(bf, ALLOW_JAVA_ENCODINGS, true);
    try {
      bf.setNamespaceAware(false);
      bf.setValidating(false);
      builder = bf.newDocumentBuilder();
    } catch (ParserConfigurationException e) {
      throw new XmlParserException("can not create a XML parser", e);
    }
  }

  /** Sets a single factory feature; an unsupported feature is logged and otherwise ignored. */
  private void setFeatureQuietly(DocumentBuilderFactory factory, String feature, boolean enabled) {
    try {
      factory.setFeature(feature, enabled);
    } catch (ParserConfigurationException e) {
      Logger log = LoggerFactory.getLogger(this.getClass().getName());
      log.error("Error occured during features set up.", e);
    }
  }

  /**
   * Reads the whole file in memory and parses it.
   *
   * @param file the XML file to load
   * @throws XmlParserException if the file is {@code null}, does not exist, can not be read
   *                            or does not contain parsable XML
   */
  public void parse(File file) {
    if (file == null || !file.exists()) {
      throw new XmlParserException("File not found : " + file);
    }

    BufferedReader buffer = null;
    try {
      // NOTE(review): FileReader decodes with the platform default charset, whatever encoding
      // the XML prolog declares. Kept as is because parse(String) encodes back with the same
      // charset and existing callers may rely on that round trip.
      buffer = new BufferedReader(new FileReader(file));
      parse(buffer);
    } catch (IOException e) {
      throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e);
    } finally {
      IOUtils.closeQuietly(buffer);
    }
  }

  /**
   * Reads the whole stream in memory and parses it. The stream is closed by this method.
   *
   * @param stream the XML content to load
   * @throws XmlParserException if the stream can not be read or does not contain parsable XML
   */
  public void parse(InputStream stream) {
    BufferedReader buffer = null;
    try {
      // NOTE(review): decoded with the platform default charset, see parse(File).
      buffer = new BufferedReader(new InputStreamReader(stream));
      parse(buffer);
    } catch (IOException e) {
      throw new XmlParserException("can not parse the stream", e);
    } finally {
      // closing the reader also closes the wrapped stream
      IOUtils.closeQuietly(buffer);
    }
  }

  /** Drains the reader into a string and delegates to {@link #parse(String)}. */
  private void parse(BufferedReader buffer) throws IOException {
    parse(IOUtils.toString(buffer));
  }

  /**
   * Parses the given XML content and makes it the current document.
   * <p>
   * {@code &uXXXX;} pseudo-entities are first rewritten by {@link #fixUnicodeChar(String)}.
   * On success the previously loaded document, if any, is replaced and the cached root element
   * is discarded, so that {@link #getRoot()} always reflects the last parsed document. On
   * failure the previously loaded document is left untouched.
   *
   * @param xml the XML content
   * @throws XmlParserException if the content can not be parsed
   */
  public void parse(String xml) {
    String fixedXml = fixUnicodeChar(xml);
    try {
      // NOTE(review): bytes are produced with the platform default charset, see parse(File).
      doc = builder.parse(new ByteArrayInputStream(fixedXml.getBytes()));
      // the cached root belongs to the previous document: drop it
      root = null;
      XPathFactory factory = XPathFactory.newInstance();
      xpath = factory.newXPath();
    } catch (SAXException e) {
      throw new XmlParserException(CAN_NOT_PARSE_XML + fixedXml, e);
    } catch (IOException e) {
      throw new XmlParserException(CAN_NOT_PARSE_XML + fixedXml, e);
    }
  }

  /**
   * @return the root element of the current document, or {@code null} if nothing has been parsed
   */
  public Element getRoot() {
    if (root == null && doc != null) {
      root = doc.getDocumentElement();
    }
    return root;
  }

  /**
   * @return the current document, or {@code null} if nothing has been parsed
   */
  public Document getDocument() {
    return doc;
  }

  /**
   * Finds the first element named {@code elementName} below {@code base}. The lookup relies on
   * {@link Element#getElementsByTagName(String)}, so all descendants are searched, not only
   * the direct children.
   *
   * @return the first matching element, or {@code null} if there is none
   */
  public Element getChildElement(Element base, String elementName) {
    return firstElement(base, elementName);
  }

  /**
   * Same as {@link #getChildElement(Element, String)}, starting from the document root.
   *
   * @return the first matching element, or {@code null} if there is none
   */
  public Element getChildElement(String elementName) {
    return firstElement(getRoot(), elementName);
  }

  /**
   * Same as {@link #getChildElements(Element, String)}, starting from the document root.
   *
   * @return the matching elements, never {@code null}
   */
  public List<Element> getChildElements(String elementName) {
    return allElements(getRoot(), elementName);
  }

  /**
   * Lists the elements named {@code elementName} below {@code base}, in the order returned by
   * {@link Element#getElementsByTagName(String)} (all descendants are searched).
   *
   * @return the matching elements, never {@code null}
   */
  public List<Element> getChildElements(Element base, String elementName) {
    return allElements(base, elementName);
  }

  /**
   * Returns the value of the first child node of the first element named {@code elementName}
   * below {@code base}.
   *
   * @return the node value, or {@code null} if no element matches, if the matching element is
   *         empty (for example {@code <name/>}) or if its first child has no value
   */
  public String getChildElementValue(Element base, String elementName) {
    return firstElementValue(base, elementName);
  }

  /**
   * Returns the text attached to a node: the value of the text node that immediately follows
   * {@code base} if there is one, else the value of its first child if that child is a text node.
   *
   * @return the text, or {@code null} if neither text node exists
   */
  public String getElementValue(Node base) {
    Node sibling = base.getNextSibling();
    if (sibling != null && sibling.getNodeType() == Node.TEXT_NODE) {
      return sibling.getNodeValue();
    }
    Node firstChild = base.getFirstChild();
    if (firstChild != null && firstChild.getNodeType() == Node.TEXT_NODE) {
      return firstChild.getNodeValue();
    }
    return null;
  }

  /**
   * Same as {@link #getChildElementValue(Element, String)}, starting from the document root.
   *
   * @return the node value, or {@code null} if no element matches, if the matching element is
   *         empty or if its first child has no value
   */
  public String getChildElementValue(String elementName) {
    return firstElementValue(getRoot(), elementName);
  }

  /**
   * Evaluates an XPath expression. Expressions are compiled once per parser instance and
   * then reused. A document must have been parsed beforehand.
   *
   * @param node            the context node of the evaluation
   * @param qname           the expected result type, one of the {@link XPathConstants}
   * @param xPathExpression the expression to evaluate
   * @return the result, whose Java type depends on {@code qname}
   * @throws XmlParserException if the expression can not be compiled or evaluated
   */
  public Object executeXPath(Node node, QName qname, String xPathExpression) {
    try {
      XPathExpression expr = compiledExprs.get(xPathExpression);
      if (expr == null) {
        expr = xpath.compile(xPathExpression);
        compiledExprs.put(xPathExpression, expr);
      }
      return expr.evaluate(node, qname);
    } catch (XPathExpressionException e) {
      throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e);
    }
  }

  /** Evaluates the expression against the current document and returns the result as a string. */
  public String executeXPath(String xPathExpression) {
    return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression);
  }

  /** Evaluates the expression against the given node and returns the result as a string. */
  public String executeXPath(Node node, String xPathExpression) {
    return (String) executeXPath(node, XPathConstants.STRING, xPathExpression);
  }

  /** Evaluates the expression against the current document and returns the matching nodes. */
  public NodeList executeXPathNodeList(String xPathExpression) {
    return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression);
  }

  /** Evaluates the expression against the given node and returns the matching nodes. */
  public NodeList executeXPathNodeList(Node node, String xPathExpression) {
    return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression);
  }

  /** Evaluates the expression against the given node and returns a single node. */
  public Node executeXPathNode(Node node, String xPathExpression) {
    return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression);
  }

  /**
   * Fixes the error that occurs when parsing a string containing {@code &uXXXX;} pseudo-entities,
   * which are not valid XML. Each of them is rewritten as the equivalent numeric character
   * reference.
   * Example : {@code &u20ac;} becomes {@code &#x20ac;}, which an XML parser reads as the euro sign.
   *
   * @param text the raw XML content
   * @return the content with every {@code &uXXXX;} rewritten, other characters are left as is
   */
  protected String fixUnicodeChar(String text) {
    if (text.indexOf(UNICODE_MARKER) < 0) {
      // nothing to rewrite: skip the regular expression
      return text;
    }
    Matcher matcher = UNICODE_ESCAPE.matcher(text);
    return matcher.replaceAll("&#x$1;");
  }

  /** Returns the first element named {@code elementName} below {@code base}, or {@code null}. */
  private static Element firstElement(Element base, String elementName) {
    NodeList candidates = base.getElementsByTagName(elementName);
    for (int i = 0; i < candidates.getLength(); i++) {
      Node candidate = candidates.item(i);
      if (candidate.getNodeType() == Node.ELEMENT_NODE) {
        return (Element) candidate;
      }
    }
    return null;
  }

  /** Returns all the elements named {@code elementName} below {@code base}. */
  private static List<Element> allElements(Element base, String elementName) {
    List<Element> elements = new ArrayList<Element>();
    NodeList candidates = base.getElementsByTagName(elementName);
    for (int i = 0; i < candidates.getLength(); i++) {
      Node candidate = candidates.item(i);
      if (candidate.getNodeType() == Node.ELEMENT_NODE) {
        elements.add((Element) candidate);
      }
    }
    return elements;
  }

  /** Returns the value of the first child of the first matching element, {@code null} if absent. */
  private static String firstElementValue(Element base, String elementName) {
    Element element = firstElement(base, elementName);
    if (element == null) {
      return null;
    }
    // an empty element such as <name/> has no child at all
    Node firstChild = element.getFirstChild();
    return firstChild == null ? null : firstChild.getNodeValue();
  }
}