001/* 002 * SonarQube 003 * Copyright (C) 2009-2016 SonarSource SA 004 * mailto:contact AT sonarsource DOT com 005 * 006 * This program is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * This program is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public License 017 * along with this program; if not, write to the Free Software Foundation, 018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 019 */ 020package org.sonar.api.utils; 021 022import java.io.BufferedReader; 023import java.io.ByteArrayInputStream; 024import java.io.File; 025import java.io.FileInputStream; 026import java.io.IOException; 027import java.io.InputStream; 028import java.io.InputStreamReader; 029import java.nio.charset.StandardCharsets; 030import java.util.ArrayList; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034import java.util.regex.Matcher; 035import java.util.regex.Pattern; 036import javax.annotation.Nullable; 037import javax.xml.namespace.QName; 038import javax.xml.parsers.DocumentBuilder; 039import javax.xml.parsers.DocumentBuilderFactory; 040import javax.xml.parsers.ParserConfigurationException; 041import javax.xml.xpath.XPath; 042import javax.xml.xpath.XPathConstants; 043import javax.xml.xpath.XPathExpression; 044import javax.xml.xpath.XPathExpressionException; 045import javax.xml.xpath.XPathFactory; 046import org.apache.commons.io.IOUtils; 047import org.sonar.api.utils.log.Logger; 048import org.sonar.api.utils.log.Loggers; 049import org.w3c.dom.Document; 050import org.w3c.dom.Element; 051import org.w3c.dom.Node; 052import org.w3c.dom.NodeList; 053import org.xml.sax.SAXException; 054 055/** 056 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files. 057 * 058 * @since 1.10 059 */ 060public class XpathParser { 061 062 private static final String CAN_NOT_PARSE_XML = "can not parse xml : "; 063 private Element root = null; 064 private Document doc = null; 065 private DocumentBuilder builder; 066 private XPath xpath; 067 private Map<String, XPathExpression> compiledExprs = new HashMap<>(); 068 069 public XpathParser() { 070 DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance(); 071 try { 072 bf.setFeature("http://apache.org/xml/features/validation/schema", false); 073 bf.setFeature("http://xml.org/sax/features/external-general-entities", false); 074 bf.setFeature("http://xml.org/sax/features/validation", false); 075 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); 076 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 077 bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true); 078 } catch (ParserConfigurationException e) { 079 Logger log = Loggers.get(this.getClass().getName()); 080 log.error("Error occured during features set up.", e); 081 } 082 try { 083 bf.setNamespaceAware(false); 084 bf.setValidating(false); 085 builder = bf.newDocumentBuilder(); 086 } catch (ParserConfigurationException e) { 087 throw new XmlParserException("can not create a XML parser", e); 088 } 089 } 090 091 public void parse(@Nullable File file) { 092 if (file == null || !file.exists()) { 093 throw new XmlParserException("File not found : " + file); 094 } 095 096 BufferedReader buffer = null; 097 try { 098 buffer = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); 099 parse(buffer); 100 101 } catch (IOException e) { 102 throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e); 103 104 } finally { 105 IOUtils.closeQuietly(buffer); 106 } 107 } 108 109 public void parse(InputStream stream) { 110 BufferedReader buffer = null; 111 try { 112 buffer = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); 113 parse(buffer); 114 115 } catch (IOException e) { 116 throw new XmlParserException("can not parse the stream", e); 117 118 } finally { 119 IOUtils.closeQuietly(buffer); 120 } 121 } 122 123 private void parse(BufferedReader buffer) throws IOException { 124 parse(IOUtils.toString(buffer)); 125 } 126 127 public void parse(String xml) { 128 try { 129 String fixedXml = fixUnicodeChar(xml); 130 doc = builder.parse(new ByteArrayInputStream(fixedXml.getBytes(StandardCharsets.UTF_8))); 131 XPathFactory factory = XPathFactory.newInstance(); 132 xpath = factory.newXPath(); 133 134 } catch (IOException | SAXException e) { 135 throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e); 136 } 137 } 138 139 public Element getRoot() { 140 if (root == null && doc != null) { 141 root = doc.getDocumentElement(); 142 } 143 return root; 144 } 145 146 public Document getDocument() { 147 return doc; 148 } 149 150 public Element getChildElement(Element base, String elementName) { 151 NodeList childrens = base.getElementsByTagName(elementName); 152 for (int i = 0; i < childrens.getLength(); i++) { 153 Node nde = childrens.item(i); 154 if (nde.getNodeType() == Node.ELEMENT_NODE) { 155 return (Element) nde; 156 } 157 } 158 return null; 159 } 160 161 public Element getChildElement(String elementName) { 162 NodeList childrens = getRoot().getElementsByTagName(elementName); 163 for (int i = 0; i < childrens.getLength(); i++) { 164 Node nde = childrens.item(i); 165 if (nde.getNodeType() == Node.ELEMENT_NODE) { 166 return (Element) nde; 167 } 168 } 169 return null; 170 } 171 172 public List<Element> getChildElements(String elementName) { 173 List<Element> rtrVal = new ArrayList<>(); 174 NodeList childrens = getRoot().getElementsByTagName(elementName); 175 for (int i = 0; i < childrens.getLength(); i++) { 176 Node nde = childrens.item(i); 177 if (nde.getNodeType() == Node.ELEMENT_NODE) { 178 rtrVal.add((Element) nde); 179 } 180 } 181 return rtrVal; 182 } 183 184 public List<Element> getChildElements(Element base, String elementName) { 185 List<Element> rtrVal = new ArrayList<>(); 186 NodeList childrens = base.getElementsByTagName(elementName); 187 for (int i = 0; i < childrens.getLength(); i++) { 188 Node nde = childrens.item(i); 189 if (nde.getNodeType() == Node.ELEMENT_NODE) { 190 rtrVal.add((Element) nde); 191 } 192 } 193 return rtrVal; 194 } 195 196 public String getChildElementValue(Element base, String elementName) { 197 NodeList childrens = base.getElementsByTagName(elementName); 198 for (int i = 0; i < childrens.getLength(); i++) { 199 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 200 return childrens.item(i).getFirstChild().getNodeValue(); 201 } 202 } 203 return null; 204 } 205 206 public String getElementValue(Node base) { 207 if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) { 208 return base.getNextSibling().getNodeValue(); 209 } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) { 210 return base.getFirstChild().getNodeValue(); 211 } 212 return null; 213 } 214 215 public String getChildElementValue(String elementName) { 216 NodeList childrens = getRoot().getElementsByTagName(elementName); 217 for (int i = 0; i < childrens.getLength(); i++) { 218 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 219 return childrens.item(i).getFirstChild().getNodeValue(); 220 } 221 } 222 return null; 223 } 224 225 public Object executeXPath(Node node, QName qname, String xPathExpression) { 226 XPathExpression expr = compiledExprs.get(xPathExpression); 227 try { 228 if (expr == null) { 229 expr = xpath.compile(xPathExpression); 230 compiledExprs.put(xPathExpression, expr); 231 } 232 return expr.evaluate(node, qname); 233 234 } catch (XPathExpressionException e) { 235 throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e); 236 } 237 } 238 239 public String executeXPath(String xPathExpression) { 240 return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression); 241 } 242 243 public String executeXPath(Node node, String xPathExpression) { 244 return (String) executeXPath(node, XPathConstants.STRING, xPathExpression); 245 } 246 247 public NodeList executeXPathNodeList(String xPathExpression) { 248 return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression); 249 } 250 251 public NodeList executeXPathNodeList(Node node, String xPathExpression) { 252 return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression); 253 } 254 255 public Node executeXPathNode(Node node, String xPathExpression) { 256 return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression); 257 } 258 259 /** 260 * Fix the error occured when parsing a string containing unicode character 261 * Example : {@code &u20ac;} will be replaced by {@code €} 262 */ 263 protected String fixUnicodeChar(String text) { 264 String unicode = "&u"; 265 StringBuilder replace = new StringBuilder(text); 266 if (text.indexOf(unicode) >= 0) { 267 Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});"); 268 Matcher m = p.matcher(replace.toString()); 269 int nbFind = 0; 270 while (m.find()) { 271 // Add one index each time because we add one character each time (&u -> &#x) 272 replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";"); 273 nbFind++; 274 } 275 } 276 return replace.toString(); 277 } 278}