001/* 002 * SonarQube 003 * Copyright (C) 2009-2017 SonarSource SA 004 * mailto:info AT sonarsource DOT com 005 * 006 * This program is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * This program is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public License 017 * along with this program; if not, write to the Free Software Foundation, 018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 019 */ 020package org.sonar.api.utils; 021 022import java.io.BufferedReader; 023import java.io.ByteArrayInputStream; 024import java.io.File; 025import java.io.FileInputStream; 026import java.io.IOException; 027import java.io.InputStream; 028import java.io.InputStreamReader; 029import java.nio.charset.StandardCharsets; 030import java.util.ArrayList; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034import java.util.regex.Matcher; 035import java.util.regex.Pattern; 036import javax.annotation.Nullable; 037import javax.xml.namespace.QName; 038import javax.xml.parsers.DocumentBuilder; 039import javax.xml.parsers.DocumentBuilderFactory; 040import javax.xml.parsers.ParserConfigurationException; 041import javax.xml.xpath.XPath; 042import javax.xml.xpath.XPathConstants; 043import javax.xml.xpath.XPathExpression; 044import javax.xml.xpath.XPathExpressionException; 045import javax.xml.xpath.XPathFactory; 046import org.apache.commons.io.IOUtils; 047import org.sonar.api.utils.log.Logger; 048import org.sonar.api.utils.log.Loggers; 049import org.w3c.dom.Document; 050import org.w3c.dom.Element; 051import org.w3c.dom.Node; 052import org.w3c.dom.NodeList; 053import org.xml.sax.SAXException; 054 055/** 056 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files. 057 * 058 * @since 1.10 059 * @deprecated since 5.6 plugins should use their own dependencies 060 */ 061@Deprecated 062public class XpathParser { 063 064 private static final String CAN_NOT_PARSE_XML = "can not parse xml : "; 065 private Element root = null; 066 private Document doc = null; 067 private DocumentBuilder builder; 068 private XPath xpath; 069 private Map<String, XPathExpression> compiledExprs = new HashMap<>(); 070 071 public XpathParser() { 072 DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance(); 073 try { 074 bf.setFeature("http://apache.org/xml/features/validation/schema", false); 075 bf.setFeature("http://xml.org/sax/features/external-general-entities", false); 076 bf.setFeature("http://xml.org/sax/features/validation", false); 077 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); 078 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 079 bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true); 080 } catch (ParserConfigurationException e) { 081 Logger log = Loggers.get(this.getClass().getName()); 082 log.error("Error occured during features set up.", e); 083 } 084 try { 085 bf.setNamespaceAware(false); 086 bf.setValidating(false); 087 builder = bf.newDocumentBuilder(); 088 } catch (ParserConfigurationException e) { 089 throw new XmlParserException("can not create a XML parser", e); 090 } 091 } 092 093 public void parse(@Nullable File file) { 094 if (file == null || !file.exists()) { 095 throw new XmlParserException("File not found : " + file); 096 } 097 098 BufferedReader buffer = null; 099 try { 100 buffer = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); 101 parse(buffer); 102 103 } catch (IOException e) { 104 throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e); 105 106 } finally { 107 IOUtils.closeQuietly(buffer); 108 } 109 } 110 111 public void parse(InputStream stream) { 112 BufferedReader buffer = null; 113 try { 114 buffer = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); 115 parse(buffer); 116 117 } catch (IOException e) { 118 throw new XmlParserException("can not parse the stream", e); 119 120 } finally { 121 IOUtils.closeQuietly(buffer); 122 } 123 } 124 125 private void parse(BufferedReader buffer) throws IOException { 126 parse(IOUtils.toString(buffer)); 127 } 128 129 public void parse(String xml) { 130 try { 131 String fixedXml = fixUnicodeChar(xml); 132 doc = builder.parse(new ByteArrayInputStream(fixedXml.getBytes(StandardCharsets.UTF_8))); 133 XPathFactory factory = XPathFactory.newInstance(); 134 xpath = factory.newXPath(); 135 136 } catch (IOException | SAXException e) { 137 throw new XmlParserException(CAN_NOT_PARSE_XML + xml, e); 138 } 139 } 140 141 public Element getRoot() { 142 if (root == null && doc != null) { 143 root = doc.getDocumentElement(); 144 } 145 return root; 146 } 147 148 public Document getDocument() { 149 return doc; 150 } 151 152 public Element getChildElement(Element base, String elementName) { 153 NodeList childrens = base.getElementsByTagName(elementName); 154 for (int i = 0; i < childrens.getLength(); i++) { 155 Node nde = childrens.item(i); 156 if (nde.getNodeType() == Node.ELEMENT_NODE) { 157 return (Element) nde; 158 } 159 } 160 return null; 161 } 162 163 public Element getChildElement(String elementName) { 164 NodeList childrens = getRoot().getElementsByTagName(elementName); 165 for (int i = 0; i < childrens.getLength(); i++) { 166 Node nde = childrens.item(i); 167 if (nde.getNodeType() == Node.ELEMENT_NODE) { 168 return (Element) nde; 169 } 170 } 171 return null; 172 } 173 174 public List<Element> getChildElements(String elementName) { 175 List<Element> rtrVal = new ArrayList<>(); 176 NodeList childrens = getRoot().getElementsByTagName(elementName); 177 for (int i = 0; i < childrens.getLength(); i++) { 178 Node nde = childrens.item(i); 179 if (nde.getNodeType() == Node.ELEMENT_NODE) { 180 rtrVal.add((Element) nde); 181 } 182 } 183 return rtrVal; 184 } 185 186 public List<Element> getChildElements(Element base, String elementName) { 187 List<Element> rtrVal = new ArrayList<>(); 188 NodeList childrens = base.getElementsByTagName(elementName); 189 for (int i = 0; i < childrens.getLength(); i++) { 190 Node nde = childrens.item(i); 191 if (nde.getNodeType() == Node.ELEMENT_NODE) { 192 rtrVal.add((Element) nde); 193 } 194 } 195 return rtrVal; 196 } 197 198 public String getChildElementValue(Element base, String elementName) { 199 NodeList childrens = base.getElementsByTagName(elementName); 200 for (int i = 0; i < childrens.getLength(); i++) { 201 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 202 return childrens.item(i).getFirstChild().getNodeValue(); 203 } 204 } 205 return null; 206 } 207 208 public String getElementValue(Node base) { 209 if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) { 210 return base.getNextSibling().getNodeValue(); 211 } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) { 212 return base.getFirstChild().getNodeValue(); 213 } 214 return null; 215 } 216 217 public String getChildElementValue(String elementName) { 218 NodeList childrens = getRoot().getElementsByTagName(elementName); 219 for (int i = 0; i < childrens.getLength(); i++) { 220 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 221 return childrens.item(i).getFirstChild().getNodeValue(); 222 } 223 } 224 return null; 225 } 226 227 public Object executeXPath(Node node, QName qname, String xPathExpression) { 228 XPathExpression expr = compiledExprs.get(xPathExpression); 229 try { 230 if (expr == null) { 231 expr = xpath.compile(xPathExpression); 232 compiledExprs.put(xPathExpression, expr); 233 } 234 return expr.evaluate(node, qname); 235 236 } catch (XPathExpressionException e) { 237 throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e); 238 } 239 } 240 241 public String executeXPath(String xPathExpression) { 242 return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression); 243 } 244 245 public String executeXPath(Node node, String xPathExpression) { 246 return (String) executeXPath(node, XPathConstants.STRING, xPathExpression); 247 } 248 249 public NodeList executeXPathNodeList(String xPathExpression) { 250 return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression); 251 } 252 253 public NodeList executeXPathNodeList(Node node, String xPathExpression) { 254 return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression); 255 } 256 257 public Node executeXPathNode(Node node, String xPathExpression) { 258 return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression); 259 } 260 261 /** 262 * Fix the error occured when parsing a string containing unicode character 263 * Example : {@code &u20ac;} will be replaced by {@code €} 264 */ 265 protected String fixUnicodeChar(String text) { 266 String unicode = "&u"; 267 StringBuilder replace = new StringBuilder(text); 268 if (text.indexOf(unicode) >= 0) { 269 Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});"); 270 Matcher m = p.matcher(replace.toString()); 271 int nbFind = 0; 272 while (m.find()) { 273 // Add one index each time because we add one character each time (&u -> &#x) 274 replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";"); 275 nbFind++; 276 } 277 } 278 return replace.toString(); 279 } 280}