001/* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020package org.sonar.api.utils; 021 022import org.apache.commons.io.IOUtils; 023import org.slf4j.Logger; 024import org.slf4j.LoggerFactory; 025import org.w3c.dom.Document; 026import org.w3c.dom.Element; 027import org.w3c.dom.Node; 028import org.w3c.dom.NodeList; 029import org.xml.sax.SAXException; 030 031import java.io.*; 032import java.util.ArrayList; 033import java.util.HashMap; 034import java.util.List; 035import java.util.Map; 036import java.util.regex.Matcher; 037import java.util.regex.Pattern; 038import javax.xml.namespace.QName; 039import javax.xml.parsers.DocumentBuilder; 040import javax.xml.parsers.DocumentBuilderFactory; 041import javax.xml.parsers.ParserConfigurationException; 042import javax.xml.xpath.*; 043 044/** 045 * XML Parsing tool using XPATH. It's recommended to use StaxParser when parsing big XML files. 046 * 047 * @since 1.10 048 */ 049public class XpathParser { 050 051 private Element root = null; 052 private Document doc = null; 053 private DocumentBuilder builder; 054 private XPath xpath; 055 private Map<String, XPathExpression> compiledExprs = new HashMap<String, XPathExpression>(); 056 057 public XpathParser() { 058 DocumentBuilderFactory bf = DocumentBuilderFactory.newInstance(); 059 try { 060 bf.setFeature("http://apache.org/xml/features/validation/schema", false); 061 bf.setFeature("http://xml.org/sax/features/external-general-entities", false); 062 bf.setFeature("http://xml.org/sax/features/validation", false); 063 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); 064 bf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 065 bf.setFeature("http://apache.org/xml/features/allow-java-encodings", true); 066 } catch (ParserConfigurationException e) { 067 Logger log = LoggerFactory.getLogger(this.getClass().getName()); 068 log.error("Error occured during features set up.", e); 069 } 070 try { 071 bf.setNamespaceAware(false); 072 bf.setValidating(false); 073 builder = bf.newDocumentBuilder(); 074 } catch (ParserConfigurationException e) { 075 throw new XmlParserException("can not create a XML parser", e); 076 } 077 } 078 079 public void parse(File file) { 080 if (file == null || !file.exists()) { 081 throw new XmlParserException("File not found : " + file); 082 } 083 084 BufferedReader buffer = null; 085 try { 086 buffer = new BufferedReader(new FileReader(file)); 087 parse(buffer); 088 089 } catch (IOException e) { 090 throw new XmlParserException("can not parse the file " + file.getAbsolutePath(), e); 091 092 } finally { 093 IOUtils.closeQuietly(buffer); 094 } 095 } 096 097 public void parse(InputStream stream) { 098 BufferedReader buffer = null; 099 try { 100 buffer = new BufferedReader(new InputStreamReader(stream)); 101 parse(buffer); 102 103 } catch (IOException e) { 104 throw new XmlParserException("can not parse the stream", e); 105 106 } finally { 107 IOUtils.closeQuietly(buffer); 108 } 109 } 110 111 private void parse(BufferedReader buffer) throws IOException { 112 parse(IOUtils.toString(buffer)); 113 } 114 115 public void parse(String xml) { 116 try { 117 xml = fixUnicodeChar(xml); 118 doc = builder.parse(new ByteArrayInputStream(xml.getBytes())); 119 XPathFactory factory = XPathFactory.newInstance(); 120 xpath = factory.newXPath(); 121 122 } catch (SAXException e) { 123 throw new XmlParserException("can not parse xml : " + xml, e); 124 } catch (IOException e) { 125 throw new XmlParserException("can not parse xml : " + xml, e); 126 } 127 } 128 129 public Element getRoot() { 130 if (root == null && doc != null) { 131 root = doc.getDocumentElement(); 132 } 133 return root; 134 } 135 136 public Document getDocument() { 137 return doc; 138 } 139 140 public Element getChildElement(Element base, String elementName) { 141 NodeList childrens = base.getElementsByTagName(elementName); 142 for (int i = 0; i < childrens.getLength(); i++) { 143 Node nde = childrens.item(i); 144 if (nde.getNodeType() == Node.ELEMENT_NODE) { 145 return (Element) nde; 146 } 147 } 148 return null; 149 } 150 151 public Element getChildElement(String elementName) { 152 NodeList childrens = getRoot().getElementsByTagName(elementName); 153 for (int i = 0; i < childrens.getLength(); i++) { 154 Node nde = childrens.item(i); 155 if (nde.getNodeType() == Node.ELEMENT_NODE) { 156 return (Element) nde; 157 } 158 } 159 return null; 160 } 161 162 public List<Element> getChildElements(String elementName) { 163 List<Element> rtrVal = new ArrayList<Element>(); 164 NodeList childrens = getRoot().getElementsByTagName(elementName); 165 for (int i = 0; i < childrens.getLength(); i++) { 166 Node nde = childrens.item(i); 167 if (nde.getNodeType() == Node.ELEMENT_NODE) { 168 rtrVal.add((Element) nde); 169 } 170 } 171 return rtrVal; 172 } 173 174 public List<Element> getChildElements(Element base, String elementName) { 175 List<Element> rtrVal = new ArrayList<Element>(); 176 NodeList childrens = base.getElementsByTagName(elementName); 177 for (int i = 0; i < childrens.getLength(); i++) { 178 Node nde = childrens.item(i); 179 if (nde.getNodeType() == Node.ELEMENT_NODE) { 180 rtrVal.add((Element) nde); 181 } 182 } 183 return rtrVal; 184 } 185 186 public String getChildElementValue(Element base, String elementName) { 187 NodeList childrens = base.getElementsByTagName(elementName); 188 for (int i = 0; i < childrens.getLength(); i++) { 189 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 190 return childrens.item(i).getFirstChild().getNodeValue(); 191 } 192 } 193 return null; 194 } 195 196 public String getElementValue(Node base) { 197 if (base.getNextSibling() != null && base.getNextSibling().getNodeType() == Node.TEXT_NODE) { 198 return base.getNextSibling().getNodeValue(); 199 } else if (base.getFirstChild() != null && base.getFirstChild().getNodeType() == Node.TEXT_NODE) { 200 return base.getFirstChild().getNodeValue(); 201 } 202 return null; 203 } 204 205 public String getChildElementValue(String elementName) { 206 NodeList childrens = getRoot().getElementsByTagName(elementName); 207 for (int i = 0; i < childrens.getLength(); i++) { 208 if (childrens.item(i).getNodeType() == Node.ELEMENT_NODE) { 209 return childrens.item(i).getFirstChild().getNodeValue(); 210 } 211 } 212 return null; 213 } 214 215 public Object executeXPath(Node node, QName qname, String xPathExpression) { 216 XPathExpression expr = compiledExprs.get(xPathExpression); 217 try { 218 if (expr == null) { 219 expr = xpath.compile(xPathExpression); 220 compiledExprs.put(xPathExpression, expr); 221 } 222 return expr.evaluate(node, qname); 223 224 } catch (XPathExpressionException e) { 225 throw new XmlParserException("Unable to evaluate xpath expression :" + xPathExpression, e); 226 } 227 } 228 229 public String executeXPath(String xPathExpression) { 230 return (String) executeXPath(doc, XPathConstants.STRING, xPathExpression); 231 } 232 233 public String executeXPath(Node node, String xPathExpression) { 234 return (String) executeXPath(node, XPathConstants.STRING, xPathExpression); 235 } 236 237 public NodeList executeXPathNodeList(String xPathExpression) { 238 return (NodeList) executeXPath(doc, XPathConstants.NODESET, xPathExpression); 239 } 240 241 public NodeList executeXPathNodeList(Node node, String xPathExpression) { 242 return (NodeList) executeXPath(node, XPathConstants.NODESET, xPathExpression); 243 } 244 245 public Node executeXPathNode(Node node, String xPathExpression) { 246 return (Node) executeXPath(node, XPathConstants.NODE, xPathExpression); 247 } 248 249 /** 250 * Fix the error occured when parsing a string containing unicode character 251 * Example : &u20ac; will be replaced by € 252 */ 253 protected String fixUnicodeChar(String text) { 254 String unicode = "&u"; 255 StringBuilder replace = new StringBuilder(text); 256 if (text.indexOf(unicode) >= 0) { 257 Pattern p = Pattern.compile("&u([0-9a-fA-F]{1,4});"); 258 Matcher m = p.matcher(replace.toString()); 259 int nbFind = 0; 260 while (m.find()) { 261 // Add one index each time because we add one character each time (&u -> &#x) 262 replace.replace(m.start() + nbFind, m.end() + nbFind, "&#x" + m.group(1) + ";"); 263 nbFind++; 264 } 265 } 266 return replace.toString(); 267 } 268}