001/* 002 * SonarQube, open source software quality management tool. 003 * Copyright (C) 2008-2014 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * SonarQube is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * SonarQube is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public License 017 * along with this program; if not, write to the Free Software Foundation, 018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 019 */ 020package org.sonar.api.utils; 021 022import com.ctc.wstx.stax.WstxInputFactory; 023import java.io.File; 024import java.io.FileInputStream; 025import java.io.FileNotFoundException; 026import java.io.IOException; 027import java.io.InputStream; 028import java.io.Reader; 029import java.net.URL; 030import javax.xml.stream.XMLInputFactory; 031import javax.xml.stream.XMLResolver; 032import javax.xml.stream.XMLStreamException; 033import org.apache.commons.io.IOUtils; 034import org.apache.commons.lang.StringUtils; 035import org.codehaus.staxmate.SMInputFactory; 036import org.codehaus.staxmate.in.SMHierarchicCursor; 037 038/** 039 * @since 1.10 040 */ 041public class StaxParser { 042 043 private SMInputFactory inf; 044 private XmlStreamHandler streamHandler; 045 private boolean isoControlCharsAwareParser; 046 047 /** 048 * Stax parser for a given stream handler and iso control chars set awarness to off 049 * 050 * @param streamHandler the xml stream handler 051 */ 052 public StaxParser(XmlStreamHandler streamHandler) { 053 this(streamHandler, false); 054 } 055 056 /** 057 * Stax parser for a given stream handler and iso control chars set awarness to on. 058 * The iso control chars in the xml file will be replaced by simple spaces, usefull for 059 * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary 060 * 061 * @param streamHandler the xml stream handler 062 * @param isoControlCharsAwareParser true or false 063 */ 064 public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) { 065 this.streamHandler = streamHandler; 066 XMLInputFactory xmlFactory = XMLInputFactory.newInstance(); 067 if (xmlFactory instanceof WstxInputFactory) { 068 WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory; 069 wstxInputfactory.configureForLowMemUsage(); 070 wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver()); 071 } 072 xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false); 073 xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 074 xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false); 075 this.isoControlCharsAwareParser = isoControlCharsAwareParser; 076 inf = new SMInputFactory(xmlFactory); 077 } 078 079 public void parse(File xmlFile) throws XMLStreamException { 080 FileInputStream input=null; 081 try { 082 input = new FileInputStream(xmlFile); 083 parse(input); 084 } catch (FileNotFoundException e) { 085 throw new XMLStreamException(e); 086 } finally { 087 IOUtils.closeQuietly(input); 088 } 089 } 090 091 public void parse(InputStream xmlInput) throws XMLStreamException { 092 xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput; 093 parse(inf.rootElementCursor(xmlInput)); 094 } 095 096 public void parse(Reader xmlReader) throws XMLStreamException { 097 if (isoControlCharsAwareParser) { 098 throw new SonarException("Method call not supported when isoControlCharsAwareParser=true"); 099 } 100 parse(inf.rootElementCursor(xmlReader)); 101 } 102 103 public void parse(URL xmlUrl) throws XMLStreamException { 104 try { 105 parse(xmlUrl.openStream()); 106 } catch (IOException e) { 107 throw new XMLStreamException(e); 108 } 109 } 110 111 private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException { 112 try { 113 streamHandler.stream(rootCursor); 114 } finally { 115 rootCursor.getStreamReader().closeCompletely(); 116 } 117 } 118 119 private static class UndeclaredEntitiesXMLResolver implements XMLResolver { 120 @Override 121 public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException { 122 // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression 123 if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) { 124 int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16); 125 if (Character.isDefined(unicodeCharHexValue)) { 126 undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue}); 127 } 128 } 129 return undeclaredEntity; 130 } 131 } 132 133 /** 134 * Simple interface for handling XML stream to parse 135 */ 136 public interface XmlStreamHandler { 137 void stream(SMHierarchicCursor rootCursor) throws XMLStreamException; 138 } 139 140 private static class ISOControlCharAwareInputStream extends InputStream { 141 142 private InputStream inputToCheck; 143 144 public ISOControlCharAwareInputStream(InputStream inputToCheck) { 145 super(); 146 this.inputToCheck = inputToCheck; 147 } 148 149 @Override 150 public int read() throws IOException { 151 return inputToCheck.read(); 152 } 153 154 @Override 155 public int available() throws IOException { 156 return inputToCheck.available(); 157 } 158 159 @Override 160 public void close() throws IOException { 161 inputToCheck.close(); 162 } 163 164 @Override 165 public synchronized void mark(int readlimit) { 166 inputToCheck.mark(readlimit); 167 } 168 169 @Override 170 public boolean markSupported() { 171 return inputToCheck.markSupported(); 172 } 173 174 @Override 175 public int read(byte[] b, int off, int len) throws IOException { 176 int readen = inputToCheck.read(b, off, len); 177 checkBufferForISOControlChars(b, off, len); 178 return readen; 179 } 180 181 @Override 182 public int read(byte[] b) throws IOException { 183 int readen = inputToCheck.read(b); 184 checkBufferForISOControlChars(b, 0, readen); 185 return readen; 186 } 187 188 @Override 189 public synchronized void reset() throws IOException { 190 inputToCheck.reset(); 191 } 192 193 @Override 194 public long skip(long n) throws IOException { 195 return inputToCheck.skip(n); 196 } 197 198 private static void checkBufferForISOControlChars(byte[] buffer, int off, int len) { 199 for (int i = off; i < len; i++) { 200 char streamChar = (char) buffer[i]; 201 if (Character.isISOControl(streamChar) && streamChar != '\n') { 202 // replace control chars by a simple space 203 buffer[i] = ' '; 204 } 205 } 206 } 207 } 208}