/*
 * SonarQube, open source software quality management tool.
 * Copyright (C) 2008-2013 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * SonarQube is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * SonarQube is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.api.utils;

import com.ctc.wstx.stax.WstxInputFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.codehaus.staxmate.SMInputFactory;
import org.codehaus.staxmate.in.SMHierarchicCursor;

import java.io.*;
import java.net.URL;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;

/**
 * Thin wrapper around a StaxMate {@link SMInputFactory} that opens a root element cursor on an
 * XML source and hands it to a user supplied {@link XmlStreamHandler}.
 *
 * @since 1.10
 */
public class StaxParser {

  private final SMInputFactory inf;
  private final XmlStreamHandler streamHandler;
  private final boolean isoControlCharsAwareParser;

  /**
   * Stax parser for a given stream handler, with ISO control chars awareness set to off.
   *
   * @param streamHandler the xml stream handler
   */
  public StaxParser(XmlStreamHandler streamHandler) {
    this(streamHandler, false);
  }

  /**
   * Stax parser for a given stream handler, with configurable ISO control chars awareness.
   * When enabled, the ISO control chars found in the xml input are replaced by simple spaces, which is
   * useful for potentially bogus XML files. This has a small performance overhead, so use it only when
   * necessary.
   *
   * @param streamHandler the xml stream handler
   * @param isoControlCharsAwareParser true to replace ISO control chars by spaces while reading, false otherwise
   */
  public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
    this.streamHandler = streamHandler;
    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
    if (xmlFactory instanceof WstxInputFactory) {
      WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
      wstxInputfactory.configureForLowMemUsage();
      wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
    }
    // validation, DTD support and namespace awareness are all switched off
    xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
    xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
    xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
    this.isoControlCharsAwareParser = isoControlCharsAwareParser;
    inf = new SMInputFactory(xmlFactory);
  }

  /**
   * Parses the given file. The file stream is always closed before returning.
   *
   * @param xmlFile the XML file to read
   * @throws XMLStreamException if the file cannot be found or if parsing fails
   */
  public void parse(File xmlFile) throws XMLStreamException {
    FileInputStream input = null;
    try {
      input = new FileInputStream(xmlFile);
      parse(input);
    } catch (FileNotFoundException e) {
      throw new XMLStreamException(e);
    } finally {
      IOUtils.closeQuietly(input);
    }
  }

  /**
   * Parses the given stream. When ISO control chars awareness is on, the stream is wrapped so that
   * control chars are replaced by spaces as they are read.
   *
   * @param xmlInput the XML input stream
   * @throws XMLStreamException if parsing fails
   */
  public void parse(InputStream xmlInput) throws XMLStreamException {
    InputStream source = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
    parse(inf.rootElementCursor(source));
  }

  /**
   * Parses the given reader. Not supported when ISO control chars awareness is on, because the
   * replacement is done at the byte stream level.
   *
   * @param xmlReader the XML reader
   * @throws XMLStreamException if parsing fails
   * @throws SonarException if this parser was created with isoControlCharsAwareParser=true
   */
  public void parse(Reader xmlReader) throws XMLStreamException {
    if (isoControlCharsAwareParser) {
      throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
    }
    parse(inf.rootElementCursor(xmlReader));
  }

  /**
   * Parses the content available at the given URL. The opened stream is always closed before
   * returning, including when the cursor could not be created.
   *
   * @param xmlUrl the URL of the XML document
   * @throws XMLStreamException if the URL stream cannot be opened or if parsing fails
   */
  public void parse(URL xmlUrl) throws XMLStreamException {
    InputStream input = null;
    try {
      input = xmlUrl.openStream();
      parse(input);
    } catch (IOException e) {
      throw new XMLStreamException(e);
    } finally {
      // the stream would otherwise leak if the failure happens before the stream reader is closed
      IOUtils.closeQuietly(input);
    }
  }

  /**
   * Hands the cursor over to the stream handler and closes the underlying stream reader completely
   * afterwards, whether the handler succeeded or not.
   */
  private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
    try {
      streamHandler.stream(rootCursor);
    } finally {
      rootCursor.getStreamReader().closeCompletely();
    }
  }

  /**
   * Resolver registered for undeclared entities: an entity made of 'u' (any case) followed by four
   * hexadecimal digits is replaced by the matching character, anything else is returned under its raw form.
   */
  private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
    public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
      // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
      if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
        try {
          int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
          if (Character.isDefined(unicodeCharHexValue)) {
            return String.valueOf((char) unicodeCharHexValue);
          }
        } catch (NumberFormatException e) {
          // five-letter entity that merely starts with 'u' (not uXXXX hexadecimal): keep its raw form
        }
      }
      return undeclaredEntity;
    }
  }

  /**
   * Simple interface for handling XML stream to parse
   */
  public interface XmlStreamHandler {
    void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
  }

  /**
   * Input stream decorator that replaces every ISO control char, except the line feed, by a space
   * in the bytes it delivers. All other operations are delegated to the wrapped stream.
   */
  private static class ISOControlCharAwareInputStream extends InputStream {

    private final InputStream inputToCheck;

    public ISOControlCharAwareInputStream(InputStream inputToCheck) {
      super();
      this.inputToCheck = inputToCheck;
    }

    @Override
    public int read() throws IOException {
      int value = inputToCheck.read();
      // -1 signals end of stream and must be passed through untouched
      if (value != -1 && isReplaceableControlChar((byte) value)) {
        return ' ';
      }
      return value;
    }

    @Override
    public int available() throws IOException {
      return inputToCheck.available();
    }

    @Override
    public void close() throws IOException {
      inputToCheck.close();
    }

    @Override
    public synchronized void mark(int readlimit) {
      inputToCheck.mark(readlimit);
    }

    @Override
    public boolean markSupported() {
      return inputToCheck.markSupported();
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      int readen = inputToCheck.read(b, off, len);
      // only the bytes actually read are inspected; readen is -1 at end of stream
      checkBufferForISOControlChars(b, off, readen);
      return readen;
    }

    @Override
    public int read(byte[] b) throws IOException {
      int readen = inputToCheck.read(b);
      checkBufferForISOControlChars(b, 0, readen);
      return readen;
    }

    @Override
    public synchronized void reset() throws IOException {
      inputToCheck.reset();
    }

    @Override
    public long skip(long n) throws IOException {
      return inputToCheck.skip(n);
    }

    /**
     * Replaces control chars by spaces in {@code buffer[off .. off + count - 1]}.
     *
     * @param buffer the buffer that has just been filled
     * @param off index of the first byte to inspect
     * @param count number of bytes to inspect, nothing is done when it is zero or negative
     */
    private void checkBufferForISOControlChars(byte[] buffer, int off, int count) {
      int end = off + count;
      for (int i = off; i < end; i++) {
        if (isReplaceableControlChar(buffer[i])) {
          // replace control chars by a simple space
          buffer[i] = ' ';
        }
      }
    }

    /**
     * Tells whether the byte is an ISO control char other than the line feed. Bytes with the high
     * bit set are sign-extended by the cast to char and therefore never match, so the bytes of
     * multi-byte sequences (e.g. UTF-8) are left untouched.
     */
    private static boolean isReplaceableControlChar(byte value) {
      char streamChar = (char) value;
      return Character.isISOControl(streamChar) && streamChar != '\n';
    }
  }
}