001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2009 SonarSource SA 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.api.utils; 021 022 import com.ctc.wstx.stax.WstxInputFactory; 023 import org.apache.commons.io.IOUtils; 024 import org.apache.commons.lang.StringUtils; 025 import org.codehaus.stax2.XMLInputFactory2; 026 import org.codehaus.staxmate.SMInputFactory; 027 import org.codehaus.staxmate.in.SMHierarchicCursor; 028 029 import java.io.*; 030 import java.net.URL; 031 import javax.xml.stream.XMLInputFactory; 032 import javax.xml.stream.XMLResolver; 033 import javax.xml.stream.XMLStreamException; 034 035 /** 036 * @since 1.10 037 */ 038 public class StaxParser { 039 040 private SMInputFactory inf; 041 private XmlStreamHandler streamHandler; 042 private boolean isoControlCharsAwareParser; 043 044 /** 045 * Stax parser for a given stream handler and iso control chars set awarness to off 046 * 047 * @param streamHandler the xml stream handler 048 */ 049 public StaxParser(XmlStreamHandler streamHandler) { 050 this(streamHandler, false); 051 } 052 053 /** 054 * Stax parser for a given stream handler and iso control chars set awarness to on. 055 * The iso control chars in the xml file will be replaced by simple spaces, usefull for 056 * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary 057 * 058 * @param streamHandler the xml stream handler 059 * @param isoControlCharsAwareParser true or false 060 */ 061 public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) { 062 this.streamHandler = streamHandler; 063 XMLInputFactory xmlFactory = XMLInputFactory2.newInstance(); 064 if (xmlFactory instanceof WstxInputFactory) { 065 WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory; 066 wstxInputfactory.configureForLowMemUsage(); 067 wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver()); 068 } 069 xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false); 070 xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 071 xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false); 072 this.isoControlCharsAwareParser = isoControlCharsAwareParser; 073 inf = new SMInputFactory(xmlFactory); 074 } 075 076 public void parse(File xmlFile) throws XMLStreamException { 077 FileInputStream input=null; 078 try { 079 input = new FileInputStream(xmlFile); 080 parse(input); 081 } catch (FileNotFoundException e) { 082 throw new XMLStreamException(e); 083 } finally { 084 IOUtils.closeQuietly(input); 085 } 086 } 087 088 public void parse(InputStream xmlInput) throws XMLStreamException { 089 xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput; 090 parse(inf.rootElementCursor(xmlInput)); 091 } 092 093 public void parse(Reader xmlReader) throws XMLStreamException { 094 if (isoControlCharsAwareParser) { 095 throw new SonarException("Method call not supported when isoControlCharsAwareParser=true"); 096 } 097 parse(inf.rootElementCursor(xmlReader)); 098 } 099 100 public void parse(URL xmlUrl) throws XMLStreamException { 101 try { 102 parse(xmlUrl.openStream()); 103 } catch (IOException e) { 104 throw new XMLStreamException(e); 105 } 106 } 107 108 private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException { 109 try { 110 streamHandler.stream(rootCursor); 111 } finally { 112 rootCursor.getStreamReader().closeCompletely(); 113 } 114 } 115 116 private static class UndeclaredEntitiesXMLResolver implements XMLResolver { 117 public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException { 118 // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression 119 if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) { 120 int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16); 121 if (Character.isDefined(unicodeCharHexValue)) { 122 undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue}); 123 } 124 } 125 return undeclaredEntity; 126 } 127 } 128 129 /** 130 * Simple interface for handling XML stream to parse 131 */ 132 public interface XmlStreamHandler { 133 void stream(SMHierarchicCursor rootCursor) throws XMLStreamException; 134 } 135 136 private static class ISOControlCharAwareInputStream extends InputStream { 137 138 private InputStream inputToCheck; 139 140 public ISOControlCharAwareInputStream(InputStream inputToCheck) { 141 super(); 142 this.inputToCheck = inputToCheck; 143 } 144 145 @Override 146 public int read() throws IOException { 147 return inputToCheck.read(); 148 } 149 150 @Override 151 public int available() throws IOException { 152 return inputToCheck.available(); 153 } 154 155 @Override 156 public void close() throws IOException { 157 inputToCheck.close(); 158 } 159 160 @Override 161 public synchronized void mark(int readlimit) { 162 inputToCheck.mark(readlimit); 163 } 164 165 @Override 166 public boolean markSupported() { 167 return inputToCheck.markSupported(); 168 } 169 170 @Override 171 public int read(byte[] b, int off, int len) throws IOException { 172 int readen = inputToCheck.read(b, off, len); 173 checkBufferForISOControlChars(b, off, len); 174 return readen; 175 } 176 177 @Override 178 public int read(byte[] b) throws IOException { 179 int readen = inputToCheck.read(b); 180 checkBufferForISOControlChars(b, 0, readen); 181 return readen; 182 } 183 184 @Override 185 public synchronized void reset() throws IOException { 186 inputToCheck.reset(); 187 } 188 189 @Override 190 public long skip(long n) throws IOException { 191 return inputToCheck.skip(n); 192 } 193 194 private void checkBufferForISOControlChars(byte[] buffer, int off, int len) { 195 for (int i = off; i < len; i++) { 196 char streamChar = (char) buffer[i]; 197 if (Character.isISOControl(streamChar) && streamChar != '\n') { 198 // replace control chars by a simple space 199 buffer[i] = ' '; 200 } 201 } 202 } 203 } 204 }