/*
 * Sonar, open source software quality management tool.
 * Copyright (C) 2009 SonarSource SA
 * mailto:contact AT sonarsource DOT com
 *
 * Sonar is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * Sonar is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Sonar; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
 */
package org.sonar.api.utils;

import com.ctc.wstx.stax.WstxInputFactory;
import org.apache.commons.lang.StringUtils;
import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.staxmate.SMInputFactory;
import org.codehaus.staxmate.in.SMHierarchicCursor;

import java.io.*;
import java.net.URL;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;

/**
 * Thin wrapper around a StaxMate input factory: it configures the underlying StAX factory
 * (no validation, no DTD support, no namespace awareness), opens a root cursor on the given
 * input and hands it over to a {@link XmlStreamHandler}.
 *
 * @since 1.10
 */
public class StaxParser {

  private final SMInputFactory inf;
  private final XmlStreamHandler streamHandler;
  private final boolean isoControlCharsAwareParser;

  /**
   * Stax parser for a given stream handler, with ISO control chars awareness set to off.
   *
   * @param streamHandler the xml stream handler
   */
  public StaxParser(XmlStreamHandler streamHandler) {
    this(streamHandler, false);
  }

  /**
   * Stax parser for a given stream handler, with configurable ISO control chars awareness.
   * When enabled, the ISO control chars found in the xml input (except tab, line feed and
   * carriage return, which are legal XML whitespace) are replaced by simple spaces. This is
   * useful for potentially bogus XML files, but it has a small performance overhead, so use it
   * only when necessary.
   *
   * @param streamHandler the xml stream handler
   * @param isoControlCharsAwareParser true to replace ISO control chars by spaces while reading
   */
  public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
    this.streamHandler = streamHandler;
    XMLInputFactory xmlFactory = XMLInputFactory2.newInstance();
    if (xmlFactory instanceof WstxInputFactory) {
      WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
      wstxInputfactory.configureForLowMemUsage();
      wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
    }
    xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
    xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
    xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
    // Honour the caller's choice: forcing this flag to true would enable the filtering for
    // everybody and make parse(Reader) unusable.
    this.isoControlCharsAwareParser = isoControlCharsAwareParser;
    inf = new SMInputFactory(xmlFactory);
  }

  /**
   * Parses the given XML file.
   *
   * @param xmlFile the file to parse
   * @throws XMLStreamException if the file cannot be found or if the parsing fails
   */
  public void parse(File xmlFile) throws XMLStreamException {
    InputStream input = null;
    try {
      input = new FileInputStream(xmlFile);
      parse(input);
    } catch (FileNotFoundException e) {
      throw new XMLStreamException(e);
    } finally {
      // The stream is opened here, so it must be released even when the cursor could not be created.
      closeQuietly(input);
    }
  }

  /**
   * Parses the given XML stream. When the parser is ISO control chars aware, the stream is
   * wrapped so that control chars are replaced by spaces before reaching the XML reader.
   *
   * @param xmlInput the stream to parse
   * @throws XMLStreamException if the parsing fails
   */
  public void parse(InputStream xmlInput) throws XMLStreamException {
    InputStream input = xmlInput;
    if (isoControlCharsAwareParser) {
      input = new ISOControlCharAwareInputStream(xmlInput);
    }
    parse(inf.rootElementCursor(input));
  }

  /**
   * Parses the XML provided by the given reader. Not supported when the parser is ISO control
   * chars aware, because the filtering is done on bytes.
   *
   * @param xmlReader the reader to parse
   * @throws XMLStreamException if the parsing fails
   * @throws SonarException if the parser has been created with isoControlCharsAwareParser=true
   */
  public void parse(Reader xmlReader) throws XMLStreamException {
    if (isoControlCharsAwareParser) {
      throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
    }
    parse(inf.rootElementCursor(xmlReader));
  }

  /**
   * Parses the XML document located at the given URL.
   *
   * @param xmlUrl the URL of the document to parse
   * @throws XMLStreamException if the URL cannot be opened or if the parsing fails
   */
  public void parse(URL xmlUrl) throws XMLStreamException {
    InputStream input = null;
    try {
      input = xmlUrl.openStream();
      parse(input);
    } catch (IOException e) {
      throw new XMLStreamException(e);
    } finally {
      // The stream is opened here, so it must be released even when the cursor could not be created.
      closeQuietly(input);
    }
  }

  /**
   * Delegates the cursor to the stream handler and always closes the stream reader afterwards
   * (closeCompletely() is expected to close the underlying input source as well).
   */
  private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
    try {
      streamHandler.stream(rootCursor);
    } finally {
      rootCursor.getStreamReader().closeCompletely();
    }
  }

  /**
   * Closes the given resource, ignoring a null reference and any failure: the resource was only
   * read, and a close failure must not hide the exception raised by the parsing itself.
   */
  private static void closeQuietly(Closeable closeable) {
    if (closeable != null) {
      try {
        closeable.close();
      } catch (IOException ignored) {
        // nothing useful can be done at this point
      }
    }
  }

  /**
   * Resolver registered on Woodstox for undeclared entities. It avoids failures on XML documents
   * containing undeclared entities: an entity named "uXXXX", where XXXX is the hexadecimal code of
   * a defined character, is resolved to that character; any other entity is returned under its
   * raw form.
   */
  private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
    public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
      if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
        try {
          int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
          if (Character.isDefined(unicodeCharHexValue)) {
            return String.valueOf((char) unicodeCharHexValue);
          }
        } catch (NumberFormatException e) {
          // Five-letter names starting with "u" are not necessarily unicode expressions
          // (e.g. "uring"): keep the raw form instead of failing the whole parsing.
        }
      }
      return undeclaredEntity;
    }
  }

  /**
   * Simple interface for handling XML stream to parse
   */
  public interface XmlStreamHandler {

    /**
     * Consumes the XML document through the given root cursor.
     *
     * @param rootCursor the cursor positioned on the root element
     * @throws XMLStreamException if the document cannot be read
     */
    void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
  }

  /**
   * Input stream decorator replacing, in the bytes it delivers, the ISO control chars by spaces.
   * Tab, line feed and carriage return are kept because they are legal XML characters. All the
   * other operations are delegated as is to the wrapped stream.
   */
  private static class ISOControlCharAwareInputStream extends InputStream {

    private final InputStream inputToCheck;

    public ISOControlCharAwareInputStream(InputStream inputToCheck) {
      super();
      this.inputToCheck = inputToCheck;
    }

    @Override
    public int read() throws IOException {
      int value = inputToCheck.read();
      // -1 means end of stream; otherwise apply the same rule as the bulk reads
      if (value >= 0 && isReplaceableControlChar((byte) value)) {
        return ' ';
      }
      return value;
    }

    @Override
    public int available() throws IOException {
      return inputToCheck.available();
    }

    @Override
    public void close() throws IOException {
      inputToCheck.close();
    }

    @Override
    public synchronized void mark(int readlimit) {
      inputToCheck.mark(readlimit);
    }

    @Override
    public boolean markSupported() {
      return inputToCheck.markSupported();
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      int readen = inputToCheck.read(b, off, len);
      // Only the bytes actually read must be checked, not the whole requested range
      checkBufferForISOControlChars(b, off, readen);
      return readen;
    }

    @Override
    public int read(byte[] b) throws IOException {
      int readen = inputToCheck.read(b);
      checkBufferForISOControlChars(b, 0, readen);
      return readen;
    }

    @Override
    public synchronized void reset() throws IOException {
      inputToCheck.reset();
    }

    @Override
    public long skip(long n) throws IOException {
      return inputToCheck.skip(n);
    }

    /**
     * Replaces the control chars found in buffer[off .. off + len - 1] by spaces.
     *
     * @param buffer the buffer that has just been filled
     * @param off index of the first byte to check
     * @param len number of bytes to check; nothing is done when it is negative (end of stream) or zero
     */
    private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
      int end = off + len;
      for (int i = off; i < end; i++) {
        if (isReplaceableControlChar(buffer[i])) {
          // replace control chars by a simple space
          buffer[i] = ' ';
        }
      }
    }

    /**
     * Tells whether the given byte must be replaced by a space. Bytes with the high bit set
     * (negative values) are never replaced, as they may be part of a multi-byte encoded
     * character (e.g. in UTF-8).
     */
    private static boolean isReplaceableControlChar(byte value) {
      if (value < 0 || value == '\t' || value == '\n' || value == '\r') {
        return false;
      }
      return Character.isISOControl((char) value);
    }
  }
}