001 /*
002 * SonarQube, open source software quality management tool.
003 * Copyright (C) 2008-2014 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * SonarQube is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * SonarQube is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
019 */
020 package org.sonar.api.utils;
021
022 import com.ctc.wstx.stax.WstxInputFactory;
023 import org.apache.commons.io.IOUtils;
024 import org.apache.commons.lang.StringUtils;
025 import org.codehaus.staxmate.SMInputFactory;
026 import org.codehaus.staxmate.in.SMHierarchicCursor;
027
028 import java.io.*;
029 import java.net.URL;
030 import javax.xml.stream.XMLInputFactory;
031 import javax.xml.stream.XMLResolver;
032 import javax.xml.stream.XMLStreamException;
033
034 /**
035 * @since 1.10
036 */
037 public class StaxParser {
038
039 private SMInputFactory inf;
040 private XmlStreamHandler streamHandler;
041 private boolean isoControlCharsAwareParser;
042
043 /**
044 * Stax parser for a given stream handler and iso control chars set awarness to off
045 *
046 * @param streamHandler the xml stream handler
047 */
048 public StaxParser(XmlStreamHandler streamHandler) {
049 this(streamHandler, false);
050 }
051
052 /**
053 * Stax parser for a given stream handler and iso control chars set awarness to on.
054 * The iso control chars in the xml file will be replaced by simple spaces, usefull for
055 * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
056 *
057 * @param streamHandler the xml stream handler
058 * @param isoControlCharsAwareParser true or false
059 */
060 public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
061 this.streamHandler = streamHandler;
062 XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
063 if (xmlFactory instanceof WstxInputFactory) {
064 WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
065 wstxInputfactory.configureForLowMemUsage();
066 wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
067 }
068 xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
069 xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
070 xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
071 this.isoControlCharsAwareParser = isoControlCharsAwareParser;
072 inf = new SMInputFactory(xmlFactory);
073 }
074
075 public void parse(File xmlFile) throws XMLStreamException {
076 FileInputStream input=null;
077 try {
078 input = new FileInputStream(xmlFile);
079 parse(input);
080 } catch (FileNotFoundException e) {
081 throw new XMLStreamException(e);
082 } finally {
083 IOUtils.closeQuietly(input);
084 }
085 }
086
087 public void parse(InputStream xmlInput) throws XMLStreamException {
088 xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
089 parse(inf.rootElementCursor(xmlInput));
090 }
091
092 public void parse(Reader xmlReader) throws XMLStreamException {
093 if (isoControlCharsAwareParser) {
094 throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
095 }
096 parse(inf.rootElementCursor(xmlReader));
097 }
098
099 public void parse(URL xmlUrl) throws XMLStreamException {
100 try {
101 parse(xmlUrl.openStream());
102 } catch (IOException e) {
103 throw new XMLStreamException(e);
104 }
105 }
106
107 private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
108 try {
109 streamHandler.stream(rootCursor);
110 } finally {
111 rootCursor.getStreamReader().closeCompletely();
112 }
113 }
114
115 private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
116 public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
117 // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
118 if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
119 int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
120 if (Character.isDefined(unicodeCharHexValue)) {
121 undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
122 }
123 }
124 return undeclaredEntity;
125 }
126 }
127
128 /**
129 * Simple interface for handling XML stream to parse
130 */
131 public interface XmlStreamHandler {
132 void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
133 }
134
135 private static class ISOControlCharAwareInputStream extends InputStream {
136
137 private InputStream inputToCheck;
138
139 public ISOControlCharAwareInputStream(InputStream inputToCheck) {
140 super();
141 this.inputToCheck = inputToCheck;
142 }
143
144 @Override
145 public int read() throws IOException {
146 return inputToCheck.read();
147 }
148
149 @Override
150 public int available() throws IOException {
151 return inputToCheck.available();
152 }
153
154 @Override
155 public void close() throws IOException {
156 inputToCheck.close();
157 }
158
159 @Override
160 public synchronized void mark(int readlimit) {
161 inputToCheck.mark(readlimit);
162 }
163
164 @Override
165 public boolean markSupported() {
166 return inputToCheck.markSupported();
167 }
168
169 @Override
170 public int read(byte[] b, int off, int len) throws IOException {
171 int readen = inputToCheck.read(b, off, len);
172 checkBufferForISOControlChars(b, off, len);
173 return readen;
174 }
175
176 @Override
177 public int read(byte[] b) throws IOException {
178 int readen = inputToCheck.read(b);
179 checkBufferForISOControlChars(b, 0, readen);
180 return readen;
181 }
182
183 @Override
184 public synchronized void reset() throws IOException {
185 inputToCheck.reset();
186 }
187
188 @Override
189 public long skip(long n) throws IOException {
190 return inputToCheck.skip(n);
191 }
192
193 private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
194 for (int i = off; i < len; i++) {
195 char streamChar = (char) buffer[i];
196 if (Character.isISOControl(streamChar) && streamChar != '\n') {
197 // replace control chars by a simple space
198 buffer[i] = ' ';
199 }
200 }
201 }
202 }
203 }