001    /*
002     * SonarQube, open source software quality management tool.
003     * Copyright (C) 2008-2013 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * SonarQube is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * SonarQube is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public License
017     * along with this program; if not, write to the Free Software Foundation,
018     * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019     */
020    package org.sonar.api.utils;
021    
022    import com.ctc.wstx.stax.WstxInputFactory;
023    import org.apache.commons.io.IOUtils;
024    import org.apache.commons.lang.StringUtils;
025    import org.codehaus.staxmate.SMInputFactory;
026    import org.codehaus.staxmate.in.SMHierarchicCursor;
027    
028    import java.io.*;
029    import java.net.URL;
030    import javax.xml.stream.XMLInputFactory;
031    import javax.xml.stream.XMLResolver;
032    import javax.xml.stream.XMLStreamException;
033    
034    /**
035     * @since 1.10
036     */
037    public class StaxParser {
038    
039      private SMInputFactory inf;
040      private XmlStreamHandler streamHandler;
041      private boolean isoControlCharsAwareParser;
042    
043      /**
044       * Stax parser for a given stream handler and iso control chars set awarness to off
045       *
046       * @param streamHandler the xml stream handler
047       */
048      public StaxParser(XmlStreamHandler streamHandler) {
049        this(streamHandler, false);
050      }
051    
052      /**
053       * Stax parser for a given stream handler and iso control chars set awarness to on.
054       * The iso control chars in the xml file will be replaced by simple spaces, usefull for
055       * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
056       *
057       * @param streamHandler              the xml stream handler
058       * @param isoControlCharsAwareParser true or false
059       */
060      public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
061        this.streamHandler = streamHandler;
062        XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
063        if (xmlFactory instanceof WstxInputFactory) {
064          WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
065          wstxInputfactory.configureForLowMemUsage();
066          wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
067        }
068        xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
069        xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
070        xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
071        this.isoControlCharsAwareParser = isoControlCharsAwareParser;
072        inf = new SMInputFactory(xmlFactory);
073      }
074    
075      public void parse(File xmlFile) throws XMLStreamException {
076        FileInputStream input=null;
077        try {
078          input = new FileInputStream(xmlFile);
079          parse(input);
080        } catch (FileNotFoundException e) {
081          throw new XMLStreamException(e);
082        } finally {
083          IOUtils.closeQuietly(input);
084        }
085      }
086    
087      public void parse(InputStream xmlInput) throws XMLStreamException {
088        xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
089        parse(inf.rootElementCursor(xmlInput));
090      }
091    
092      public void parse(Reader xmlReader) throws XMLStreamException {
093        if (isoControlCharsAwareParser) {
094          throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
095        }
096        parse(inf.rootElementCursor(xmlReader));
097      }
098    
099      public void parse(URL xmlUrl) throws XMLStreamException {
100        try {
101          parse(xmlUrl.openStream());
102        } catch (IOException e) {
103          throw new XMLStreamException(e);
104        }
105      }
106    
107      private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
108        try {
109          streamHandler.stream(rootCursor);
110        } finally {
111          rootCursor.getStreamReader().closeCompletely();
112        }
113      }
114    
115      private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
116        public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
117          // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
118          if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
119            int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
120            if (Character.isDefined(unicodeCharHexValue)) {
121              undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
122            }
123          }
124          return undeclaredEntity;
125        }
126      }
127    
128      /**
129       * Simple interface for handling XML stream to parse
130       */
131      public interface XmlStreamHandler {
132        void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
133      }
134    
135      private static class ISOControlCharAwareInputStream extends InputStream {
136    
137        private InputStream inputToCheck;
138    
139        public ISOControlCharAwareInputStream(InputStream inputToCheck) {
140          super();
141          this.inputToCheck = inputToCheck;
142        }
143    
144        @Override
145        public int read() throws IOException {
146          return inputToCheck.read();
147        }
148    
149        @Override
150        public int available() throws IOException {
151          return inputToCheck.available();
152        }
153    
154        @Override
155        public void close() throws IOException {
156          inputToCheck.close();
157        }
158    
159        @Override
160        public synchronized void mark(int readlimit) {
161          inputToCheck.mark(readlimit);
162        }
163    
164        @Override
165        public boolean markSupported() {
166          return inputToCheck.markSupported();
167        }
168    
169        @Override
170        public int read(byte[] b, int off, int len) throws IOException {
171          int readen = inputToCheck.read(b, off, len);
172          checkBufferForISOControlChars(b, off, len);
173          return readen;
174        }
175    
176        @Override
177        public int read(byte[] b) throws IOException {
178          int readen = inputToCheck.read(b);
179          checkBufferForISOControlChars(b, 0, readen);
180          return readen;
181        }
182    
183        @Override
184        public synchronized void reset() throws IOException {
185          inputToCheck.reset();
186        }
187    
188        @Override
189        public long skip(long n) throws IOException {
190          return inputToCheck.skip(n);
191        }
192    
193        private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
194          for (int i = off; i < len; i++) {
195            char streamChar = (char) buffer[i];
196            if (Character.isISOControl(streamChar) && streamChar != '\n') {
197              // replace control chars by a simple space
198              buffer[i] = ' ';
199            }
200          }
201        }
202      }
203    }