001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2009 SonarSource SA
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.api.utils;
021    
022    import com.ctc.wstx.stax.WstxInputFactory;
023    import org.apache.commons.io.IOUtils;
024    import org.apache.commons.lang.StringUtils;
025    import org.codehaus.stax2.XMLInputFactory2;
026    import org.codehaus.staxmate.SMInputFactory;
027    import org.codehaus.staxmate.in.SMHierarchicCursor;
028    
029    import java.io.*;
030    import java.net.URL;
031    import javax.xml.stream.XMLInputFactory;
032    import javax.xml.stream.XMLResolver;
033    import javax.xml.stream.XMLStreamException;
034    
035    /**
036     * @since 1.10
037     */
038    public class StaxParser {
039    
040      private SMInputFactory inf;
041      private XmlStreamHandler streamHandler;
042      private boolean isoControlCharsAwareParser;
043    
044      /**
045       * Stax parser for a given stream handler and iso control chars set awarness to off
046       *
047       * @param streamHandler the xml stream handler
048       */
049      public StaxParser(XmlStreamHandler streamHandler) {
050        this(streamHandler, false);
051      }
052    
053      /**
054       * Stax parser for a given stream handler and iso control chars set awarness to on.
055       * The iso control chars in the xml file will be replaced by simple spaces, usefull for
056       * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
057       *
058       * @param streamHandler              the xml stream handler
059       * @param isoControlCharsAwareParser true or false
060       */
061      public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
062        this.streamHandler = streamHandler;
063        XMLInputFactory xmlFactory = XMLInputFactory2.newInstance();
064        if (xmlFactory instanceof WstxInputFactory) {
065          WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
066          wstxInputfactory.configureForLowMemUsage();
067          wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
068        }
069        xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
070        xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
071        xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
072        this.isoControlCharsAwareParser = isoControlCharsAwareParser;
073        inf = new SMInputFactory(xmlFactory);
074      }
075    
076      public void parse(File xmlFile) throws XMLStreamException {
077        FileInputStream input=null;
078        try {
079          input = new FileInputStream(xmlFile);
080          parse(input);
081        } catch (FileNotFoundException e) {
082          throw new XMLStreamException(e);
083        } finally {
084          IOUtils.closeQuietly(input);
085        }
086      }
087    
088      public void parse(InputStream xmlInput) throws XMLStreamException {
089        xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
090        parse(inf.rootElementCursor(xmlInput));
091      }
092    
093      public void parse(Reader xmlReader) throws XMLStreamException {
094        if (isoControlCharsAwareParser) {
095          throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
096        }
097        parse(inf.rootElementCursor(xmlReader));
098      }
099    
100      public void parse(URL xmlUrl) throws XMLStreamException {
101        try {
102          parse(xmlUrl.openStream());
103        } catch (IOException e) {
104          throw new XMLStreamException(e);
105        }
106      }
107    
108      private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
109        try {
110          streamHandler.stream(rootCursor);
111        } finally {
112          rootCursor.getStreamReader().closeCompletely();
113        }
114      }
115    
116      private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
117        public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
118          // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
119          if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
120            int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
121            if (Character.isDefined(unicodeCharHexValue)) {
122              undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
123            }
124          }
125          return undeclaredEntity;
126        }
127      }
128    
129      /**
130       * Simple interface for handling XML stream to parse
131       */
132      public interface XmlStreamHandler {
133        void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
134      }
135    
136      private static class ISOControlCharAwareInputStream extends InputStream {
137    
138        private InputStream inputToCheck;
139    
140        public ISOControlCharAwareInputStream(InputStream inputToCheck) {
141          super();
142          this.inputToCheck = inputToCheck;
143        }
144    
145        @Override
146        public int read() throws IOException {
147          return inputToCheck.read();
148        }
149    
150        @Override
151        public int available() throws IOException {
152          return inputToCheck.available();
153        }
154    
155        @Override
156        public void close() throws IOException {
157          inputToCheck.close();
158        }
159    
160        @Override
161        public synchronized void mark(int readlimit) {
162          inputToCheck.mark(readlimit);
163        }
164    
165        @Override
166        public boolean markSupported() {
167          return inputToCheck.markSupported();
168        }
169    
170        @Override
171        public int read(byte[] b, int off, int len) throws IOException {
172          int readen = inputToCheck.read(b, off, len);
173          checkBufferForISOControlChars(b, off, len);
174          return readen;
175        }
176    
177        @Override
178        public int read(byte[] b) throws IOException {
179          int readen = inputToCheck.read(b);
180          checkBufferForISOControlChars(b, 0, readen);
181          return readen;
182        }
183    
184        @Override
185        public synchronized void reset() throws IOException {
186          inputToCheck.reset();
187        }
188    
189        @Override
190        public long skip(long n) throws IOException {
191          return inputToCheck.skip(n);
192        }
193    
194        private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
195          for (int i = off; i < len; i++) {
196            char streamChar = (char) buffer[i];
197            if (Character.isISOControl(streamChar) && streamChar != '\n') {
198              // replace control chars by a simple space
199              buffer[i] = ' ';
200            }
201          }
202        }
203      }
204    }