/*
 * SonarQube, open source software quality management tool.
 * Copyright (C) 2008-2014 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * SonarQube is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * SonarQube is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
020package org.sonar.api.utils;
021
022import com.ctc.wstx.stax.WstxInputFactory;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.Reader;
029import java.net.URL;
030import javax.xml.stream.XMLInputFactory;
031import javax.xml.stream.XMLResolver;
032import javax.xml.stream.XMLStreamException;
033import org.apache.commons.io.IOUtils;
034import org.apache.commons.lang.StringUtils;
035import org.codehaus.staxmate.SMInputFactory;
036import org.codehaus.staxmate.in.SMHierarchicCursor;
037
038/**
039 * @since 1.10
040 */
041public class StaxParser {
042
043  private SMInputFactory inf;
044  private XmlStreamHandler streamHandler;
045  private boolean isoControlCharsAwareParser;
046
047  /**
048   * Stax parser for a given stream handler and iso control chars set awarness to off
049   *
050   * @param streamHandler the xml stream handler
051   */
052  public StaxParser(XmlStreamHandler streamHandler) {
053    this(streamHandler, false);
054  }
055
056  /**
057   * Stax parser for a given stream handler and iso control chars set awarness to on.
058   * The iso control chars in the xml file will be replaced by simple spaces, usefull for
059   * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
060   *
061   * @param streamHandler              the xml stream handler
062   * @param isoControlCharsAwareParser true or false
063   */
064  public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
065    this.streamHandler = streamHandler;
066    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
067    if (xmlFactory instanceof WstxInputFactory) {
068      WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
069      wstxInputfactory.configureForLowMemUsage();
070      wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
071    }
072    xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
073    xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
074    xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
075    this.isoControlCharsAwareParser = isoControlCharsAwareParser;
076    inf = new SMInputFactory(xmlFactory);
077  }
078
079  public void parse(File xmlFile) throws XMLStreamException {
080    FileInputStream input=null;
081    try {
082      input = new FileInputStream(xmlFile);
083      parse(input);
084    } catch (FileNotFoundException e) {
085      throw new XMLStreamException(e);
086    } finally {
087      IOUtils.closeQuietly(input);
088    }
089  }
090
091  public void parse(InputStream xmlInput) throws XMLStreamException {
092    xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
093    parse(inf.rootElementCursor(xmlInput));
094  }
095
096  public void parse(Reader xmlReader) throws XMLStreamException {
097    if (isoControlCharsAwareParser) {
098      throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
099    }
100    parse(inf.rootElementCursor(xmlReader));
101  }
102
103  public void parse(URL xmlUrl) throws XMLStreamException {
104    try {
105      parse(xmlUrl.openStream());
106    } catch (IOException e) {
107      throw new XMLStreamException(e);
108    }
109  }
110
111  private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
112    try {
113      streamHandler.stream(rootCursor);
114    } finally {
115      rootCursor.getStreamReader().closeCompletely();
116    }
117  }
118
119  private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
120    @Override
121    public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
122      // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
123      if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
124        int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
125        if (Character.isDefined(unicodeCharHexValue)) {
126          undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
127        }
128      }
129      return undeclaredEntity;
130    }
131  }
132
133  /**
134   * Simple interface for handling XML stream to parse
135   */
136  public interface XmlStreamHandler {
137    void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
138  }
139
140  private static class ISOControlCharAwareInputStream extends InputStream {
141
142    private InputStream inputToCheck;
143
144    public ISOControlCharAwareInputStream(InputStream inputToCheck) {
145      super();
146      this.inputToCheck = inputToCheck;
147    }
148
149    @Override
150    public int read() throws IOException {
151      return inputToCheck.read();
152    }
153
154    @Override
155    public int available() throws IOException {
156      return inputToCheck.available();
157    }
158
159    @Override
160    public void close() throws IOException {
161      inputToCheck.close();
162    }
163
164    @Override
165    public synchronized void mark(int readlimit) {
166      inputToCheck.mark(readlimit);
167    }
168
169    @Override
170    public boolean markSupported() {
171      return inputToCheck.markSupported();
172    }
173
174    @Override
175    public int read(byte[] b, int off, int len) throws IOException {
176      int readen = inputToCheck.read(b, off, len);
177      checkBufferForISOControlChars(b, off, len);
178      return readen;
179    }
180
181    @Override
182    public int read(byte[] b) throws IOException {
183      int readen = inputToCheck.read(b);
184      checkBufferForISOControlChars(b, 0, readen);
185      return readen;
186    }
187
188    @Override
189    public synchronized void reset() throws IOException {
190      inputToCheck.reset();
191    }
192
193    @Override
194    public long skip(long n) throws IOException {
195      return inputToCheck.skip(n);
196    }
197
198    private static void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
199      for (int i = off; i < len; i++) {
200        char streamChar = (char) buffer[i];
201        if (Character.isISOControl(streamChar) && streamChar != '\n') {
202          // replace control chars by a simple space
203          buffer[i] = ' ';
204        }
205      }
206    }
207  }
208}