001/*
002 * SonarQube, open source software quality management tool.
003 * Copyright (C) 2008-2014 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * SonarQube is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * SonarQube is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019 */
020package org.sonar.api.utils;
021
022import com.ctc.wstx.stax.WstxInputFactory;
023import org.apache.commons.io.IOUtils;
024import org.apache.commons.lang.StringUtils;
025import org.codehaus.staxmate.SMInputFactory;
026import org.codehaus.staxmate.in.SMHierarchicCursor;
027
028import java.io.*;
029import java.net.URL;
030import javax.xml.stream.XMLInputFactory;
031import javax.xml.stream.XMLResolver;
032import javax.xml.stream.XMLStreamException;
033
034/**
035 * @since 1.10
036 */
037public class StaxParser {
038
039  private SMInputFactory inf;
040  private XmlStreamHandler streamHandler;
041  private boolean isoControlCharsAwareParser;
042
043  /**
044   * Stax parser for a given stream handler and iso control chars set awarness to off
045   *
046   * @param streamHandler the xml stream handler
047   */
048  public StaxParser(XmlStreamHandler streamHandler) {
049    this(streamHandler, false);
050  }
051
052  /**
053   * Stax parser for a given stream handler and iso control chars set awarness to on.
054   * The iso control chars in the xml file will be replaced by simple spaces, usefull for
055   * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
056   *
057   * @param streamHandler              the xml stream handler
058   * @param isoControlCharsAwareParser true or false
059   */
060  public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
061    this.streamHandler = streamHandler;
062    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
063    if (xmlFactory instanceof WstxInputFactory) {
064      WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
065      wstxInputfactory.configureForLowMemUsage();
066      wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
067    }
068    xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
069    xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
070    xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
071    this.isoControlCharsAwareParser = isoControlCharsAwareParser;
072    inf = new SMInputFactory(xmlFactory);
073  }
074
075  public void parse(File xmlFile) throws XMLStreamException {
076    FileInputStream input=null;
077    try {
078      input = new FileInputStream(xmlFile);
079      parse(input);
080    } catch (FileNotFoundException e) {
081      throw new XMLStreamException(e);
082    } finally {
083      IOUtils.closeQuietly(input);
084    }
085  }
086
087  public void parse(InputStream xmlInput) throws XMLStreamException {
088    xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
089    parse(inf.rootElementCursor(xmlInput));
090  }
091
092  public void parse(Reader xmlReader) throws XMLStreamException {
093    if (isoControlCharsAwareParser) {
094      throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
095    }
096    parse(inf.rootElementCursor(xmlReader));
097  }
098
099  public void parse(URL xmlUrl) throws XMLStreamException {
100    try {
101      parse(xmlUrl.openStream());
102    } catch (IOException e) {
103      throw new XMLStreamException(e);
104    }
105  }
106
107  private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
108    try {
109      streamHandler.stream(rootCursor);
110    } finally {
111      rootCursor.getStreamReader().closeCompletely();
112    }
113  }
114
115  private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
116    @Override
117    public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
118      // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
119      if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
120        int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
121        if (Character.isDefined(unicodeCharHexValue)) {
122          undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
123        }
124      }
125      return undeclaredEntity;
126    }
127  }
128
129  /**
130   * Simple interface for handling XML stream to parse
131   */
132  public interface XmlStreamHandler {
133    void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
134  }
135
136  private static class ISOControlCharAwareInputStream extends InputStream {
137
138    private InputStream inputToCheck;
139
140    public ISOControlCharAwareInputStream(InputStream inputToCheck) {
141      super();
142      this.inputToCheck = inputToCheck;
143    }
144
145    @Override
146    public int read() throws IOException {
147      return inputToCheck.read();
148    }
149
150    @Override
151    public int available() throws IOException {
152      return inputToCheck.available();
153    }
154
155    @Override
156    public void close() throws IOException {
157      inputToCheck.close();
158    }
159
160    @Override
161    public synchronized void mark(int readlimit) {
162      inputToCheck.mark(readlimit);
163    }
164
165    @Override
166    public boolean markSupported() {
167      return inputToCheck.markSupported();
168    }
169
170    @Override
171    public int read(byte[] b, int off, int len) throws IOException {
172      int readen = inputToCheck.read(b, off, len);
173      checkBufferForISOControlChars(b, off, len);
174      return readen;
175    }
176
177    @Override
178    public int read(byte[] b) throws IOException {
179      int readen = inputToCheck.read(b);
180      checkBufferForISOControlChars(b, 0, readen);
181      return readen;
182    }
183
184    @Override
185    public synchronized void reset() throws IOException {
186      inputToCheck.reset();
187    }
188
189    @Override
190    public long skip(long n) throws IOException {
191      return inputToCheck.skip(n);
192    }
193
194    private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
195      for (int i = off; i < len; i++) {
196        char streamChar = (char) buffer[i];
197        if (Character.isISOControl(streamChar) && streamChar != '\n') {
198          // replace control chars by a simple space
199          buffer[i] = ' ';
200        }
201      }
202    }
203  }
204}