001/*
002 * SonarQube
003 * Copyright (C) 2009-2017 SonarSource SA
004 * mailto:info AT sonarsource DOT com
005 *
006 * This program is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * This program is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019 */
020package org.sonar.api.utils;
021
022import com.ctc.wstx.stax.WstxInputFactory;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.Reader;
029import java.net.URL;
030import javax.xml.stream.XMLInputFactory;
031import javax.xml.stream.XMLResolver;
032import javax.xml.stream.XMLStreamException;
033import org.apache.commons.io.IOUtils;
034import org.apache.commons.lang.StringUtils;
035import org.codehaus.staxmate.SMInputFactory;
036import org.codehaus.staxmate.in.SMHierarchicCursor;
037
038/**
039 * @since 1.10
040 * @deprecated since 5.6 plugins should use their own dependencies
041 */
042@Deprecated
043public class StaxParser {
044
045  private SMInputFactory inf;
046  private XmlStreamHandler streamHandler;
047  private boolean isoControlCharsAwareParser;
048
049  /**
050   * Stax parser for a given stream handler and iso control chars set awarness to off
051   *
052   * @param streamHandler the xml stream handler
053   */
054  public StaxParser(XmlStreamHandler streamHandler) {
055    this(streamHandler, false);
056  }
057
058  /**
059   * Stax parser for a given stream handler and iso control chars set awarness to on.
060   * The iso control chars in the xml file will be replaced by simple spaces, usefull for
061   * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
062   *
063   * @param streamHandler              the xml stream handler
064   * @param isoControlCharsAwareParser true or false
065   */
066  public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
067    this.streamHandler = streamHandler;
068    XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
069    if (xmlFactory instanceof WstxInputFactory) {
070      WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
071      wstxInputfactory.configureForLowMemUsage();
072      wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
073    }
074    xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
075    xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
076    xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
077    this.isoControlCharsAwareParser = isoControlCharsAwareParser;
078    inf = new SMInputFactory(xmlFactory);
079  }
080
081  public void parse(File xmlFile) throws XMLStreamException {
082    FileInputStream input = null;
083    try {
084      input = new FileInputStream(xmlFile);
085      parse(input);
086    } catch (FileNotFoundException e) {
087      throw new XMLStreamException(e);
088    } finally {
089      IOUtils.closeQuietly(input);
090    }
091  }
092
093  public void parse(InputStream xmlInput) throws XMLStreamException {
094    xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
095    parse(inf.rootElementCursor(xmlInput));
096  }
097
098  public void parse(Reader xmlReader) throws XMLStreamException {
099    if (isoControlCharsAwareParser) {
100      throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
101    }
102    parse(inf.rootElementCursor(xmlReader));
103  }
104
105  public void parse(URL xmlUrl) throws XMLStreamException {
106    try {
107      parse(xmlUrl.openStream());
108    } catch (IOException e) {
109      throw new XMLStreamException(e);
110    }
111  }
112
113  private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
114    try {
115      streamHandler.stream(rootCursor);
116    } finally {
117      rootCursor.getStreamReader().closeCompletely();
118    }
119  }
120
121  private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
122    @Override
123    public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
124      // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
125      if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
126        int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
127        if (Character.isDefined(unicodeCharHexValue)) {
128          undeclaredEntity = new String(new char[] {(char) unicodeCharHexValue});
129        }
130      }
131      return undeclaredEntity;
132    }
133  }
134
135  /**
136   * Simple interface for handling XML stream to parse
137   */
138  public interface XmlStreamHandler {
139    void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
140  }
141
142  private static class ISOControlCharAwareInputStream extends InputStream {
143
144    private InputStream inputToCheck;
145
146    public ISOControlCharAwareInputStream(InputStream inputToCheck) {
147      super();
148      this.inputToCheck = inputToCheck;
149    }
150
151    @Override
152    public int read() throws IOException {
153      return inputToCheck.read();
154    }
155
156    @Override
157    public int available() throws IOException {
158      return inputToCheck.available();
159    }
160
161    @Override
162    public void close() throws IOException {
163      inputToCheck.close();
164    }
165
166    @Override
167    public synchronized void mark(int readlimit) {
168      inputToCheck.mark(readlimit);
169    }
170
171    @Override
172    public boolean markSupported() {
173      return inputToCheck.markSupported();
174    }
175
176    @Override
177    public int read(byte[] b, int off, int len) throws IOException {
178      int readen = inputToCheck.read(b, off, len);
179      checkBufferForISOControlChars(b, off, len);
180      return readen;
181    }
182
183    @Override
184    public int read(byte[] b) throws IOException {
185      int readen = inputToCheck.read(b);
186      checkBufferForISOControlChars(b, 0, readen);
187      return readen;
188    }
189
190    @Override
191    public synchronized void reset() throws IOException {
192      inputToCheck.reset();
193    }
194
195    @Override
196    public long skip(long n) throws IOException {
197      return inputToCheck.skip(n);
198    }
199
200    private static void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
201      for (int i = off; i < len; i++) {
202        char streamChar = (char) buffer[i];
203        if (Character.isISOControl(streamChar) && streamChar != '\n') {
204          // replace control chars by a simple space
205          buffer[i] = ' ';
206        }
207      }
208    }
209  }
210}