001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2009 SonarSource SA
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.api.utils;
021
022 import com.ctc.wstx.stax.WstxInputFactory;
023 import org.apache.commons.io.IOUtils;
024 import org.apache.commons.lang.StringUtils;
025 import org.codehaus.stax2.XMLInputFactory2;
026 import org.codehaus.staxmate.SMInputFactory;
027 import org.codehaus.staxmate.in.SMHierarchicCursor;
028
029 import java.io.*;
030 import java.net.URL;
031 import javax.xml.stream.XMLInputFactory;
032 import javax.xml.stream.XMLResolver;
033 import javax.xml.stream.XMLStreamException;
034
035 /**
036 * @since 1.10
037 */
038 public class StaxParser {
039
040 private SMInputFactory inf;
041 private XmlStreamHandler streamHandler;
042 private boolean isoControlCharsAwareParser;
043
044 /**
045 * Stax parser for a given stream handler and iso control chars set awarness to off
046 *
047 * @param streamHandler the xml stream handler
048 */
049 public StaxParser(XmlStreamHandler streamHandler) {
050 this(streamHandler, false);
051 }
052
053 /**
054 * Stax parser for a given stream handler and iso control chars set awarness to on.
055 * The iso control chars in the xml file will be replaced by simple spaces, usefull for
056 * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
057 *
058 * @param streamHandler the xml stream handler
059 * @param isoControlCharsAwareParser true or false
060 */
061 public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
062 this.streamHandler = streamHandler;
063 XMLInputFactory xmlFactory = XMLInputFactory2.newInstance();
064 if (xmlFactory instanceof WstxInputFactory) {
065 WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
066 wstxInputfactory.configureForLowMemUsage();
067 wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
068 }
069 xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
070 xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
071 xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
072 this.isoControlCharsAwareParser = isoControlCharsAwareParser;
073 inf = new SMInputFactory(xmlFactory);
074 }
075
076 public void parse(File xmlFile) throws XMLStreamException {
077 FileInputStream input=null;
078 try {
079 input = new FileInputStream(xmlFile);
080 parse(input);
081 } catch (FileNotFoundException e) {
082 throw new XMLStreamException(e);
083 } finally {
084 IOUtils.closeQuietly(input);
085 }
086 }
087
088 public void parse(InputStream xmlInput) throws XMLStreamException {
089 xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
090 parse(inf.rootElementCursor(xmlInput));
091 }
092
093 public void parse(Reader xmlReader) throws XMLStreamException {
094 if (isoControlCharsAwareParser) {
095 throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
096 }
097 parse(inf.rootElementCursor(xmlReader));
098 }
099
100 public void parse(URL xmlUrl) throws XMLStreamException {
101 try {
102 parse(xmlUrl.openStream());
103 } catch (IOException e) {
104 throw new XMLStreamException(e);
105 }
106 }
107
108 private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
109 try {
110 streamHandler.stream(rootCursor);
111 } finally {
112 rootCursor.getStreamReader().closeCompletely();
113 }
114 }
115
116 private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
117 public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
118 // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
119 if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
120 int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
121 if (Character.isDefined(unicodeCharHexValue)) {
122 undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
123 }
124 }
125 return undeclaredEntity;
126 }
127 }
128
129 /**
130 * Simple interface for handling XML stream to parse
131 */
132 public interface XmlStreamHandler {
133 void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
134 }
135
136 private static class ISOControlCharAwareInputStream extends InputStream {
137
138 private InputStream inputToCheck;
139
140 public ISOControlCharAwareInputStream(InputStream inputToCheck) {
141 super();
142 this.inputToCheck = inputToCheck;
143 }
144
145 @Override
146 public int read() throws IOException {
147 return inputToCheck.read();
148 }
149
150 @Override
151 public int available() throws IOException {
152 return inputToCheck.available();
153 }
154
155 @Override
156 public void close() throws IOException {
157 inputToCheck.close();
158 }
159
160 @Override
161 public synchronized void mark(int readlimit) {
162 inputToCheck.mark(readlimit);
163 }
164
165 @Override
166 public boolean markSupported() {
167 return inputToCheck.markSupported();
168 }
169
170 @Override
171 public int read(byte[] b, int off, int len) throws IOException {
172 int readen = inputToCheck.read(b, off, len);
173 checkBufferForISOControlChars(b, off, len);
174 return readen;
175 }
176
177 @Override
178 public int read(byte[] b) throws IOException {
179 int readen = inputToCheck.read(b);
180 checkBufferForISOControlChars(b, 0, readen);
181 return readen;
182 }
183
184 @Override
185 public synchronized void reset() throws IOException {
186 inputToCheck.reset();
187 }
188
189 @Override
190 public long skip(long n) throws IOException {
191 return inputToCheck.skip(n);
192 }
193
194 private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
195 for (int i = off; i < len; i++) {
196 char streamChar = (char) buffer[i];
197 if (Character.isISOControl(streamChar) && streamChar != '\n') {
198 // replace control chars by a simple space
199 buffer[i] = ' ';
200 }
201 }
202 }
203 }
204 }