001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2009 SonarSource SA
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.api.utils;
021
022 import com.ctc.wstx.stax.WstxInputFactory;
023 import org.apache.commons.lang.StringUtils;
024 import org.codehaus.stax2.XMLInputFactory2;
025 import org.codehaus.staxmate.SMInputFactory;
026 import org.codehaus.staxmate.in.SMHierarchicCursor;
027
028 import java.io.*;
029 import java.net.URL;
030 import javax.xml.stream.XMLInputFactory;
031 import javax.xml.stream.XMLResolver;
032 import javax.xml.stream.XMLStreamException;
033
034 /**
035 * @since 1.10
036 */
037 public class StaxParser {
038
039 private SMInputFactory inf;
040 private XmlStreamHandler streamHandler;
041 private boolean isoControlCharsAwareParser;
042
043 /**
044 * Stax parser for a given stream handler and iso control chars set awarness to off
045 *
046 * @param streamHandler the xml stream handler
047 */
048 public StaxParser(XmlStreamHandler streamHandler) {
049 this(streamHandler, false);
050 }
051
052 /**
053 * Stax parser for a given stream handler and iso control chars set awarness to on.
054 * The iso control chars in the xml file will be replaced by simple spaces, usefull for
055 * potentially bogus XML files to parse, this has a small perfs overhead so use it only when necessary
056 *
057 * @param streamHandler the xml stream handler
058 * @param isoControlCharsAwareParser true or false
059 */
060 public StaxParser(XmlStreamHandler streamHandler, boolean isoControlCharsAwareParser) {
061 this.streamHandler = streamHandler;
062 XMLInputFactory xmlFactory = XMLInputFactory2.newInstance();
063 if (xmlFactory instanceof WstxInputFactory) {
064 WstxInputFactory wstxInputfactory = (WstxInputFactory) xmlFactory;
065 wstxInputfactory.configureForLowMemUsage();
066 wstxInputfactory.getConfig().setUndeclaredEntityResolver(new UndeclaredEntitiesXMLResolver());
067 }
068 xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, false);
069 xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
070 xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
071 this.isoControlCharsAwareParser = true;
072 inf = new SMInputFactory(xmlFactory);
073 }
074
075 public void parse(File xmlFile) throws XMLStreamException {
076 try {
077 parse(new FileInputStream(xmlFile));
078 } catch (FileNotFoundException e) {
079 throw new XMLStreamException(e);
080 }
081 }
082
083 public void parse(InputStream xmlInput) throws XMLStreamException {
084 xmlInput = isoControlCharsAwareParser ? new ISOControlCharAwareInputStream(xmlInput) : xmlInput;
085 parse(inf.rootElementCursor(xmlInput));
086 }
087
088 public void parse(Reader xmlReader) throws XMLStreamException {
089 if (isoControlCharsAwareParser) {
090 throw new SonarException("Method call not supported when isoControlCharsAwareParser=true");
091 }
092 parse(inf.rootElementCursor(xmlReader));
093 }
094
095 public void parse(URL xmlUrl) throws XMLStreamException {
096 try {
097 parse(xmlUrl.openStream());
098 } catch (IOException e) {
099 throw new XMLStreamException(e);
100 }
101 }
102
103 private void parse(SMHierarchicCursor rootCursor) throws XMLStreamException {
104 try {
105 streamHandler.stream(rootCursor);
106 } finally {
107 rootCursor.getStreamReader().closeCompletely();
108 }
109 }
110
111 private static class UndeclaredEntitiesXMLResolver implements XMLResolver {
112 public Object resolveEntity(String arg0, String arg1, String fileName, String undeclaredEntity) throws XMLStreamException {
113 // avoid problems with XML docs containing undeclared entities.. return the entity under its raw form if not an unicode expression
114 if (StringUtils.startsWithIgnoreCase(undeclaredEntity, "u") && undeclaredEntity.length() == 5) {
115 int unicodeCharHexValue = Integer.parseInt(undeclaredEntity.substring(1), 16);
116 if (Character.isDefined(unicodeCharHexValue)) {
117 undeclaredEntity = new String(new char[]{(char) unicodeCharHexValue});
118 }
119 }
120 return undeclaredEntity;
121 }
122 }
123
124 /**
125 * Simple interface for handling XML stream to parse
126 */
127 public interface XmlStreamHandler {
128
129 public void stream(SMHierarchicCursor rootCursor) throws XMLStreamException;
130 }
131
132 private static class ISOControlCharAwareInputStream extends InputStream {
133
134 private InputStream inputToCheck;
135
136 public ISOControlCharAwareInputStream(InputStream inputToCheck) {
137 super();
138 this.inputToCheck = inputToCheck;
139 }
140
141 @Override
142 public int read() throws IOException {
143 return inputToCheck.read();
144 }
145
146 @Override
147 public int available() throws IOException {
148 return inputToCheck.available();
149 }
150
151 @Override
152 public void close() throws IOException {
153 inputToCheck.close();
154 }
155
156 @Override
157 public synchronized void mark(int readlimit) {
158 inputToCheck.mark(readlimit);
159 }
160
161 @Override
162 public boolean markSupported() {
163 return inputToCheck.markSupported();
164 }
165
166 @Override
167 public int read(byte[] b, int off, int len) throws IOException {
168 int readen = inputToCheck.read(b, off, len);
169 checkBufferForISOControlChars(b, off, len);
170 return readen;
171 }
172
173 @Override
174 public int read(byte[] b) throws IOException {
175 int readen = inputToCheck.read(b);
176 checkBufferForISOControlChars(b, 0, readen);
177 return readen;
178 }
179
180 @Override
181 public synchronized void reset() throws IOException {
182 inputToCheck.reset();
183 }
184
185 @Override
186 public long skip(long n) throws IOException {
187 return inputToCheck.skip(n);
188 }
189
190 private void checkBufferForISOControlChars(byte[] buffer, int off, int len) {
191 for (int i = off; i < len; i++) {
192 if (Character.isISOControl((char) buffer[i])) {
193 // replace control chars by a simple space
194 buffer[i] = ' ';
195 }
196 }
197 }
198 }
199 }