/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.util.ObjectHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
 * <p/>
 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
 * to access the message body.
 * <p/>
 * Can be used to split big XML files.
 * <p/>
 * This implementation supports inheriting namespaces from a parent/root tag.
 *
 * @deprecated use {@link TokenXMLExpressionIterator} instead.
 */
@Deprecated
public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {

    // matches xmlns="..." and xmlns:prefix="..." declarations (double-quoted values only)
    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
    // optional attributes (anything after whitespace) followed by the closing '>'
    private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
    // optional namespace prefix of up to 15 characters followed by ':'
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)";
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression iterator.
     *
     * @param startToken            the XML start token, must start with {@code <} and end with {@code >}
     * @param endToken              the XML end token, must start with {@code <} and end with {@code >}
     * @param inheritNamespaceToken optional XML token of the tag to inherit namespaces from;
     *                              when given it must start with {@code <} and end with {@code >}
     * @throws IllegalArgumentException if any of the given tokens is not enclosed in {@code <} and {@code >}
     */
    public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
        super(startToken, endToken, true);
        // namespace token is optional
        this.inheritNamespaceToken = inheritNamespaceToken;

        // must be XML tokens
        if (!startToken.startsWith("<") || !startToken.endsWith(">")) {
            throw new IllegalArgumentException("Start token must be a valid XML token, was: " + startToken);
        }
        if (!endToken.startsWith("<") || !endToken.endsWith(">")) {
            throw new IllegalArgumentException("End token must be a valid XML token, was: " + endToken);
        }
        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
        }
    }

    /**
     * Creates and initializes a {@link XMLTokenPairIterator} over the given stream.
     *
     * @param in      the stream to read the XML from
     * @param charset the charset name handed to the underlying {@link Scanner}
     * @return the initialized iterator
     */
    @Override
    protected Iterator<?> createIterator(InputStream in, String charset) {
        XMLTokenPairIterator iterator = new XMLTokenPairIterator(startToken, endToken, inheritNamespaceToken, in, charset);
        iterator.init();
        return iterator;
    }

    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenPairIterator extends TokenPairIterator {

        // key used in the namespace map for the default (un-prefixed) namespace
        private static final String DEFAULT_NAMESPACE_KEY = "_DEFAULT_";

        private final Pattern startTokenPattern;
        private final String scanEndToken;
        private final String inheritNamespaceToken;
        private Pattern inheritNamespaceTokenPattern;
        private String rootTokenNamespaces;

        /**
         * Builds the regular expressions used to locate the start token, the end token and
         * (optionally) the tag to inherit namespaces from. Each expression tolerates a
         * namespace prefix in front of the tag name and attributes after it.
         */
        XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
            super(startToken, endToken, true, in, charset);

            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
            StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                    append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX);
            this.startTokenPattern = Pattern.compile(tokenSb.toString());

            // the end token is stripped of its leading "</" and trailing ">"
            tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                    append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX);
            this.scanEndToken = tokenSb.toString();

            this.inheritNamespaceToken = inheritNamespaceToken;
            if (inheritNamespaceToken != null) {
                // the inherit namespace token may itself have a namespace prefix
                tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                        append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX);
                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
                this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL);
            }
        }

        /**
         * Creates the scanner (delimited by the end token expression) and pre-loads the first element.
         */
        @Override
        void init() {
            // use scan end token as delimiter which supports attributes/namespaces
            this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
            // this iterator will do look ahead as we may have data
            // after the last end token, which the scanner would find
            // so we need to be one step ahead of the scanner
            this.image = scanner.hasNext() ? (String) next(true) : null;
        }

        /**
         * Reads the next chunk from the scanner and turns it into a complete element:
         * leading data before the start tag is dropped, the end tag (consumed by the
         * scanner as delimiter) is re-appended, and the inherited namespaces are added
         * to the start tag when configured and found.
         *
         * @param first whether this is the first chunk; the inherited namespaces are
         *              looked up in the first chunk only
         * @return the element, or <tt>null</tt> if the chunk contains no start tag
         */
        @Override
        String getNext(boolean first) {
            // Scanner.next() never returns null (it throws when exhausted), so no null check is needed
            String next = scanner.next();

            // initialize inherited namespaces on first
            if (first && inheritNamespaceToken != null) {
                rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
            }

            // make sure next is positioned at start token as we can have leading data
            // or we reached EOL and there is no more start tags
            Matcher matcher = startTokenPattern.matcher(next);
            if (!matcher.find()) {
                return null;
            }
            next = next.substring(matcher.start());

            // make sure the end tag matches the begin tag if the tag has a namespace prefix
            String tag = ObjectHelper.before(next, ">");
            // the tag name ends at the first whitespace of any kind (space, tab, newline, ...)
            // as attributes may be separated from the name by something else than a plain space
            int nameEnd = indexOfWhitespace(tag);
            String tagName = nameEnd > 0 ? tag.substring(1, nameEnd) : tag.substring(1);
            String endTag = "</" + tagName + ">";

            // build answer accordingly to whether namespaces should be inherited or not
            StringBuilder sb = new StringBuilder();
            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
                // append root namespaces to local start token
                // grab the text
                String text = ObjectHelper.after(next, ">");
                // build result with inherited namespaces
                next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTag).toString();
            } else {
                next = sb.append(next).append(endTag).toString();
            }

            return next;
        }

        /**
         * Finds the position of the first whitespace character.
         *
         * @param text the text to search
         * @return the index of the first whitespace character, or <tt>-1</tt> if there is none
         */
        private static int indexOfWhitespace(String text) {
            for (int i = 0; i < text.length(); i++) {
                if (Character.isWhitespace(text.charAt(i))) {
                    return i;
                }
            }
            return -1;
        }

        /**
         * Extracts the namespace declarations from the inherit namespace tag found in the given text.
         *
         * @param text the text to search for the inherit namespace tag
         * @return the declarations as a String ready to be appended to a start tag (each with a
         *         leading space), or <tt>null</tt> if the text is <tt>null</tt>, the tag cannot
         *         be found, or the tag declares no namespaces
         */
        private String getNamespacesFromNamespaceToken(String text) {
            if (text == null) {
                return null;
            }

            // grab the namespace tag
            Matcher mat = inheritNamespaceTokenPattern.matcher(text);
            if (mat.find()) {
                text = mat.group(0);
            } else {
                // cannot find namespace tag
                return null;
            }

            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
            Map<String, String> namespaces = new LinkedHashMap<String, String>();
            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
            while (matcher.find()) {
                String prefix = matcher.group(1);
                String url = matcher.group(2);
                if (ObjectHelper.isEmpty(prefix)) {
                    prefix = DEFAULT_NAMESPACE_KEY;
                } else {
                    // skip leading :
                    prefix = prefix.substring(1);
                }
                namespaces.put(prefix, url);
            }

            // did we find any namespaces
            if (namespaces.isEmpty()) {
                return null;
            }

            // build namespace String
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                if (DEFAULT_NAMESPACE_KEY.equals(key)) {
                    sb.append(" xmlns=\"").append(value).append("\"");
                } else {
                    sb.append(" xmlns:").append(key).append("=\"").append(value).append("\"");
                }
            }

            return sb.toString();
        }
    }

}