001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.util; 018 019import java.util.BitSet; 020import java.util.List; 021 022/** 023 * Encoder for unsafe URI characters. 024 * <p/> 025 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article. 026 */ 027public final class UnsafeUriCharactersEncoder { 028 private static BitSet unsafeCharactersFastParser; 029 private static BitSet unsafeCharactersRfc1738; 030 private static BitSet unsafeCharactersHttp; 031 private static final char[] HEX_DIGITS = { 032 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 033 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f' }; 034 035 static { 036 unsafeCharactersFastParser = new BitSet(14); 037 unsafeCharactersFastParser.set(' '); 038 unsafeCharactersFastParser.set('"'); 039 unsafeCharactersFastParser.set('<'); 040 unsafeCharactersFastParser.set('>'); 041 unsafeCharactersFastParser.set('%'); 042 unsafeCharactersFastParser.set('{'); 043 unsafeCharactersFastParser.set('}'); 044 unsafeCharactersFastParser.set('|'); 045 unsafeCharactersFastParser.set('\\'); 046 unsafeCharactersFastParser.set('^'); 047 unsafeCharactersFastParser.set('~'); 048 unsafeCharactersFastParser.set('['); 049 unsafeCharactersFastParser.set(']'); 050 unsafeCharactersFastParser.set('`'); 051 // we allow # as a safe when using the fast parser as its used for 052 // looking up beans in the registry (foo=#myBar) 053 } 054 055 static { 056 unsafeCharactersRfc1738 = new BitSet(15); 057 unsafeCharactersRfc1738.set(' '); 058 unsafeCharactersRfc1738.set('"'); 059 unsafeCharactersRfc1738.set('<'); 060 unsafeCharactersRfc1738.set('>'); 061 unsafeCharactersRfc1738.set('#'); 062 unsafeCharactersRfc1738.set('%'); 063 unsafeCharactersRfc1738.set('{'); 064 unsafeCharactersRfc1738.set('}'); 065 unsafeCharactersRfc1738.set('|'); 066 unsafeCharactersRfc1738.set('\\'); 067 unsafeCharactersRfc1738.set('^'); 068 unsafeCharactersRfc1738.set('~'); 069 unsafeCharactersRfc1738.set('['); 070 unsafeCharactersRfc1738.set(']'); 071 unsafeCharactersRfc1738.set('`'); 072 } 073 074 static { 075 unsafeCharactersHttp = new BitSet(13); 076 unsafeCharactersHttp.set(' '); 077 unsafeCharactersHttp.set('"'); 078 unsafeCharactersHttp.set('<'); 079 unsafeCharactersHttp.set('>'); 080 unsafeCharactersHttp.set('#'); 081 unsafeCharactersHttp.set('%'); 082 unsafeCharactersHttp.set('{'); 083 unsafeCharactersHttp.set('}'); 084 unsafeCharactersHttp.set('|'); 085 unsafeCharactersHttp.set('\\'); 086 unsafeCharactersHttp.set('^'); 087 unsafeCharactersHttp.set('~'); 088 unsafeCharactersHttp.set('`'); 089 } 090 091 private UnsafeUriCharactersEncoder() { 092 // util class 093 } 094 095 public static boolean isSafeFastParser(char ch) { 096 return !unsafeCharactersFastParser.get(ch); 097 } 098 099 public static String encode(String s) { 100 return encode(s, unsafeCharactersRfc1738); 101 } 102 103 public static String encodeHttpURI(String s) { 104 return encode(s, unsafeCharactersHttp); 105 } 106 107 public static String encode(String s, BitSet unsafeCharacters) { 108 return encode(s, unsafeCharacters, false); 109 } 110 111 public static String encode(String s, boolean checkRaw) { 112 return encode(s, unsafeCharactersRfc1738, checkRaw); 113 } 114 115 public static String encodeHttpURI(String s, boolean checkRaw) { 116 return encode(s, unsafeCharactersHttp, checkRaw); 117 } 118 119 // Just skip the encode for isRAW part 120 public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) { 121 if (s == null) { 122 return null; 123 } 124 int len = s.length(); 125 if (len == 0) { 126 return s; 127 } 128 129 // first check whether we actually need to encode 130 boolean safe = true; 131 for (int i = 0; i < len; i++) { 132 char ch = s.charAt(i); 133 // just deal with the ascii character 134 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 135 safe = false; 136 break; 137 } 138 } 139 if (safe) { 140 return s; 141 } 142 143 List<Pair<Integer>> rawPairs = null; 144 if (checkRaw) { 145 rawPairs = URISupport.scanRaw(s); 146 } 147 148 // add a bit of extra space as initial capacity 149 int initial = len + 8; 150 151 // okay there are some unsafe characters so we do need to encode 152 // see details at: http://en.wikipedia.org/wiki/Url_encode 153 StringBuilder sb = new StringBuilder(initial); 154 for (int i = 0; i < len; i++) { 155 char ch = s.charAt(i); 156 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 157 // special for % sign as it may be a decimal encoded value 158 if (ch == '%') { 159 char next = i + 1 < len ? s.charAt(i + 1) : ' '; 160 char next2 = i + 2 < len ? s.charAt(i + 2) : ' '; 161 162 if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) { 163 // its already encoded (decimal encoded) so just append as is 164 sb.append(ch); 165 } else { 166 // must escape then, as its an unsafe character 167 appendEscape(sb, (byte) ch); 168 } 169 } else { 170 // must escape then, as its an unsafe character 171 appendEscape(sb, (byte) ch); 172 } 173 } else { 174 sb.append(ch); 175 } 176 } 177 return sb.toString(); 178 } 179 180 private static void appendEscape(StringBuilder sb, byte b) { 181 sb.append('%'); 182 sb.append(HEX_DIGITS[(b >> 4) & 0x0f]); 183 sb.append(HEX_DIGITS[(b >> 0) & 0x0f]); 184 } 185 186 private static boolean isHexDigit(char ch) { 187 // 0..9 A..F a..f 188 return ch >= 48 && ch <= 57 || ch >= 65 && ch <= 70 || ch >= 97 && ch <= 102; 189 } 190 191}