001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.util.BitSet;
020
/**
 * Encoder for unsafe URI characters.
 * <p>
 * Unsafe ASCII characters are replaced by their percent-encoded form
 * (a {@code %} followed by two upper-case hexadecimal digits). Characters outside
 * the ASCII range are copied to the output unchanged.
 * <p>
 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
 */
public final class UnsafeUriCharactersEncoder {

    /** Upper-case hexadecimal digits, indexed by nibble value, used when writing an escape. */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7',
                                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /** Characters escaped by {@link #encodeHttpURI(String)}. */
    private static final BitSet UNSAFE_CHARACTERS_HTTP;

    /** Characters escaped by {@link #encode(String)}: the HTTP set plus the square brackets. */
    private static final BitSet UNSAFE_CHARACTERS_RFC1738;

    static {
        BitSet http = new BitSet(256);
        for (char unsafe : " \"<>#%{}|\\^~`".toCharArray()) {
            http.set(unsafe);
        }
        UNSAFE_CHARACTERS_HTTP = http;

        // the RFC 1738 set only differs from the HTTP set by also escaping '[' and ']'
        BitSet rfc1738 = (BitSet) http.clone();
        rfc1738.set('[');
        rfc1738.set(']');
        UNSAFE_CHARACTERS_RFC1738 = rfc1738;
    }

    private UnsafeUriCharactersEncoder() {
        // util class
    }

    /**
     * Encodes the unsafe characters of the given string using the RFC 1738 character set
     * of this class (which includes the square brackets).
     *
     * @param s the string to encode, may be <tt>null</tt>
     * @return the encoded string, or the given string itself if nothing had to be encoded
     */
    public static String encode(String s) {
        return encode(s, UNSAFE_CHARACTERS_RFC1738);
    }

    /**
     * Encodes the unsafe characters of the given string using the HTTP character set
     * of this class (which leaves the square brackets as is).
     *
     * @param s the string to encode, may be <tt>null</tt>
     * @return the encoded string, or the given string itself if nothing had to be encoded
     */
    public static String encodeHttpURI(String s) {
        return encode(s, UNSAFE_CHARACTERS_HTTP);
    }

    /**
     * Encodes the characters of the given string which are marked in the given set.
     * <p>
     * Only characters in the range 1..127 are looked up in the set; any other character is
     * copied as is. A <tt>%</tt> sign that is followed by two hexadecimal digits is regarded
     * as an already encoded value and is copied as is.
     *
     * @param s                the string to encode, may be <tt>null</tt>
     * @param unsafeCharacters the characters to encode, indexed by character code
     * @return <tt>null</tt> or the empty string if that was given, the given string itself if it
     *         contains none of the unsafe characters, otherwise a new encoded string
     * @throws NullPointerException if <tt>unsafeCharacters</tt> is <tt>null</tt> and the
     *                              string is not empty
     */
    public static String encode(String s, BitSet unsafeCharacters) {
        if (s == null || s.length() == 0) {
            return s;
        }

        // First check whether we actually need to encode
        final int length = s.length();
        int first = 0;
        while (first < length && !isUnsafe(s.charAt(first), unsafeCharacters)) {
            first++;
        }
        if (first == length) {
            return s;
        }

        // okay there are some unsafe characters so we do need to encode
        // see details at: http://en.wikipedia.org/wiki/Url_encode
        StringBuilder sb = new StringBuilder(length + 16);
        // everything before the first unsafe character is known to be safe
        sb.append(s, 0, first);
        for (int i = first; i < length; i++) {
            char ch = s.charAt(i);
            if (!isUnsafe(ch, unsafeCharacters)) {
                sb.append(ch);
            } else if (ch == '%' && i + 2 < length
                    && isHexDigit(s.charAt(i + 1)) && isHexDigit(s.charAt(i + 2))) {
                // special for % sign: followed by two hex digits means its already
                // percent encoded, so just append as is
                sb.append(ch);
            } else {
                // must escape then, as its an unsafe character
                appendEscape(sb, ch);
            }
        }
        return sb.toString();
    }

    /**
     * Whether the character is an ASCII character (excluding NUL) marked in the given set.
     */
    private static boolean isUnsafe(char ch, BitSet unsafeCharacters) {
        // just deal with the ascii character
        return ch > 0 && ch < 128 && unsafeCharacters.get(ch);
    }

    /**
     * Appends the percent encoded form of the given ASCII character.
     */
    private static void appendEscape(StringBuilder sb, char ch) {
        sb.append('%');
        sb.append(HEX_DIGITS[(ch >> 4) & 0x0f]);
        sb.append(HEX_DIGITS[ch & 0x0f]);
    }

    /**
     * Whether the character is one of <tt>0-9</tt>, <tt>A-F</tt> or <tt>a-f</tt>.
     */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'A' && ch <= 'F')
            || (ch >= 'a' && ch <= 'f');
    }

}