001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019/**
020 * A Camel specific URI parser that parses endpoint URIs in a quasi syntax that Camel uses.
021 *
022 * The {@link java.net.URI} is much slower and parses endpoint URIs into additional parts which Camel does not use or
023 * need.
024 */
025public final class CamelURIParser {
026
027    public static final String[] URI_ALREADY_NORMALIZED = new String[] {};
028
029    private CamelURIParser() {
030    }
031
032    /**
033     * Parses the URI (in fast mode).
034     *
035     * If this parser cannot parse the uri then <tt>null</tt> is returned. And instead the follow code can be used:
036     *
037     * <pre>
038     * URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
039     * </pre>
040     *
041     * @param  uri the uri
042     *
043     * @return     <tt>null</tt> if not possible to parse, if the uri is already normalized, then
044     *             {@link #URI_ALREADY_NORMALIZED} is returned, or an array[3] with scheme,path,query
045     */
046    public static String[] fastParseUri(String uri) {
047        return doParseUri(uri, true);
048    }
049
050    /**
051     * Parses the URI.
052     *
053     * If this parser cannot parse the uri then <tt>null</tt> is returned. And instead the follow code can be used:
054     * 
055     * <pre>
056     * URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
057     * </pre>
058     *
059     * @param  uri the uri
060     *
061     * @return     <tt>null</tt> if not possible to parse, or an array[3] with scheme,path,query
062     */
063    public static String[] parseUri(String uri) {
064        return doParseUri(uri, false);
065    }
066
067    private static String[] doParseUri(String uri, boolean fastParse) {
068        int schemeStart = 0;
069        int schemeEnd = 0;
070        int pathStart = 0;
071        int pathEnd = 0;
072        int queryStart = 0;
073
074        int len = uri.length();
075        for (int i = 0; i < len; i++) {
076            char ch = uri.charAt(i);
077            if (ch > 128) {
078                // must be an ascii char
079                return null;
080            }
081            // must be a safe char
082            if (!UnsafeUriCharactersEncoder.isSafeFastParser(ch)) {
083                return null;
084            }
085            if (schemeEnd == 0) {
086                if (ch == ':') {
087                    schemeEnd = i;
088                    // skip colon
089                    pathStart = i + 1;
090                }
091            } else if (pathEnd == 0) {
092                if (ch == '?') {
093                    pathEnd = i;
094                    // skip ? marker
095                    queryStart = i + 1;
096                }
097            }
098        }
099
100        if (pathStart == 0 && schemeEnd != 0) {
101            // skip colon
102            pathStart = schemeEnd + 1;
103        }
104        // invalid if there is no path anyway
105        if (pathStart >= len) {
106            return null;
107        }
108
109        String scheme = null;
110        if (schemeEnd != 0) {
111
112            // optimized if there are no query and the uri is already in camel style
113            if (fastParse && queryStart == 0 && pathStart + 1 < len) {
114                char ch = uri.charAt(schemeEnd);
115                char ch2 = uri.charAt(pathStart);
116                char ch3 = uri.charAt(pathStart + 1);
117                if (ch == ':' && ch2 == '/' && ch3 == '/') {
118                    return URI_ALREADY_NORMALIZED;
119                }
120            }
121
122            scheme = uri.substring(schemeStart, schemeEnd);
123        }
124        if (scheme == null) {
125            return null;
126        }
127
128        String path;
129        // skip two leading slashes
130        int next = pathStart + 1;
131        if (uri.charAt(pathStart) == '/' && next < len && uri.charAt(next) == '/') {
132            pathStart = pathStart + 2;
133        }
134        if (pathEnd != 0) {
135            path = uri.substring(pathStart, pathEnd);
136        } else {
137            path = uri.substring(pathStart);
138        }
139
140        String query = null;
141        if (queryStart != 0 && queryStart < len) {
142            query = uri.substring(queryStart);
143        }
144
145        return new String[] { scheme, path, query };
146    }
147}