001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017
018package org.apache.commons.compress.archivers.zip;
019
020import static java.nio.charset.StandardCharsets.UTF_8;
021
022import java.nio.ByteBuffer;
023import java.nio.charset.Charset;
024import java.nio.charset.UnsupportedCharsetException;
025
026import org.apache.commons.compress.utils.CharsetNames;
027import org.apache.commons.io.Charsets;
028
029/**
030 * Static helper functions for robustly encoding file names in ZIP files.
031 */
032public abstract class ZipEncodingHelper {
033
034    /**
035     * UTF-8.
036     */
037    static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(CharsetNames.UTF_8);
038
039    /**
040     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
041     * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
042     * <p>
043     * If the requested character set cannot be found, the platform default will be used instead.
044     * </p>
045     *
046     * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
047     * @return A ZIP encoding for the given encoding name.
048     * @since 1.26.0
049     */
050    public static ZipEncoding getZipEncoding(final Charset charset) {
051        final Charset actual = Charsets.toCharset(charset);
052        final boolean useReplacement = isUTF8(actual);
053        return new NioZipEncoding(actual, useReplacement);
054    }
055
056    /**
057     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
058     * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
059     * <p>
060     * If the requested character set cannot be found, the platform default will be used instead.
061     * </p>
062     *
063     * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding.
064     * @return A ZIP encoding for the given encoding name.
065     */
066    public static ZipEncoding getZipEncoding(final String name) {
067        Charset charset = Charset.defaultCharset();
068        try {
069            charset = Charsets.toCharset(name);
070        } catch (final UnsupportedCharsetException ignore) { // NOSONAR we use the default encoding instead
071        }
072        final boolean useReplacement = isUTF8(charset.name());
073        return new NioZipEncoding(charset, useReplacement);
074    }
075
076    static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
077        buffer.limit(buffer.position());
078        buffer.rewind();
079        final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
080        on.put(buffer);
081        return on;
082    }
083
084    /**
085     * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
086     *
087     * @param charset If the given charset is null, then check the platform's default encoding.
088     */
089    static boolean isUTF8(final Charset charset) {
090        return isUTF8Alias(Charsets.toCharset(charset).name());
091    }
092
093    /**
094     * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
095     *
096     * @param charsetName If the given name is null, then check the platform's default encoding.
097     */
098    static boolean isUTF8(final String charsetName) {
099        return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name());
100    }
101
102    private static boolean isUTF8Alias(final String actual) {
103        return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
104    }
105}