/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.zip;

import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;

import static java.util.Collections.synchronizedList;
import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;

/**
 * Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
 * <p>
 * Note that this class generally makes no guarantees about the order of things written to
 * the output file. Things that need to come in a specific order (manifests, directories)
 * must be handled by the client of this class, usually by writing these things to the
 * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p>
 * <p>
 * The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
 * memory model consistency, this will be shut down by this class prior to completion.
 * </p>
 * @since 1.10
 */
public class ParallelScatterZipCreator {
    /**
     * Every scatter stream created by a worker thread, registered by {@link #tlScatterStreams}.
     * The list is a synchronized wrapper; iterating it requires holding its monitor.
     */
    private final List<ScatterZipOutputStream> streams = synchronizedList(new ArrayList<ScatterZipOutputStream>());
    /** Executor that runs the compression callables; shut down by {@link #writeTo writeTo}. */
    private final ExecutorService es;
    /** Source of the backing store behind each scatter stream. */
    private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
    /** Futures of all callables handed to {@link #submit submit}, in submission order. */
    private final List<Future<Object>> futures = new ArrayList<Future<Object>>();

    /** Wall-clock time (ms) at which this instance was constructed. */
    private final long startedAt = System.currentTimeMillis();
    /** Wall-clock time (ms) at which the executor finished; 0 until {@link #writeTo writeTo} gets that far. */
    private long compressionDoneAt = 0;
    /** Wall-clock time (ms) at which all scatter streams had been written to the target. */
    private long scatterDoneAt;

    /**
     * Default supplier: each call creates a new temp file and wraps it in a
     * {@link FileBasedScatterGatherBackingStore}.
     */
    private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
        /** Counter used to give each temp file a distinct suffix. */
        final AtomicInteger storeNum = new AtomicInteger(0);

        public ScatterGatherBackingStore get() throws IOException {
            File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet());
            return new FileBasedScatterGatherBackingStore(tempFile);
        }
    }

    /**
     * Creates a scatter stream on top of a fresh backing store, compressing with
     * {@link Deflater#DEFAULT_COMPRESSION}.
     *
     * @param scatterGatherBackingStoreSupplier supplier asked for the backing store
     * @return the new scatter stream
     * @throws IOException if the supplier fails to provide a backing store
     */
    private ScatterZipOutputStream createDeferred(ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier)
            throws IOException {
        ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get();
        StreamCompressor sc = StreamCompressor.create(Deflater.DEFAULT_COMPRESSION, bs);
        return new ScatterZipOutputStream(bs, sc);
    }

    /**
     * One scatter stream per thread that executes a compression callable. The stream is created
     * on first use and recorded in {@link #streams} so that {@link #writeTo writeTo} can find it.
     */
    private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() {
        @Override
        protected ScatterZipOutputStream initialValue() {
            try {
                ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier);
                streams.add(scatterStream);
                return scatterStream;
            } catch (IOException e) {
                // initialValue() cannot declare checked exceptions
                throw new RuntimeException(e);
            }
        }
    };

    /**
     * Create a ParallelScatterZipCreator with default threads, which is set to the number of available
     * processors, as defined by {@link java.lang.Runtime#availableProcessors}
     */
    public ParallelScatterZipCreator() {
        this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use for parallel scheduling. For technical reasons,
     *                        this will be shut down by this class.
     */
    public ParallelScatterZipCreator(ExecutorService executorService) {
        this(executorService, new DefaultBackingStoreSupplier());
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService      The executorService to use. For technical reasons, this will be shut down
     *                             by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     */
    public ParallelScatterZipCreator(ExecutorService executorService,
                                     ScatterGatherBackingStoreSupplier backingStoreSupplier) {
        this.backingStoreSupplier = backingStoreSupplier;
        es = executorService;
    }

    /**
     * Adds an archive entry to this archive.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     */
    public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
        submit(createCallable(zipArchiveEntry, source));
    }

    /**
     * Submit a callable for compression.
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     */
    public final void submit(Callable<Object> callable) {
        futures.add(es.submit(callable));
    }

    /**
     * Create a callable that will compress the given archive entry.
     *
     * <p>This method is expected to be called from a single client thread.</p>
     *
     * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submit submit}.
     * The most common use case for using {@link #createCallable createCallable} and {@link #submit submit} from a
     * client is if you want to wrap the callable in something that can be prioritized by the supplied
     * {@link ExecutorService}, for instance to process large or slow files first.
     * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     * @return A callable that should subsequently be passed to {@link #submit submit}, possibly in a
     *         wrapped/adapted form. The value of this callable is not used, but any exceptions happening
     *         inside the compression will be propagated through the callable.
     * @throws IllegalArgumentException if no compression method has been set on the entry
     */
    public final Callable<Object> createCallable(ZipArchiveEntry zipArchiveEntry, InputStreamSupplier source) {
        final int method = zipArchiveEntry.getMethod();
        if (method == ZipMethod.UNKNOWN_CODE) {
            throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
        }
        final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source);
        return new Callable<Object>() {
            public Object call() throws Exception {
                // Each executing thread writes to its own scatter stream.
                tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequest);
                return null;
            }
        };
    }


    /**
     * Write the contents of this to the target {@link ZipArchiveOutputStream}.
     * <p>
     * It may be beneficial to write things like directories and manifest files to the targetStream
     * before calling this method.
     * </p>
     * <p>
     * The executor is shut down by this method whether or not the compression tasks succeeded.
     * If this method fails, the scatter streams that have not yet been closed are closed on a
     * best-effort basis before the failure is propagated.
     * </p>
     *
     * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
     * @throws IOException          If writing fails
     * @throws InterruptedException If we get interrupted
     * @throws ExecutionException   If something happens in the parallel execution
     */
    public void writeTo(ZipArchiveOutputStream targetStream)
            throws IOException, InterruptedException, ExecutionException {

        // Count of leading entries of 'streams' whose close() has already been invoked by the
        // gather loop; the cleanup in the outer finally starts after them so nothing is closed twice.
        int closeAttempted = 0;
        try {
            try {
                // Make sure we catch any exceptions from parallel phase
                for (Future<?> future : futures) {
                    future.get();
                }
            } finally {
                // Shut down even when a task failed, otherwise the worker threads are leaked.
                es.shutdown();
            }

            es.awaitTermination(1000 * 60, TimeUnit.SECONDS);  // == Infinity. We really *must* wait for this to complete

            // It is important that all threads terminate before we go on, ensure happens-before relationship
            compressionDoneAt = System.currentTimeMillis();

            // Iterating a synchronizedList requires holding its monitor.
            synchronized (streams) {
                for (ScatterZipOutputStream scatterStream : streams) {
                    scatterStream.writeTo(targetStream);
                    closeAttempted++; // counted before close() so a failing close() is not retried
                    scatterStream.close();
                }
            }

            scatterDoneAt = System.currentTimeMillis();
        } finally {
            // No-op on success (everything is already closed); on failure releases what is left.
            closeRemainingStreams(closeAttempted);
        }
    }

    /**
     * Closes the scatter streams from the given index onwards, ignoring I/O errors so that the
     * failure that triggered the cleanup is the one the caller sees.
     *
     * @param firstUnclosed index into {@link #streams} of the first stream that still needs closing
     */
    private void closeRemainingStreams(int firstUnclosed) {
        synchronized (streams) {
            for (int i = firstUnclosed; i < streams.size(); i++) {
                try {
                    streams.get(i).close();
                } catch (IOException ignored) {
                    // best effort: a more relevant exception is already on its way to the caller
                }
            }
        }
    }

    /**
     * Returns a message describing the overall statistics of the compression run
     *
     * @return A {@link ScatterStatistics} built from the elapsed milliseconds between construction and
     *         the end of compression, and between the end of compression and the end of the gather phase
     */
    public ScatterStatistics getStatisticsMessage() {
        return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt);
    }
}