001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.harmony.pack200; 018 019import java.io.EOFException; 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.Map; 025 026/** 027 * CodecEncoding is used to get the right Codec for a given meta-encoding 028 */ 029public class CodecEncoding { 030 031 /** 032 * The canonical encodings are defined to allow a single byte to represent one of the standard encodings. The 033 * following values are defined in the Pack200 specification, and this array cannot be changed. 034 */ 035 private static final BHSDCodec[] canonicalCodec = {null, new BHSDCodec(1, 256), new BHSDCodec(1, 256, 1), 036 new BHSDCodec(1, 256, 0, 1), new BHSDCodec(1, 256, 1, 1), new BHSDCodec(2, 256), new BHSDCodec(2, 256, 1), 037 new BHSDCodec(2, 256, 0, 1), new BHSDCodec(2, 256, 1, 1), new BHSDCodec(3, 256), new BHSDCodec(3, 256, 1), 038 new BHSDCodec(3, 256, 0, 1), new BHSDCodec(3, 256, 1, 1), new BHSDCodec(4, 256), new BHSDCodec(4, 256, 1), 039 new BHSDCodec(4, 256, 0, 1), new BHSDCodec(4, 256, 1, 1), new BHSDCodec(5, 4), new BHSDCodec(5, 4, 1), 040 new BHSDCodec(5, 4, 2), new BHSDCodec(5, 16), new BHSDCodec(5, 16, 1), new BHSDCodec(5, 16, 2), 041 new BHSDCodec(5, 32), new BHSDCodec(5, 32, 1), new BHSDCodec(5, 32, 2), new BHSDCodec(5, 64), 042 new BHSDCodec(5, 64, 1), new BHSDCodec(5, 64, 2), new BHSDCodec(5, 128), new BHSDCodec(5, 128, 1), 043 new BHSDCodec(5, 128, 2), new BHSDCodec(5, 4, 0, 1), new BHSDCodec(5, 4, 1, 1), new BHSDCodec(5, 4, 2, 1), 044 new BHSDCodec(5, 16, 0, 1), new BHSDCodec(5, 16, 1, 1), new BHSDCodec(5, 16, 2, 1), new BHSDCodec(5, 32, 0, 1), 045 new BHSDCodec(5, 32, 1, 1), new BHSDCodec(5, 32, 2, 1), new BHSDCodec(5, 64, 0, 1), new BHSDCodec(5, 64, 1, 1), 046 new BHSDCodec(5, 64, 2, 1), new BHSDCodec(5, 128, 0, 1), new BHSDCodec(5, 128, 1, 1), 047 new BHSDCodec(5, 128, 2, 1), new BHSDCodec(2, 192), new BHSDCodec(2, 224), new BHSDCodec(2, 240), 048 new BHSDCodec(2, 248), new BHSDCodec(2, 252), new BHSDCodec(2, 8, 0, 1), new BHSDCodec(2, 8, 1, 1), 049 new BHSDCodec(2, 16, 0, 1), new BHSDCodec(2, 16, 1, 1), new BHSDCodec(2, 32, 0, 1), new BHSDCodec(2, 32, 1, 1), 050 new BHSDCodec(2, 64, 0, 1), new BHSDCodec(2, 64, 1, 1), new BHSDCodec(2, 128, 0, 1), 051 new BHSDCodec(2, 128, 1, 1), new BHSDCodec(2, 192, 0, 1), new BHSDCodec(2, 192, 1, 1), 052 new BHSDCodec(2, 224, 0, 1), new BHSDCodec(2, 224, 1, 1), new BHSDCodec(2, 240, 0, 1), 053 new BHSDCodec(2, 240, 1, 1), new BHSDCodec(2, 248, 0, 1), new BHSDCodec(2, 248, 1, 1), new BHSDCodec(3, 192), 054 new BHSDCodec(3, 224), new BHSDCodec(3, 240), new BHSDCodec(3, 248), new BHSDCodec(3, 252), 055 new BHSDCodec(3, 8, 0, 1), new BHSDCodec(3, 8, 1, 1), new BHSDCodec(3, 16, 0, 1), new BHSDCodec(3, 16, 1, 1), 056 new BHSDCodec(3, 32, 0, 1), new BHSDCodec(3, 32, 1, 1), new BHSDCodec(3, 64, 0, 1), new BHSDCodec(3, 64, 1, 1), 057 new BHSDCodec(3, 128, 0, 1), new BHSDCodec(3, 128, 1, 1), new BHSDCodec(3, 192, 0, 1), 058 new BHSDCodec(3, 192, 1, 1), new BHSDCodec(3, 224, 0, 1), new BHSDCodec(3, 224, 1, 1), 059 new BHSDCodec(3, 240, 0, 1), new BHSDCodec(3, 240, 1, 1), new BHSDCodec(3, 248, 0, 1), 060 new BHSDCodec(3, 248, 1, 1), new BHSDCodec(4, 192), new BHSDCodec(4, 224), new BHSDCodec(4, 240), 061 new BHSDCodec(4, 248), new BHSDCodec(4, 252), new BHSDCodec(4, 8, 0, 1), new BHSDCodec(4, 8, 1, 1), 062 new BHSDCodec(4, 16, 0, 1), new BHSDCodec(4, 16, 1, 1), new BHSDCodec(4, 32, 0, 1), new BHSDCodec(4, 32, 1, 1), 063 new BHSDCodec(4, 64, 0, 1), new BHSDCodec(4, 64, 1, 1), new BHSDCodec(4, 128, 0, 1), 064 new BHSDCodec(4, 128, 1, 1), new BHSDCodec(4, 192, 0, 1), new BHSDCodec(4, 192, 1, 1), 065 new BHSDCodec(4, 224, 0, 1), new BHSDCodec(4, 224, 1, 1), new BHSDCodec(4, 240, 0, 1), 066 new BHSDCodec(4, 240, 1, 1), new BHSDCodec(4, 248, 0, 1), new BHSDCodec(4, 248, 1, 1)}; 067 068 private static Map<BHSDCodec, Integer> canonicalCodecsToSpecifiers; 069 070 public static BHSDCodec getCanonicalCodec(final int i) { 071 return canonicalCodec[i]; 072 } 073 074 /** 075 * Returns the codec specified by the given value byte and optional byte header. If the value is >= 116, then 076 * bytes may be consumed from the secondary input stream, which is taken to be the contents of the band_headers byte 077 * array. Since the values from this are consumed and not repeated, the input stream should be reused for subsequent 078 * encodings. This does not therefore close the input stream. 079 * 080 * @param value the canonical encoding value 081 * @param in the input stream to read additional byte headers from 082 * @param defaultCodec TODO 083 * @return the corresponding codec, or {@code null} if the default should be used 084 * 085 * @throws IOException if there is a problem reading from the input stream (which in reality, is never, since the 086 * band_headers are likely stored in a byte array and accessed via a ByteArrayInputStream. However, an 087 * EOFException could occur if things go wrong) 088 * @throws Pack200Exception TODO 089 */ 090 public static Codec getCodec(final int value, final InputStream in, final Codec defaultCodec) 091 throws IOException, Pack200Exception { 092 // Sanity check to make sure that no-one has changed 093 // the canonical codecs, which would really cause havoc 094 if (canonicalCodec.length != 116) { 095 throw new Error("Canonical encodings have been incorrectly modified"); 096 } 097 if (value < 0) { 098 throw new IllegalArgumentException("Encoding cannot be less than zero"); 099 } 100 if (value == 0) { 101 return defaultCodec; 102 } 103 if (value <= 115) { 104 return canonicalCodec[value]; 105 } 106 if (value == 116) { 107 int code = in.read(); 108 if (code == -1) { 109 throw new EOFException("End of buffer read whilst trying to decode codec"); 110 } 111 final int d = (code & 0x01); 112 final int s = (code >> 1 & 0x03); 113 final int b = (code >> 3 & 0x07) + 1; // this might result in an invalid 114 // number, but it's checked in the 115 // Codec constructor 116 code = in.read(); 117 if (code == -1) { 118 throw new EOFException("End of buffer read whilst trying to decode codec"); 119 } 120 final int h = code + 1; 121 // This handles the special cases for invalid combinations of data. 122 return new BHSDCodec(b, h, s, d); 123 } 124 if (value >= 117 && value <= 140) { // Run codec 125 final int offset = value - 117; 126 final int kx = offset & 3; 127 final boolean kbflag = (offset >> 2 & 1) == 1; 128 final boolean adef = (offset >> 3 & 1) == 1; 129 final boolean bdef = (offset >> 4 & 1) == 1; 130 // If both A and B use the default encoding, what's the point of 131 // having a run of default values followed by default values 132 if (adef && bdef) { 133 throw new Pack200Exception("ADef and BDef should never both be true"); 134 } 135 final int kb = (kbflag ? in.read() : 3); 136 final int k = (kb + 1) * (int) Math.pow(16, kx); 137 Codec aCodec, bCodec; 138 if (adef) { 139 aCodec = defaultCodec; 140 } else { 141 aCodec = getCodec(in.read(), in, defaultCodec); 142 } 143 if (bdef) { 144 bCodec = defaultCodec; 145 } else { 146 bCodec = getCodec(in.read(), in, defaultCodec); 147 } 148 return new RunCodec(k, aCodec, bCodec); 149 } 150 if ((value < 141) || (value > 188)) { 151 throw new Pack200Exception("Invalid codec encoding byte (" + value + ") found"); 152 } 153 final int offset = value - 141; 154 final boolean fdef = (offset & 1) == 1; 155 final boolean udef = (offset >> 1 & 1) == 1; 156 final int tdefl = offset >> 2; 157 final boolean tdef = tdefl != 0; 158 // From section 6.7.3 of spec 159 final int[] tdefToL = {0, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252}; 160 final int l = tdefToL[tdefl]; 161 // NOTE: Do not re-factor this to bring out uCodec; the order in 162 // which 163 // they are read from the stream is important 164 if (tdef) { 165 final Codec fCodec = (fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec)); 166 final Codec uCodec = (udef ? defaultCodec : getCodec(in.read(), in, defaultCodec)); 167 // Unfortunately, if tdef, then tCodec depends both on l and 168 // also on k, the 169 // number of items read from the fCodec. So we don't know in 170 // advance what 171 // the codec will be. 172 return new PopulationCodec(fCodec, l, uCodec); 173 } 174 final Codec fCodec = (fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec)); 175 final Codec tCodec = getCodec(in.read(), in, defaultCodec); 176 final Codec uCodec = (udef ? defaultCodec : getCodec(in.read(), in, defaultCodec)); 177 return new PopulationCodec(fCodec, tCodec, uCodec); 178 } 179 180 public static int[] getSpecifier(final Codec codec, final Codec defaultForBand) { 181 // lazy initialization 182 if (canonicalCodecsToSpecifiers == null) { 183 final HashMap<BHSDCodec, Integer> reverseMap = new HashMap<>(canonicalCodec.length); 184 for (int i = 0; i < canonicalCodec.length; i++) { 185 reverseMap.put(canonicalCodec[i], Integer.valueOf(i)); 186 } 187 canonicalCodecsToSpecifiers = reverseMap; 188 } 189 190 if (canonicalCodecsToSpecifiers.containsKey(codec)) { 191 return new int[] { canonicalCodecsToSpecifiers.get(codec).intValue() }; 192 } 193 if (codec instanceof BHSDCodec) { 194 // Cache these? 195 final BHSDCodec bhsdCodec = (BHSDCodec) codec; 196 final int[] specifiers = new int[3]; 197 specifiers[0] = 116; 198 specifiers[1] = (bhsdCodec.isDelta() ? 1 : 0) + 2 * bhsdCodec.getS() + 8 * (bhsdCodec.getB() - 1); 199 specifiers[2] = bhsdCodec.getH() - 1; 200 return specifiers; 201 } 202 if (codec instanceof RunCodec) { 203 final RunCodec runCodec = (RunCodec) codec; 204 final int k = runCodec.getK(); 205 int kb; 206 int kx; 207 if (k <= 256) { 208 kb = 0; 209 kx = k - 1; 210 } else if (k <= 4096) { 211 kb = 1; 212 kx = k / 16 - 1; 213 } else if (k <= 65536) { 214 kb = 2; 215 kx = k / 256 - 1; 216 } else { 217 kb = 3; 218 kx = k / 4096 - 1; 219 } 220 final Codec aCodec = runCodec.getACodec(); 221 final Codec bCodec = runCodec.getBCodec(); 222 int abDef = 0; 223 if (aCodec.equals(defaultForBand)) { 224 abDef = 1; 225 } else if (bCodec.equals(defaultForBand)) { 226 abDef = 2; 227 } 228 final int first = 117 + kb + (kx == 3 ? 0 : 4) + (8 * abDef); 229 final int[] aSpecifier = abDef == 1 ? new int[0] : getSpecifier(aCodec, defaultForBand); 230 final int[] bSpecifier = abDef == 2 ? new int[0] : getSpecifier(bCodec, defaultForBand); 231 final int[] specifier = new int[1 + (kx == 3 ? 0 : 1) + aSpecifier.length + bSpecifier.length]; 232 specifier[0] = first; 233 int index = 1; 234 if (kx != 3) { 235 specifier[1] = kx; 236 index++; 237 } 238 for (final int element : aSpecifier) { 239 specifier[index] = element; 240 index++; 241 } 242 for (final int element : bSpecifier) { 243 specifier[index] = element; 244 index++; 245 } 246 return specifier; 247 } 248 if (codec instanceof PopulationCodec) { 249 final PopulationCodec populationCodec = (PopulationCodec) codec; 250 final Codec tokenCodec = populationCodec.getTokenCodec(); 251 final Codec favouredCodec = populationCodec.getFavouredCodec(); 252 final Codec unfavouredCodec = populationCodec.getUnfavouredCodec(); 253 final int fDef = favouredCodec.equals(defaultForBand) ? 1 : 0; 254 final int uDef = unfavouredCodec.equals(defaultForBand) ? 1 : 0; 255 int tDefL = 0; 256 final int[] favoured = populationCodec.getFavoured(); 257 if (favoured != null) { 258 if (tokenCodec == Codec.BYTE1) { 259 tDefL = 1; 260 } else if (tokenCodec instanceof BHSDCodec) { 261 final BHSDCodec tokenBHSD = (BHSDCodec) tokenCodec; 262 if (tokenBHSD.getS() == 0) { 263 final int[] possibleLValues = {4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252}; 264 final int l = 256 - tokenBHSD.getH(); 265 int index = Arrays.binarySearch(possibleLValues, l); 266 if (index != -1) { 267 // TODO: check range is ok for ks 268 tDefL = index++; 269 } 270 } 271 } 272 } 273 final int first = 141 + fDef + (2 * uDef) + (4 * tDefL); 274 final int[] favouredSpecifier = fDef == 1 ? new int[0] : getSpecifier(favouredCodec, defaultForBand); 275 final int[] tokenSpecifier = tDefL != 0 ? new int[0] : getSpecifier(tokenCodec, defaultForBand); 276 final int[] unfavouredSpecifier = uDef == 1 ? new int[0] : getSpecifier(unfavouredCodec, defaultForBand); 277 final int[] specifier = new int[1 + favouredSpecifier.length + unfavouredSpecifier.length 278 + tokenSpecifier.length]; 279 specifier[0] = first; 280 int index = 1; 281 for (final int element : favouredSpecifier) { 282 specifier[index] = element; 283 index++; 284 } 285 for (final int element : tokenSpecifier) { 286 specifier[index] = element; 287 index++; 288 } 289 for (final int element : unfavouredSpecifier) { 290 specifier[index] = element; 291 index++; 292 } 293 return specifier; 294 } 295 296 return null; 297 } 298 299 public static int getSpecifierForDefaultCodec(final BHSDCodec defaultCodec) { 300 return getSpecifier(defaultCodec, null)[0]; 301 } 302}