/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.StringUtils;
036
037/**
038 * Static functions for dealing with files of the same format
039 * that the Unix "md5sum" utility writes.
040 */
041public abstract class MD5FileUtils {
042  private static final Log LOG = LogFactory.getLog(
043      MD5FileUtils.class);
044
045  private static final String MD5_SUFFIX = ".md5";
046  private static final Pattern LINE_REGEX =
047    Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");
048  
049  /**
050   * Verify that the previously saved md5 for the given file matches
051   * expectedMd5.
052   * @throws IOException 
053   */
054  public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
055      throws IOException {
056    MD5Hash storedHash = readStoredMd5ForFile(dataFile);
057    // Check the hash itself
058    if (!expectedMD5.equals(storedHash)) {
059      throw new IOException(
060          "File " + dataFile + " did not match stored MD5 checksum " +
061          " (stored: " + storedHash + ", computed: " + expectedMD5);
062    }
063  }
064  
065  /**
066   * Read the md5 checksum stored alongside the given file, or null
067   * if no md5 is stored.
068   * @param dataFile the file containing data
069   * @return the checksum stored in dataFile.md5
070   */
071  public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
072    File md5File = getDigestFileForFile(dataFile);
073
074    String md5Line;
075    
076    if (!md5File.exists()) {
077      return null;
078    }
079    
080    BufferedReader reader =
081      new BufferedReader(new FileReader(md5File));
082    try {
083      md5Line = reader.readLine();
084      if (md5Line == null) { md5Line = ""; }
085      md5Line = md5Line.trim();
086    } catch (IOException ioe) {
087      throw new IOException("Error reading md5 file at " + md5File, ioe);
088    } finally {
089      IOUtils.cleanup(LOG, reader);
090    }
091    
092    Matcher matcher = LINE_REGEX.matcher(md5Line);
093    if (!matcher.matches()) {
094      throw new IOException("Invalid MD5 file at " + md5File
095          + " (does not match expected pattern)");
096    }
097    String storedHash = matcher.group(1);
098    File referencedFile = new File(matcher.group(2));
099
100    // Sanity check: Make sure that the file referenced in the .md5 file at
101    // least has the same name as the file we expect
102    if (!referencedFile.getName().equals(dataFile.getName())) {
103      throw new IOException(
104          "MD5 file at " + md5File + " references file named " +
105          referencedFile.getName() + " but we expected it to reference " +
106          dataFile);
107    }
108    return new MD5Hash(storedHash);
109  }
110  
111  /**
112   * Read dataFile and compute its MD5 checksum.
113   */
114  public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
115    InputStream in = new FileInputStream(dataFile);
116    try {
117      MessageDigest digester = MD5Hash.getDigester();
118      DigestInputStream dis = new DigestInputStream(in, digester);
119      IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);
120      
121      return new MD5Hash(digester.digest());
122    } finally {
123      IOUtils.closeStream(in);
124    }
125  }
126
127  /**
128   * Save the ".md5" file that lists the md5sum of another file.
129   * @param dataFile the original file whose md5 was computed
130   * @param digest the computed digest
131   * @throws IOException
132   */
133  public static void saveMD5File(File dataFile, MD5Hash digest)
134      throws IOException {
135    File md5File = getDigestFileForFile(dataFile);
136    String digestString = StringUtils.byteToHexString(
137        digest.getDigest());
138    String md5Line = digestString + " *" + dataFile.getName() + "\n";
139    
140    AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
141    afos.write(md5Line.getBytes());
142    afos.close();
143    LOG.debug("Saved MD5 " + digest + " to " + md5File);
144  }
145
146  /**
147   * @return a reference to the file with .md5 suffix that will
148   * contain the md5 checksum for the given data file.
149   */
150  public static File getDigestFileForFile(File file) {
151    return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
152  }
153}