001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.hdfs.util; 019 020 import java.io.BufferedReader; 021 import java.io.File; 022 import java.io.FileInputStream; 023 import java.io.IOException; 024 import java.io.InputStream; 025 import java.io.InputStreamReader; 026 import java.security.DigestInputStream; 027 import java.security.MessageDigest; 028 import java.util.regex.Matcher; 029 import java.util.regex.Pattern; 030 031 import org.apache.commons.logging.Log; 032 import org.apache.commons.logging.LogFactory; 033 import org.apache.hadoop.io.IOUtils; 034 import org.apache.hadoop.io.MD5Hash; 035 import org.apache.hadoop.util.StringUtils; 036 037 import com.google.common.base.Charsets; 038 039 /** 040 * Static functions for dealing with files of the same format 041 * that the Unix "md5sum" utility writes. 042 */ 043 public abstract class MD5FileUtils { 044 private static final Log LOG = LogFactory.getLog( 045 MD5FileUtils.class); 046 047 public static final String MD5_SUFFIX = ".md5"; 048 private static final Pattern LINE_REGEX = 049 Pattern.compile("([0-9a-f]{32}) [ \\*](.+)"); 050 051 /** 052 * Verify that the previously saved md5 for the given file matches 053 * expectedMd5. 054 * @throws IOException 055 */ 056 public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5) 057 throws IOException { 058 MD5Hash storedHash = readStoredMd5ForFile(dataFile); 059 // Check the hash itself 060 if (!expectedMD5.equals(storedHash)) { 061 throw new IOException( 062 "File " + dataFile + " did not match stored MD5 checksum " + 063 " (stored: " + storedHash + ", computed: " + expectedMD5); 064 } 065 } 066 067 /** 068 * Read the md5 checksum stored alongside the given file, or null 069 * if no md5 is stored. 070 * @param dataFile the file containing data 071 * @return the checksum stored in dataFile.md5 072 */ 073 public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException { 074 File md5File = getDigestFileForFile(dataFile); 075 076 String md5Line; 077 078 if (!md5File.exists()) { 079 return null; 080 } 081 082 BufferedReader reader = 083 new BufferedReader(new InputStreamReader(new FileInputStream( 084 md5File), Charsets.UTF_8)); 085 try { 086 md5Line = reader.readLine(); 087 if (md5Line == null) { md5Line = ""; } 088 md5Line = md5Line.trim(); 089 } catch (IOException ioe) { 090 throw new IOException("Error reading md5 file at " + md5File, ioe); 091 } finally { 092 IOUtils.cleanup(LOG, reader); 093 } 094 095 Matcher matcher = LINE_REGEX.matcher(md5Line); 096 if (!matcher.matches()) { 097 throw new IOException("Invalid MD5 file at " + md5File 098 + " (does not match expected pattern)"); 099 } 100 String storedHash = matcher.group(1); 101 File referencedFile = new File(matcher.group(2)); 102 103 // Sanity check: Make sure that the file referenced in the .md5 file at 104 // least has the same name as the file we expect 105 if (!referencedFile.getName().equals(dataFile.getName())) { 106 throw new IOException( 107 "MD5 file at " + md5File + " references file named " + 108 referencedFile.getName() + " but we expected it to reference " + 109 dataFile); 110 } 111 return new MD5Hash(storedHash); 112 } 113 114 /** 115 * Read dataFile and compute its MD5 checksum. 116 */ 117 public static MD5Hash computeMd5ForFile(File dataFile) throws IOException { 118 InputStream in = new FileInputStream(dataFile); 119 try { 120 MessageDigest digester = MD5Hash.getDigester(); 121 DigestInputStream dis = new DigestInputStream(in, digester); 122 IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024); 123 124 return new MD5Hash(digester.digest()); 125 } finally { 126 IOUtils.closeStream(in); 127 } 128 } 129 130 /** 131 * Save the ".md5" file that lists the md5sum of another file. 132 * @param dataFile the original file whose md5 was computed 133 * @param digest the computed digest 134 * @throws IOException 135 */ 136 public static void saveMD5File(File dataFile, MD5Hash digest) 137 throws IOException { 138 File md5File = getDigestFileForFile(dataFile); 139 String digestString = StringUtils.byteToHexString( 140 digest.getDigest()); 141 String md5Line = digestString + " *" + dataFile.getName() + "\n"; 142 143 AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File); 144 afos.write(md5Line.getBytes(Charsets.UTF_8)); 145 afos.close(); 146 LOG.debug("Saved MD5 " + digest + " to " + md5File); 147 } 148 149 /** 150 * @return a reference to the file with .md5 suffix that will 151 * contain the md5 checksum for the given data file. 152 */ 153 public static File getDigestFileForFile(File file) { 154 return new File(file.getParentFile(), file.getName() + MD5_SUFFIX); 155 } 156 }