/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs;

import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.UncheckedExecutionException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventsList;
import org.apache.hadoop.hdfs.inotify.MissingEventsException;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/**
 * Stream for reading inotify events. DFSInotifyEventInputStreams should not
 * be shared among multiple threads.
 */
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class DFSInotifyEventInputStream {
  public static final Logger LOG =
      LoggerFactory.getLogger(DFSInotifyEventInputStream.class);

  /** NameNode handle used to query the current txid and to fetch edits. */
  private final ClientProtocol namenode;
  /** Iterator over the events of the most recently fetched batch. */
  private Iterator<Event> it;
  /**
   * Last txid covered by the most recently fetched batch. A value of -1 means
   * the starting txid is not yet known and must be read from the NameNode.
   */
  private long lastReadTxid;
  /**
   * The most recent txid the NameNode told us it has sync'ed -- helps us
   * determine how far behind we are in the edit stream.
   */
  private long syncTxid;
  /**
   * Used to generate wait times in {@link DFSInotifyEventInputStream#take()}.
   */
  private final Random rng = new Random();

  /** Base back-off, in milliseconds, used by the blocking read methods. */
  private static final int INITIAL_WAIT_MS = 10;

  /**
   * Creates a stream that only reports events for transactions newer than the
   * NameNode's current edit log txid.
   *
   * @param namenode NameNode handle used for all subsequent requests
   * @throws IOException if the current edit log txid cannot be read
   */
  DFSInotifyEventInputStream(ClientProtocol namenode) throws IOException {
    this(namenode, namenode.getCurrentEditLogTxid()); // only consider new txn's
  }

  /**
   * Creates a stream that reports events for transactions after
   * {@code lastReadTxid}.
   *
   * @param namenode NameNode handle used for all subsequent requests
   * @param lastReadTxid txid after which events should be returned; -1 makes
   *     the first {@link #poll()} read the current txid from the NameNode
   * @throws IOException declared for callers; not thrown by this constructor
   *     itself
   */
  DFSInotifyEventInputStream(ClientProtocol namenode, long lastReadTxid)
      throws IOException {
    this.namenode = namenode;
    // JDK replacement for Guava's deprecated Iterators.emptyIterator()
    this.it = Collections.emptyIterator();
    this.lastReadTxid = lastReadTxid;
  }

  /**
   * Returns the next event in the stream or null if no new events are currently
   * available.
   *
   * @return the next event, or null if none is currently available
   * @throws IOException because of network error or edit log
   * corruption. Also possible if JournalNodes are unresponsive in the
   * QJM setting (even one unresponsive JournalNode is enough in rare cases),
   * so catching this exception and retrying at least a few times is
   * recommended.
   * @throws MissingEventsException if we cannot return the next event in the
   * stream because the data for the event (and possibly some subsequent events)
   * has been deleted (generally because this stream is a very large number of
   * events behind the current state of the NameNode). It is safe to continue
   * reading from the stream after this exception is thrown -- the next
   * available event will be returned.
   */
  public Event poll() throws IOException, MissingEventsException {
    // need to keep retrying until the NN sends us the latest committed txid
    if (lastReadTxid == -1) {
      LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
      lastReadTxid = namenode.getCurrentEditLogTxid();
      return null;
    }
    if (!it.hasNext()) {
      EventsList el = namenode.getEditsFromTxid(lastReadTxid + 1);
      if (el.getLastTxid() != -1) {
        // we only want to set syncTxid when we were actually able to read some
        // edits on the NN -- otherwise it will seem like edits are being
        // generated faster than we can read them when the problem is really
        // that we are temporarily unable to read edits
        syncTxid = el.getSyncTxid();
        it = el.getEvents().iterator();
        long formerLastReadTxid = lastReadTxid;
        lastReadTxid = el.getLastTxid();
        // state is already advanced to the new batch before throwing, so a
        // subsequent poll() resumes with the first available event
        if (el.getFirstTxid() != formerLastReadTxid + 1) {
          throw new MissingEventsException(formerLastReadTxid + 1,
              el.getFirstTxid());
        }
      } else {
        LOG.debug("poll(): read no edits from the NN when requesting edits " +
            "after txid {}", lastReadTxid);
        return null;
      }
    }

    if (it.hasNext()) { // can be empty if el.getLastTxid != -1 but none of the
      // newly seen edit log ops actually got converted to events
      return it.next();
    } else {
      return null;
    }
  }

  /**
   * Returns an estimate of how many events behind the NameNode's current state
   * this stream is. Clients should periodically call this method and check if
   * its result is steadily increasing, which indicates that they are falling
   * behind (i.e. events are being generated faster than the client is reading
   * them). If a client falls too far behind events may be deleted before the
   * client can read them.
   * <p>
   * A return value of -1 indicates that an estimate could not be produced, and
   * should be ignored. The value returned by this method is really only useful
   * when compared to previous or subsequent returned values.
   *
   * @return the estimated lag in transactions, or -1 if no estimate is
   *     available yet
   */
  public long getEventsBehindEstimate() {
    if (syncTxid == 0) {
      // syncTxid has not been set by a successful fetch yet
      return -1;
    } else {
      assert syncTxid >= lastReadTxid;
      // this gives the difference between the last txid we have fetched to the
      // client and syncTxid at the time we last fetched events from the
      // NameNode
      return syncTxid - lastReadTxid;
    }
  }

  /**
   * Returns the next event in the stream, waiting up to the specified amount of
   * time for a new event. Returns null if a new event is not available at the
   * end of the specified amount of time. The time before the method returns may
   * exceed the specified amount of time by up to the time required for an RPC
   * to the NameNode.
   *
   * @param time number of units of the given TimeUnit to wait
   * @param tu the desired TimeUnit
   * @return the next event, or null if the wait timed out
   * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
   * @throws MissingEventsException
   * see {@link DFSInotifyEventInputStream#poll()}
   * @throws InterruptedException if the calling thread is interrupted
   */
  public Event poll(long time, TimeUnit tu) throws IOException,
      InterruptedException, MissingEventsException {
    long initialTime = Time.monotonicNow();
    long totalWait = TimeUnit.MILLISECONDS.convert(time, tu);
    long nextWait = INITIAL_WAIT_MS;
    Event next = null;
    while ((next = poll()) == null) {
      long timeLeft = totalWait - (Time.monotonicNow() - initialTime);
      if (timeLeft <= 0) {
        LOG.debug("timed poll(): timed out");
        break;
      } else if (timeLeft < nextWait * 2) {
        // never sleep past the caller's deadline
        nextWait = timeLeft;
      } else {
        // exponential back-off between attempts
        nextWait *= 2;
      }
      LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
          nextWait);
      Thread.sleep(nextWait);
    }

    return next;
  }

  /**
   * Returns the next event in the stream, waiting indefinitely if a new event
   * is not immediately available.
   *
   * @return the next event; never null
   * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
   * @throws MissingEventsException see
   * {@link DFSInotifyEventInputStream#poll()}
   * @throws InterruptedException if the calling thread is interrupted
   */
  public Event take() throws IOException, InterruptedException,
      MissingEventsException {
    Event next = null;
    int nextWaitMin = INITIAL_WAIT_MS;
    while ((next = poll()) == null) {
      // sleep for a random period between nextWaitMin and nextWaitMin * 2
      // to avoid stampedes at the NN if there are multiple clients
      int sleepTime = nextWaitMin + rng.nextInt(nextWaitMin);
      LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
      Thread.sleep(sleepTime);
      // the maximum sleep is 2 minutes (nextWaitMin is capped at 60 s and the
      // random component adds strictly less than nextWaitMin)
      nextWaitMin = Math.min(60000, nextWaitMin * 2);
    }

    return next;
  }
}