001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.net.unix;
019    
020    import java.io.Closeable;
021    import java.io.EOFException;
022    
023    import org.apache.hadoop.classification.InterfaceAudience;
024    import org.apache.hadoop.io.IOUtils;
025    
026    import java.io.IOException;
027    import java.nio.channels.ClosedChannelException;
028    import java.util.Iterator;
029    import java.util.LinkedList;
030    import java.util.TreeMap;
031    import java.util.Map;
032    import java.util.concurrent.locks.Condition;
033    import java.util.concurrent.locks.ReentrantLock;
034    
035    import org.apache.commons.lang.SystemUtils;
036    import org.apache.commons.logging.Log;
037    import org.apache.commons.logging.LogFactory;
038    import org.apache.hadoop.util.NativeCodeLoader;
039    
040    import com.google.common.annotations.VisibleForTesting;
041    import com.google.common.base.Preconditions;
042    import com.google.common.util.concurrent.Uninterruptibles;
043    
044    /**
045     * The DomainSocketWatcher watches a set of domain sockets to see when they
046     * become readable, or closed.  When one of those events happens, it makes a
047     * callback.
048     *
049     * See {@link DomainSocket} for more information about UNIX domain sockets.
050     */
051    @InterfaceAudience.LimitedPrivate("HDFS")
052    public final class DomainSocketWatcher implements Closeable {
053      static {
054        if (SystemUtils.IS_OS_WINDOWS) {
055          loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
056        } else if (!NativeCodeLoader.isNativeCodeLoaded()) {
057          loadingFailureReason = "libhadoop cannot be loaded.";
058        } else {
059          String problem;
060          try {
061            anchorNative();
062            problem = null;
063          } catch (Throwable t) {
064            problem = "DomainSocketWatcher#anchorNative got error: " +
065              t.getMessage();
066          }
067          loadingFailureReason = problem;
068        }
069      }
070    
071      static Log LOG = LogFactory.getLog(DomainSocketWatcher.class);
072    
073      /**
074       * The reason why DomainSocketWatcher is not available, or null if it is
075       * available.
076       */
077      private final static String loadingFailureReason;
078    
079      /**
080       * Initializes the native library code.
081       */
082      private static native void anchorNative();
083    
084      public static String getLoadingFailureReason() {
085        return loadingFailureReason;
086      }
087    
  /**
   * Callback associated with a watched socket.
   *
   * Within this class the handler is always invoked while the watcher's
   * lock is held: either from sendCallback on the watcher thread, or
   * directly from add() on the caller's thread when the watcher is already
   * closed or the socket is already closed.
   */
  public interface Handler {
    /**
     * Handles an event on a socket.  An event may be the socket becoming
     * readable, or the remote end being closed.
     *
     * @param sock    The socket that the event occurred on.
     * @return        Whether we should close the socket.  When sendCallback
     *                sees true it stops tracking the socket, drops the
     *                watcher's reference on it and closes it.  The return
     *                value is ignored when the handler is invoked directly
     *                from add().
     */
    boolean handle(DomainSocket sock);
  }
098    
099      /**
100       * Handler for {DomainSocketWatcher#notificationSockets[1]}
101       */
102      private class NotificationHandler implements Handler {
103        public boolean handle(DomainSocket sock) {
104          assert(lock.isHeldByCurrentThread());
105          try {
106            if (LOG.isTraceEnabled()) {
107              LOG.trace(this + ": NotificationHandler: doing a read on " +
108                sock.fd);
109            }
110            if (sock.getInputStream().read() == -1) {
111              if (LOG.isTraceEnabled()) {
112                LOG.trace(this + ": NotificationHandler: got EOF on " + sock.fd);
113              }
114              throw new EOFException();
115            }
116            if (LOG.isTraceEnabled()) {
117              LOG.trace(this + ": NotificationHandler: read succeeded on " +
118                sock.fd);
119            }
120            return false;
121          } catch (IOException e) {
122            if (LOG.isTraceEnabled()) {
123              LOG.trace(this + ": NotificationHandler: setting closed to " +
124                  "true for " + sock.fd);
125            }
126            closed = true;
127            return true;
128          }
129        }
130      }
131    
132      private static class Entry {
133        final DomainSocket socket;
134        final Handler handler;
135    
136        Entry(DomainSocket socket, Handler handler) {
137          this.socket = socket;
138          this.handler = handler;
139        }
140    
141        DomainSocket getDomainSocket() {
142          return socket;
143        }
144    
145        Handler getHandler() {
146          return handler;
147        }
148      }
149    
  /**
   * The FdSet is a set of file descriptors that gets passed to poll(2).
   * It contains a native memory segment, so that we don't have to copy
   * in the poll0 function.
   *
   * All operations other than construction are implemented in native code.
   * In this class an FdSet is created and used only by the watcher thread.
   */
  private static class FdSet {
    // Value returned by alloc0(); presumably the address of the native
    // memory segment -- TODO confirm against the native implementation.
    // Never read from Java code in this file.
    private long data;

    /**
     * Allocate the native state backing a new FdSet.
     *
     * @return     The value to store in {@link #data}.
     */
    private native static long alloc0();

    FdSet() {
      data = alloc0();
    }

    /**
     * Add a file descriptor to the set.
     *
     * @param fd   The file descriptor to add.
     */
    native void add(int fd);

    /**
     * Remove a file descriptor from the set.
     *
     * @param fd   The file descriptor to remove.
     */
    native void remove(int fd);

    /**
     * Get an array containing all the FDs marked as readable.
     * Also clear the state of all FDs.
     *
     * @return     An array containing all of the currently readable file
     *             descriptors.
     */
    native int[] getAndClearReadableFds();

    /**
     * Close the object and de-allocate the memory used.
     */
    native void close();
  }
192    
  /**
   * Lock which protects toAdd, toRemove, and closed.
   *
   * Handlers are also invoked while this lock is held.
   */
  private final ReentrantLock lock = new ReentrantLock();

  /**
   * Condition variable which indicates that toAdd and toRemove have been
   * processed.  Signalled by the watcher thread; awaited by add() and
   * remove().
   */
  private final Condition processedCond = lock.newCondition();

  /**
   * Entries to add.  Filled by add() and drained by the watcher thread.
   */
  private final LinkedList<Entry> toAdd =
      new LinkedList<Entry>();

  /**
   * Entries to remove, keyed by file descriptor.  Filled by remove();
   * an entry is taken out by sendCallback once the socket's handler has
   * asked for the socket to be closed.
   */
  private final TreeMap<Integer, DomainSocket> toRemove =
      new TreeMap<Integer, DomainSocket>();

  /**
   * Maximum length of time to go between checking whether the interrupted
   * bit has been set for this thread.  Passed to doPoll0 as its maximum
   * wait on every iteration of the watcher loop.
   */
  private final int interruptCheckPeriodMs;

  /**
   * A pair of sockets used to wake up the thread after it has called poll(2).
   *
   * Element 0 is written to by kick() and closed by close(); element 1 is
   * the end watched by the watcher thread through NotificationHandler.
   */
  private final DomainSocket notificationSockets[];

  /**
   * Whether or not this DomainSocketWatcher is closed.  Set by close() and
   * by NotificationHandler when reading the notification socket fails or
   * hits EOF.
   */
  private boolean closed = false;
231    
232      public DomainSocketWatcher(int interruptCheckPeriodMs) throws IOException {
233        if (loadingFailureReason != null) {
234          throw new UnsupportedOperationException(loadingFailureReason);
235        }
236        Preconditions.checkArgument(interruptCheckPeriodMs > 0);
237        this.interruptCheckPeriodMs = interruptCheckPeriodMs;
238        notificationSockets = DomainSocket.socketpair();
239        watcherThread.setDaemon(true);
240        watcherThread.start();
241      }
242    
243      /**
244       * Close the DomainSocketWatcher and wait for its thread to terminate.
245       *
246       * If there is more than one close, all but the first will be ignored.
247       */
248      @Override
249      public void close() throws IOException {
250        lock.lock();
251        try {
252          if (closed) return;
253          if (LOG.isDebugEnabled()) {
254            LOG.debug(this + ": closing");
255          }
256          closed = true;
257        } finally {
258          lock.unlock();
259        }
260        // Close notificationSockets[0], so that notificationSockets[1] gets an EOF
261        // event.  This will wake up the thread immediately if it is blocked inside
262        // the select() system call.
263        notificationSockets[0].close();
264        // Wait for the select thread to terminate.
265        Uninterruptibles.joinUninterruptibly(watcherThread);
266      }
267    
268      @VisibleForTesting
269      public boolean isClosed() {
270        lock.lock();
271        try {
272          return closed;
273        } finally {
274          lock.unlock();
275        }
276      }
277    
278      /**
279       * Add a socket.
280       *
281       * @param sock     The socket to add.  It is an error to re-add a socket that
282       *                   we are already watching.
283       * @param handler  The handler to associate with this socket.  This may be
284       *                   called any time after this function is called.
285       */
286      public void add(DomainSocket sock, Handler handler) {
287        lock.lock();
288        try {
289          if (closed) {
290            handler.handle(sock);
291            IOUtils.cleanup(LOG, sock);
292            return;
293          }
294          Entry entry = new Entry(sock, handler);
295          try {
296            sock.refCount.reference();
297          } catch (ClosedChannelException e1) {
298            // If the socket is already closed before we add it, invoke the
299            // handler immediately.  Then we're done.
300            handler.handle(sock);
301            return;
302          }
303          toAdd.add(entry);
304          kick();
305          while (true) {
306            try {
307              processedCond.await();
308            } catch (InterruptedException e) {
309              Thread.currentThread().interrupt();
310            }
311            if (!toAdd.contains(entry)) {
312              break;
313            }
314          }
315        } finally {
316          lock.unlock();
317        }
318      }
319    
320      /**
321       * Remove a socket.  Its handler will be called.
322       *
323       * @param sock     The socket to remove.
324       */
325      public void remove(DomainSocket sock) {
326        lock.lock();
327        try {
328          if (closed) return;
329          toRemove.put(sock.fd, sock);
330          kick();
331          while (true) {
332            try {
333              processedCond.await();
334            } catch (InterruptedException e) {
335              Thread.currentThread().interrupt();
336            }
337            if (!toRemove.containsKey(sock.fd)) {
338              break;
339            }
340          }
341        } finally {
342          lock.unlock();
343        }
344      }
345    
346      /**
347       * Wake up the DomainSocketWatcher thread.
348       */
349      private void kick() {
350        assert(lock.isHeldByCurrentThread());
351        try {
352          notificationSockets[0].getOutputStream().write(0);
353        } catch (IOException e) {
354          if (!closed) {
355            LOG.error(this + ": error writing to notificationSockets[0]", e);
356          }
357        }
358      }
359    
360      private void sendCallback(String caller, TreeMap<Integer, Entry> entries,
361          FdSet fdSet, int fd) {
362        if (LOG.isTraceEnabled()) {
363          LOG.trace(this + ": " + caller + " starting sendCallback for fd " + fd);
364        }
365        Entry entry = entries.get(fd);
366        Preconditions.checkNotNull(entry,
367            this + ": fdSet contained " + fd + ", which we were " +
368            "not tracking.");
369        DomainSocket sock = entry.getDomainSocket();
370        if (entry.getHandler().handle(sock)) {
371          if (LOG.isTraceEnabled()) {
372            LOG.trace(this + ": " + caller + ": closing fd " + fd +
373                " at the request of the handler.");
374          }
375          if (toRemove.remove(fd) != null) {
376            if (LOG.isTraceEnabled()) {
377              LOG.trace(this + ": " + caller + " : sendCallback processed fd " +
378                fd  + " in toRemove.");
379            }
380          }
381          try {
382            sock.refCount.unreferenceCheckClosed();
383          } catch (IOException e) {
384            Preconditions.checkArgument(false,
385                this + ": file descriptor " + sock.fd + " was closed while " +
386                "still in the poll(2) loop.");
387          }
388          IOUtils.cleanup(LOG, sock);
389          entries.remove(fd);
390          fdSet.remove(fd);
391        } else {
392          if (LOG.isTraceEnabled()) {
393            LOG.trace(this + ": " + caller + ": sendCallback not " +
394                "closing fd " + fd);
395          }
396        }
397      }
398    
399      @VisibleForTesting
400      final Thread watcherThread = new Thread(new Runnable() {
401        @Override
402        public void run() {
403          if (LOG.isDebugEnabled()) {
404            LOG.debug(this + ": starting with interruptCheckPeriodMs = " +
405                interruptCheckPeriodMs);
406          }
407          final TreeMap<Integer, Entry> entries = new TreeMap<Integer, Entry>();
408          FdSet fdSet = new FdSet();
409          addNotificationSocket(entries, fdSet);
410          try {
411            while (true) {
412              lock.lock();
413              try {
414                for (int fd : fdSet.getAndClearReadableFds()) {
415                  sendCallback("getAndClearReadableFds", entries, fdSet, fd);
416                }
417                if (!(toAdd.isEmpty() && toRemove.isEmpty())) {
418                  // Handle pending additions (before pending removes).
419                  for (Iterator<Entry> iter = toAdd.iterator(); iter.hasNext(); ) {
420                    Entry entry = iter.next();
421                    DomainSocket sock = entry.getDomainSocket();
422                    Entry prevEntry = entries.put(sock.fd, entry);
423                    Preconditions.checkState(prevEntry == null,
424                        this + ": tried to watch a file descriptor that we " +
425                        "were already watching: " + sock);
426                    if (LOG.isTraceEnabled()) {
427                      LOG.trace(this + ": adding fd " + sock.fd);
428                    }
429                    fdSet.add(sock.fd);
430                    iter.remove();
431                  }
432                  // Handle pending removals
433                  while (true) {
434                    Map.Entry<Integer, DomainSocket> entry = toRemove.firstEntry();
435                    if (entry == null) break;
436                    sendCallback("handlePendingRemovals",
437                        entries, fdSet, entry.getValue().fd);
438                  }
439                  processedCond.signalAll();
440                }
441                // Check if the thread should terminate.  Doing this check now is
442                // easier than at the beginning of the loop, since we know toAdd and
443                // toRemove are now empty and processedCond has been notified if it
444                // needed to be.
445                if (closed) {
446                  if (LOG.isDebugEnabled()) {
447                    LOG.debug(toString() + " thread terminating.");
448                  }
449                  return;
450                }
451                // Check if someone sent our thread an InterruptedException while we
452                // were waiting in poll().
453                if (Thread.interrupted()) {
454                  throw new InterruptedException();
455                }
456              } finally {
457                lock.unlock();
458              }
459              doPoll0(interruptCheckPeriodMs, fdSet);
460            }
461          } catch (InterruptedException e) {
462            LOG.info(toString() + " terminating on InterruptedException");
463          } catch (IOException e) {
464            LOG.error(toString() + " terminating on IOException", e);
465          } finally {
466            lock.lock();
467            try {
468              kick(); // allow the handler for notificationSockets[0] to read a byte
469              for (Entry entry : entries.values()) {
470                sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
471              }
472              entries.clear();
473              fdSet.close();
474            } finally {
475              lock.unlock();
476            }
477          }
478        }
479      });
480    
481      private void addNotificationSocket(final TreeMap<Integer, Entry> entries,
482          FdSet fdSet) {
483        entries.put(notificationSockets[1].fd, 
484            new Entry(notificationSockets[1], new NotificationHandler()));
485        try {
486          notificationSockets[1].refCount.reference();
487        } catch (IOException e) {
488          throw new RuntimeException(e);
489        }
490        fdSet.add(notificationSockets[1].fd);
491        if (LOG.isTraceEnabled()) {
492          LOG.trace(this + ": adding notificationSocket " +
493              notificationSockets[1].fd + ", connected to " +
494              notificationSockets[0].fd);
495        }
496      }
497    
498      public String toString() {
499        return "DomainSocketWatcher(" + System.identityHashCode(this) + ")"; 
500      }
501    
  /**
   * Native wait for events on the given fd set.  The watcher thread calls
   * this once per loop iteration, without holding the lock.
   *
   * @param maxWaitMs  The watcher passes interruptCheckPeriodMs here;
   *                     presumably the upper bound, in milliseconds, on how
   *                     long the call may block -- TODO confirm against the
   *                     native implementation.
   * @param readFds    The fd set to wait on.  Readable descriptors are
   *                     retrieved afterwards with getAndClearReadableFds.
   * @return           A native status value; the caller in this file
   *                     ignores it.
   * @throws IOException  Declared by the native method; when thrown, the
   *                     watcher thread logs it and terminates.
   */
  private static native int doPoll0(int maxWaitMs, FdSet readFds)
      throws IOException;
504    }