001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.util.concurrent;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.base.Predicates.equalTo;
021import static com.google.common.base.Predicates.in;
022import static com.google.common.base.Predicates.instanceOf;
023import static com.google.common.base.Predicates.not;
024import static com.google.common.util.concurrent.Internal.toNanosSaturated;
025import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
026import static com.google.common.util.concurrent.Service.State.FAILED;
027import static com.google.common.util.concurrent.Service.State.NEW;
028import static com.google.common.util.concurrent.Service.State.RUNNING;
029import static com.google.common.util.concurrent.Service.State.STARTING;
030import static com.google.common.util.concurrent.Service.State.STOPPING;
031import static com.google.common.util.concurrent.Service.State.TERMINATED;
032import static java.util.concurrent.TimeUnit.MILLISECONDS;
033
034import com.google.common.annotations.GwtIncompatible;
035import com.google.common.annotations.J2ktIncompatible;
036import com.google.common.base.Function;
037import com.google.common.base.MoreObjects;
038import com.google.common.base.Stopwatch;
039import com.google.common.collect.Collections2;
040import com.google.common.collect.ImmutableCollection;
041import com.google.common.collect.ImmutableList;
042import com.google.common.collect.ImmutableMap;
043import com.google.common.collect.ImmutableSet;
044import com.google.common.collect.ImmutableSetMultimap;
045import com.google.common.collect.Lists;
046import com.google.common.collect.Maps;
047import com.google.common.collect.MultimapBuilder;
048import com.google.common.collect.Multimaps;
049import com.google.common.collect.Multiset;
050import com.google.common.collect.Ordering;
051import com.google.common.collect.SetMultimap;
052import com.google.common.util.concurrent.Service.State;
053import com.google.errorprone.annotations.CanIgnoreReturnValue;
054import com.google.errorprone.annotations.concurrent.GuardedBy;
055import com.google.j2objc.annotations.J2ObjCIncompatible;
056import com.google.j2objc.annotations.WeakOuter;
057import java.lang.ref.WeakReference;
058import java.time.Duration;
059import java.util.Collections;
060import java.util.EnumSet;
061import java.util.List;
062import java.util.Map;
063import java.util.Map.Entry;
064import java.util.concurrent.Executor;
065import java.util.concurrent.TimeUnit;
066import java.util.concurrent.TimeoutException;
067import java.util.logging.Level;
068
069/**
070 * A manager for monitoring and controlling a set of {@linkplain Service services}. This class
071 * provides methods for {@linkplain #startAsync() starting}, {@linkplain #stopAsync() stopping} and
072 * {@linkplain #servicesByState inspecting} a collection of {@linkplain Service services}.
073 * Additionally, users can monitor state transitions with the {@linkplain Listener listener}
074 * mechanism.
075 *
076 * <p>While it is recommended that service lifecycles be managed via this class, state transitions
077 * initiated via other mechanisms do not impact the correctness of its methods. For example, if the
078 * services are started by some mechanism besides {@link #startAsync}, the listeners will be invoked
079 * when appropriate and {@link #awaitHealthy} will still work as expected.
080 *
081 * <p>Here is a simple example of how to use a {@code ServiceManager} to start a server.
082 *
083 * <pre>{@code
084 * class Server {
085 *   public static void main(String[] args) {
086 *     Set<Service> services = ...;
087 *     ServiceManager manager = new ServiceManager(services);
088 *     manager.addListener(new Listener() {
089 *         public void stopped() {}
090 *         public void healthy() {
091 *           // Services have been initialized and are healthy, start accepting requests...
092 *         }
093 *         public void failure(Service service) {
094 *           // Something failed, at this point we could log it, notify a load balancer, or take
095 *           // some other action.  For now we will just exit.
096 *           System.exit(1);
097 *         }
098 *       },
099 *       MoreExecutors.directExecutor());
100 *
101 *     Runtime.getRuntime().addShutdownHook(new Thread() {
102 *       public void run() {
103 *         // Give the services 5 seconds to stop to ensure that we are responsive to shutdown
104 *         // requests.
105 *         try {
106 *           manager.stopAsync().awaitStopped(5, TimeUnit.SECONDS);
107 *         } catch (TimeoutException timeout) {
108 *           // stopping timed out
109 *         }
110 *       }
111 *     });
112 *     manager.startAsync();  // start all the services asynchronously
113 *   }
114 * }
115 * }</pre>
116 *
117 * <p>This class uses the ServiceManager's methods to start all of its services, to respond to
118 * service failure and to ensure that when the JVM is shutting down all the services are stopped.
119 *
120 * @author Luke Sandberg
121 * @since 14.0
122 */
123@J2ktIncompatible
124@GwtIncompatible
125@ElementTypesAreNonnullByDefault
126public final class ServiceManager implements ServiceManagerBridge {
  // Logger shared by the manager itself and by its nested state and listener classes.
  private static final LazyLogger logger = new LazyLogger(ServiceManager.class);
  /** Reusable event that invokes {@link Listener#healthy()} on a listener. */
  private static final ListenerCallQueue.Event<Listener> HEALTHY_EVENT =
      new ListenerCallQueue.Event<Listener>() {
        @Override
        public void call(Listener listener) {
          listener.healthy();
        }

        // Short label identifying this event.
        @Override
        public String toString() {
          return "healthy()";
        }
      };
  /** Reusable event that invokes {@link Listener#stopped()} on a listener. */
  private static final ListenerCallQueue.Event<Listener> STOPPED_EVENT =
      new ListenerCallQueue.Event<Listener>() {
        @Override
        public void call(Listener listener) {
          listener.stopped();
        }

        // Short label identifying this event.
        @Override
        public String toString() {
          return "stopped()";
        }
      };
152
  /**
   * A listener for the aggregate state changes of the services that are under management. Users
   * that need to listen to more fine-grained events (such as when each particular {@linkplain
   * Service service} starts, or terminates), should attach {@linkplain Service.Listener service
   * listeners} to each individual service.
   *
   * <p>Every callback has an empty default implementation, so subclasses override only the events
   * they care about.
   *
   * @author Luke Sandberg
   * @since 15.0 (present as an interface in 14.0)
   */
  public abstract static class Listener {
    /**
     * Called when the service initially becomes healthy.
     *
     * <p>This will be called at most once after all the services have entered the {@linkplain
     * State#RUNNING running} state. If any services fail during start up or {@linkplain
     * State#FAILED fail}/{@linkplain State#TERMINATED terminate} before all other services have
     * started {@linkplain State#RUNNING running} then this method will not be called.
     */
    public void healthy() {}

    /**
     * Called when all of the component services have reached a terminal state, either {@linkplain
     * State#TERMINATED terminated} or {@linkplain State#FAILED failed}.
     */
    public void stopped() {}

    /**
     * Called when a component service has {@linkplain State#FAILED failed}.
     *
     * @param service The service that failed.
     */
    public void failure(Service service) {}
  }
186
  /**
   * An encapsulation of all of the state that is accessed by the {@linkplain ServiceListener
   * service listeners}. This is extracted into its own object so that {@link ServiceListener} could
   * be made {@code static} and its instances can be safely constructed and added in the {@link
   * ServiceManager} constructor without having to close over the partially constructed {@link
   * ServiceManager} instance (i.e. avoid leaking a pointer to {@code this}).
   */
  private final ServiceManagerState state;

  // The managed services as copied by the constructor. When the manager is constructed with no
  // services this holds a single NoOpService placeholder instead of being empty.
  private final ImmutableList<Service> services;
197
198  /**
199   * Constructs a new instance for managing the given services.
200   *
201   * @param services The services to manage
202   * @throws IllegalArgumentException if not all services are {@linkplain State#NEW new} or if there
203   *     are any duplicate services.
204   */
205  public ServiceManager(Iterable<? extends Service> services) {
206    ImmutableList<Service> copy = ImmutableList.copyOf(services);
207    if (copy.isEmpty()) {
208      // Having no services causes the manager to behave strangely. Notably, listeners are never
209      // fired. To avoid this we substitute a placeholder service.
210      logger
211          .get()
212          .log(
213              Level.WARNING,
214              "ServiceManager configured with no services.  Is your application configured"
215                  + " properly?",
216              new EmptyServiceManagerWarning());
217      copy = ImmutableList.<Service>of(new NoOpService());
218    }
219    this.state = new ServiceManagerState(copy);
220    this.services = copy;
221    WeakReference<ServiceManagerState> stateReference = new WeakReference<>(state);
222    for (Service service : copy) {
223      service.addListener(new ServiceListener(service, stateReference), directExecutor());
224      // We check the state after adding the listener as a way to ensure that our listener was added
225      // to a NEW service.
226      checkArgument(service.state() == NEW, "Can only manage NEW services, %s", service);
227    }
228    // We have installed all of our listeners and after this point any state transition should be
229    // correct.
230    this.state.markReady();
231  }
232
  /**
   * Registers a {@link Listener} to be {@linkplain Executor#execute executed} on the given
   * executor. The listener will not have previous state changes replayed, so it is suggested that
   * listeners are added before any of the managed services are {@linkplain Service#startAsync
   * started}.
   *
   * <p>{@code addListener} guarantees execution ordering across calls to a given listener but not
   * across calls to multiple listeners. Specifically, a given listener will have its callbacks
   * invoked in the same order as the underlying service enters those states. Additionally, at most
   * one of the listener's callbacks will execute at once. However, multiple listeners' callbacks
   * may execute concurrently, and listeners may execute in an order different from the one in which
   * they were registered.
   *
   * <p>RuntimeExceptions thrown by a listener will be caught and logged. Any exception thrown
   * during {@code Executor.execute} (e.g., a {@code RejectedExecutionException}) will be caught and
   * logged.
   *
   * <p>When selecting an executor, note that {@code directExecutor} is dangerous in some cases. See
   * the discussion in the {@link ListenableFuture#addListener ListenableFuture.addListener}
   * documentation.
   *
   * @param listener the listener to run when the manager changes state
   * @param executor the executor in which the listener's callback methods will be run.
   */
  public void addListener(Listener listener, Executor executor) {
    // Registration is delegated to the shared state object, which owns the listener queue.
    state.addListener(listener, executor);
  }
260
261  /**
262   * Initiates service {@linkplain Service#startAsync startup} on all the services being managed. It
263   * is only valid to call this method if all of the services are {@linkplain State#NEW new}.
264   *
265   * @return this
266   * @throws IllegalStateException if any of the Services are not {@link State#NEW new} when the
267   *     method is called.
268   */
269  @CanIgnoreReturnValue
270  public ServiceManager startAsync() {
271    for (Service service : services) {
272      checkState(service.state() == NEW, "Not all services are NEW, cannot start %s", this);
273    }
274    for (Service service : services) {
275      try {
276        state.tryStartTiming(service);
277        service.startAsync();
278      } catch (IllegalStateException e) {
279        // This can happen if the service has already been started or stopped (e.g. by another
280        // service or listener). Our contract says it is safe to call this method if
281        // all services were NEW when it was called, and this has already been verified above, so we
282        // don't propagate the exception.
283        logger.get().log(Level.WARNING, "Unable to start Service " + service, e);
284      }
285    }
286    return this;
287  }
288
  /**
   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy}. The manager
   * will become healthy after all the component services have reached the {@linkplain State#RUNNING
   * running} state.
   *
   * <p>The wait is uninterruptible and has no timeout.
   *
   * @throws IllegalStateException if the service manager reaches a state from which it cannot
   *     become {@linkplain #isHealthy() healthy}.
   */
  public void awaitHealthy() {
    // Blocking and the health check are implemented by the shared state object.
    state.awaitHealthy();
  }
300
  /**
   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy} for no more
   * than the given time. The manager will become healthy after all the component services have
   * reached the {@linkplain State#RUNNING running} state.
   *
   * @param timeout the maximum time to wait
   * @throws TimeoutException if not all of the services have finished starting within the deadline
   * @throws IllegalStateException if the service manager reaches a state from which it cannot
   *     become {@linkplain #isHealthy() healthy}.
   * @since 28.0
   */
  public void awaitHealthy(Duration timeout) throws TimeoutException {
    // Convert to nanoseconds and delegate to the (long, TimeUnit) overload.
    awaitHealthy(toNanosSaturated(timeout), TimeUnit.NANOSECONDS);
  }
315
  /**
   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy} for no more
   * than the given time. The manager will become healthy after all the component services have
   * reached the {@linkplain State#RUNNING running} state.
   *
   * @param timeout the maximum time to wait
   * @param unit the time unit of the timeout argument
   * @throws TimeoutException if not all of the services have finished starting within the deadline
   * @throws IllegalStateException if the service manager reaches a state from which it cannot
   *     become {@linkplain #isHealthy() healthy}.
   */
  @SuppressWarnings("GoodTime") // should accept a java.time.Duration
  public void awaitHealthy(long timeout, TimeUnit unit) throws TimeoutException {
    // Blocking, the timeout and the health check are implemented by the shared state object.
    state.awaitHealthy(timeout, unit);
  }
331
332  /**
333   * Initiates service {@linkplain Service#stopAsync shutdown} if necessary on all the services
334   * being managed.
335   *
336   * @return this
337   */
338  @CanIgnoreReturnValue
339  public ServiceManager stopAsync() {
340    for (Service service : services) {
341      service.stopAsync();
342    }
343    return this;
344  }
345
  /**
   * Waits for all the services to reach a terminal state. After this method returns all services
   * will either be {@linkplain Service.State#TERMINATED terminated} or {@linkplain
   * Service.State#FAILED failed}.
   *
   * <p>The wait is uninterruptible and has no timeout.
   */
  public void awaitStopped() {
    // Blocking is implemented by the shared state object.
    state.awaitStopped();
  }
354
  /**
   * Waits for all the services to reach a terminal state for no more than the given time. If this
   * method returns normally, all services will either be {@linkplain Service.State#TERMINATED
   * terminated} or {@linkplain Service.State#FAILED failed}.
   *
   * @param timeout the maximum time to wait
   * @throws TimeoutException if not all of the services have stopped within the deadline
   * @since 28.0
   */
  public void awaitStopped(Duration timeout) throws TimeoutException {
    // Convert to nanoseconds and delegate to the (long, TimeUnit) overload.
    awaitStopped(toNanosSaturated(timeout), TimeUnit.NANOSECONDS);
  }
367
  /**
   * Waits for all the services to reach a terminal state for no more than the given time. If this
   * method returns normally, all services will either be {@linkplain Service.State#TERMINATED
   * terminated} or {@linkplain Service.State#FAILED failed}.
   *
   * @param timeout the maximum time to wait
   * @param unit the time unit of the timeout argument
   * @throws TimeoutException if not all of the services have stopped within the deadline
   */
  @SuppressWarnings("GoodTime") // should accept a java.time.Duration
  public void awaitStopped(long timeout, TimeUnit unit) throws TimeoutException {
    // Blocking and the timeout are implemented by the shared state object.
    state.awaitStopped(timeout, unit);
  }
381
382  /**
383   * Returns true if all services are currently in the {@linkplain State#RUNNING running} state.
384   *
385   * <p>Users who want more detailed information should use the {@link #servicesByState} method to
386   * get detailed information about which services are not running.
387   */
388  public boolean isHealthy() {
389    for (Service service : services) {
390      if (!service.isRunning()) {
391        return false;
392      }
393    }
394    return true;
395  }
396
  /**
   * Provides a snapshot of the current state of all the services under management.
   *
   * <p>N.B. This snapshot is guaranteed to be consistent, i.e. the set of states returned will
   * correspond to a point in time view of the services.
   *
   * @return an immutable multimap from each state to the services currently in that state
   * @since 29.0 (present with return type {@code ImmutableMultimap} since 14.0)
   */
  @Override
  public ImmutableSetMultimap<State, Service> servicesByState() {
    // The state object builds the copy while holding its monitor.
    return state.servicesByState();
  }
409
  /**
   * Returns the service load times. This value will only return startup times for services that
   * have finished starting.
   *
   * @return Map of services and their corresponding startup time in millis, the map entries will be
   *     ordered by startup time.
   */
  public ImmutableMap<Service, Long> startupTimes() {
    // Collection, filtering and sorting are implemented by the shared state object.
    return state.startupTimes();
  }
420
  /**
   * Returns the service load times. This value will only return startup times for services that
   * have finished starting.
   *
   * <p>The values are the millisecond figures from {@link #startupTimes()} converted with {@link
   * Duration#ofMillis}.
   *
   * @return Map of services and their corresponding startup time, the map entries will be ordered
   *     by startup time.
   * @since 31.0
   */
  @J2ObjCIncompatible
  public ImmutableMap<Service, Duration> startupDurations() {
    // Copy the transformed values into a standalone immutable map.
    return ImmutableMap.copyOf(
        Maps.<Service, Long, Duration>transformValues(startupTimes(), Duration::ofMillis));
  }
434
435  @Override
436  public String toString() {
437    return MoreObjects.toStringHelper(ServiceManager.class)
438        .add("services", Collections2.filter(services, not(instanceOf(NoOpService.class))))
439        .toString();
440  }
441
442  /**
443   * An encapsulation of all the mutable state of the {@link ServiceManager} that needs to be
444   * accessed by instances of {@link ServiceListener}.
445   */
446  private static final class ServiceManagerState {
    // Lock guarding all of the mutable state below; the guards in this class are built on it.
    final Monitor monitor = new Monitor();

    // The services grouped by the state most recently recorded for them via transitionService.
    @GuardedBy("monitor")
    final SetMultimap<State, Service> servicesByState =
        MultimapBuilder.enumKeys(State.class).linkedHashSetValues().build();

    // The key multiset of servicesByState; used to count how many services are in a given state.
    @GuardedBy("monitor")
    final Multiset<State> states = servicesByState.keys();

    // One stopwatch per service, keyed by identity, measuring how long startup takes.
    @GuardedBy("monitor")
    final Map<Service, Stopwatch> startupTimers = Maps.newIdentityHashMap();

    /**
     * These two booleans are used to mark the state as ready to start.
     *
     * <p>{@link #ready}: is set by {@link #markReady} to indicate that all listeners have been
     * correctly installed
     *
     * <p>{@link #transitioned}: is set by {@link #transitionService} to indicate that some
     * transition has been performed.
     *
     * <p>Together, they allow us to enforce that all services have their listeners installed prior
     * to any service performing a transition, then we can fail in the ServiceManager constructor
     * rather than in a Service.Listener callback.
     */
    @GuardedBy("monitor")
    boolean ready;

    @GuardedBy("monitor")
    boolean transitioned;

    // Size of the collection passed to the constructor; the guards compare state counts to it.
    final int numberOfServices;
479
480    /**
481     * Controls how long to wait for all the services to either become healthy or reach a state from
482     * which it is guaranteed that it can never become healthy.
483     */
484    final Monitor.Guard awaitHealthGuard = new AwaitHealthGuard();
485
486    @WeakOuter
487    final class AwaitHealthGuard extends Monitor.Guard {
488      AwaitHealthGuard() {
489        super(ServiceManagerState.this.monitor);
490      }
491
492      @Override
493      @GuardedBy("ServiceManagerState.this.monitor")
494      public boolean isSatisfied() {
495        // All services have started or some service has terminated/failed.
496        return states.count(RUNNING) == numberOfServices
497            || states.contains(STOPPING)
498            || states.contains(TERMINATED)
499            || states.contains(FAILED);
500      }
501    }
502
503    /** Controls how long to wait for all services to reach a terminal state. */
504    final Monitor.Guard stoppedGuard = new StoppedGuard();
505
506    @WeakOuter
507    final class StoppedGuard extends Monitor.Guard {
508      StoppedGuard() {
509        super(ServiceManagerState.this.monitor);
510      }
511
512      @Override
513      @GuardedBy("ServiceManagerState.this.monitor")
514      public boolean isSatisfied() {
515        return states.count(TERMINATED) + states.count(FAILED) == numberOfServices;
516      }
517    }
518
    /**
     * The listeners to notify during a state transition. Events are enqueued by the {@code
     * enqueue*Event} methods and delivered by {@link #dispatchListenerEvents()}.
     */
    final ListenerCallQueue<Listener> listeners = new ListenerCallQueue<>();
521
    /**
     * It is implicitly assumed that all the services are NEW and that they will all remain NEW
     * until all the Listeners are installed and {@link #markReady()} is called. It is our caller's
     * responsibility to only call {@link #markReady()} if all services were new at the time this
     * method was called and when all the listeners were installed.
     *
     * @param services the services to track; every one is initially recorded under NEW
     */
    ServiceManagerState(ImmutableCollection<Service> services) {
      this.numberOfServices = services.size();
      // Seed the state map without consulting the services' actual states.
      servicesByState.putAll(NEW, services);
    }
532
533    /**
534     * Attempts to start the timer immediately prior to the service being started via {@link
535     * Service#startAsync()}.
536     */
537    void tryStartTiming(Service service) {
538      monitor.enter();
539      try {
540        Stopwatch stopwatch = startupTimers.get(service);
541        if (stopwatch == null) {
542          startupTimers.put(service, Stopwatch.createStarted());
543        }
544      } finally {
545        monitor.leave();
546      }
547    }
548
549    /**
550     * Marks the {@link State} as ready to receive transitions. Returns true if no transitions have
551     * been observed yet.
552     */
553    void markReady() {
554      monitor.enter();
555      try {
556        if (!transitioned) {
557          // nothing has transitioned since construction, good.
558          ready = true;
559        } else {
560          // This should be an extremely rare race condition.
561          List<Service> servicesInBadStates = Lists.newArrayList();
562          for (Service service : servicesByState().values()) {
563            if (service.state() != NEW) {
564              servicesInBadStates.add(service);
565            }
566          }
567          throw new IllegalArgumentException(
568              "Services started transitioning asynchronously before "
569                  + "the ServiceManager was constructed: "
570                  + servicesInBadStates);
571        }
572      } finally {
573        monitor.leave();
574      }
575    }
576
    /** Adds {@code listener} to the listener queue, to be run on {@code executor}. */
    void addListener(Listener listener, Executor executor) {
      listeners.addListener(listener, executor);
    }
580
    /**
     * Blocks uninterruptibly until {@link #awaitHealthGuard} is satisfied, then verifies that the
     * manager is actually healthy.
     *
     * @throws IllegalStateException if not all services are running once the guard is satisfied
     */
    void awaitHealthy() {
      monitor.enterWhenUninterruptibly(awaitHealthGuard);
      try {
        // The guard is also satisfied by stopping/terminated/failed services, so re-check.
        checkHealthy();
      } finally {
        monitor.leave();
      }
    }
589
590    void awaitHealthy(long timeout, TimeUnit unit) throws TimeoutException {
591      monitor.enter();
592      try {
593        if (!monitor.waitForUninterruptibly(awaitHealthGuard, timeout, unit)) {
594          throw new TimeoutException(
595              "Timeout waiting for the services to become healthy. The "
596                  + "following services have not started: "
597                  + Multimaps.filterKeys(servicesByState, in(ImmutableSet.of(NEW, STARTING))));
598        }
599        checkHealthy();
600      } finally {
601        monitor.leave();
602      }
603    }
604
    /** Blocks uninterruptibly until {@link #stoppedGuard} is satisfied. */
    void awaitStopped() {
      monitor.enterWhenUninterruptibly(stoppedGuard);
      // Nothing to do while holding the monitor; we only needed to wait for the guard.
      monitor.leave();
    }
609
610    void awaitStopped(long timeout, TimeUnit unit) throws TimeoutException {
611      monitor.enter();
612      try {
613        if (!monitor.waitForUninterruptibly(stoppedGuard, timeout, unit)) {
614          throw new TimeoutException(
615              "Timeout waiting for the services to stop. The following "
616                  + "services have not stopped: "
617                  + Multimaps.filterKeys(servicesByState, not(in(EnumSet.of(TERMINATED, FAILED)))));
618        }
619      } finally {
620        monitor.leave();
621      }
622    }
623
624    ImmutableSetMultimap<State, Service> servicesByState() {
625      ImmutableSetMultimap.Builder<State, Service> builder = ImmutableSetMultimap.builder();
626      monitor.enter();
627      try {
628        for (Entry<State, Service> entry : servicesByState.entries()) {
629          if (!(entry.getValue() instanceof NoOpService)) {
630            builder.put(entry);
631          }
632        }
633      } finally {
634        monitor.leave();
635      }
636      return builder.build();
637    }
638
639    ImmutableMap<Service, Long> startupTimes() {
640      List<Entry<Service, Long>> loadTimes;
641      monitor.enter();
642      try {
643        loadTimes = Lists.newArrayListWithCapacity(startupTimers.size());
644        // N.B. There will only be an entry in the map if the service has started
645        for (Entry<Service, Stopwatch> entry : startupTimers.entrySet()) {
646          Service service = entry.getKey();
647          Stopwatch stopwatch = entry.getValue();
648          if (!stopwatch.isRunning() && !(service instanceof NoOpService)) {
649            loadTimes.add(Maps.immutableEntry(service, stopwatch.elapsed(MILLISECONDS)));
650          }
651        }
652      } finally {
653        monitor.leave();
654      }
655      Collections.sort(
656          loadTimes,
657          Ordering.natural()
658              .onResultOf(
659                  new Function<Entry<Service, Long>, Long>() {
660                    @Override
661                    public Long apply(Entry<Service, Long> input) {
662                      return input.getValue();
663                    }
664                  }));
665      return ImmutableMap.copyOf(loadTimes);
666    }
667
    /**
     * Updates the state with the given service transition.
     *
     * <p>This method performs the main logic of ServiceManager in the following steps.
     *
     * <ol>
     *   <li>Update the {@link #servicesByState()}
     *   <li>Update the {@link #startupTimers}
     *   <li>Based on the new state queue listeners to run
     *   <li>Run the listeners (outside of the lock)
     * </ol>
     *
     * <p>If {@link #markReady()} has not completed yet, the transition is only noted (via {@link
     * #transitioned}) and nothing else is updated.
     *
     * @param service the service that changed state; must not be null
     * @param from the state the service is leaving; must differ from {@code to}
     * @param to the state the service is entering
     * @throws IllegalStateException if the state map does not hold {@code service} under {@code
     *     from}, or already holds it under {@code to}
     */
    void transitionService(final Service service, State from, State to) {
      checkNotNull(service);
      checkArgument(from != to);
      monitor.enter();
      try {
        // Always note that something moved, so that markReady() can detect a premature transition.
        transitioned = true;
        if (!ready) {
          return;
        }
        // Update state.
        checkState(
            servicesByState.remove(from, service),
            "Service %s not at the expected location in the state map %s",
            service,
            from);
        checkState(
            servicesByState.put(to, service),
            "Service %s in the state map unexpectedly at %s",
            service,
            to);
        // Update the timer
        Stopwatch stopwatch = startupTimers.get(service);
        if (stopwatch == null) {
          // This means the service was started by some means other than ServiceManager.startAsync
          stopwatch = Stopwatch.createStarted();
          startupTimers.put(service, stopwatch);
        }
        // Stop timing once the new state is RUNNING or any state ordered after it in the enum.
        if (to.compareTo(RUNNING) >= 0 && stopwatch.isRunning()) {
          // N.B. if we miss the STARTING event then we may never record a startup time.
          stopwatch.stop();
          if (!(service instanceof NoOpService)) {
            logger.get().log(Level.FINE, "Started {0} in {1}.", new Object[] {service, stopwatch});
          }
        }
        // Queue our listeners

        // Did a service fail?
        if (to == FAILED) {
          enqueueFailedEvent(service);
        }

        if (states.count(RUNNING) == numberOfServices) {
          // This means that the manager is currently healthy. N.B. If other threads call isHealthy
          // they are not guaranteed to get 'true', because any service could fail right now.
          enqueueHealthyEvent();
        } else if (states.count(TERMINATED) + states.count(FAILED) == numberOfServices) {
          enqueueStoppedEvent();
        }
      } finally {
        monitor.leave();
        // Run our executors outside of the lock
        // This runs on every exit path, including the early return and a failed checkState.
        dispatchListenerEvents();
      }
    }
734
    /** Queues a {@link Listener#stopped()} call; it is delivered on the next dispatch. */
    void enqueueStoppedEvent() {
      listeners.enqueue(STOPPED_EVENT);
    }
738
    /** Queues a {@link Listener#healthy()} call; it is delivered on the next dispatch. */
    void enqueueHealthyEvent() {
      listeners.enqueue(HEALTHY_EVENT);
    }
742
743    void enqueueFailedEvent(final Service service) {
744      listeners.enqueue(
745          new ListenerCallQueue.Event<Listener>() {
746            @Override
747            public void call(Listener listener) {
748              listener.failure(service);
749            }
750
751            @Override
752            public String toString() {
753              return "failed({service=" + service + "})";
754            }
755          });
756    }
757
    /**
     * Attempts to execute all the listeners in {@link #listeners}.
     *
     * @throws IllegalStateException if the calling thread currently occupies {@link #monitor}
     */
    void dispatchListenerEvents() {
      // Listener code must never run while this thread holds the monitor.
      checkState(
          !monitor.isOccupiedByCurrentThread(),
          "It is incorrect to execute listeners with the monitor held.");
      listeners.dispatch();
    }
765
766    @GuardedBy("monitor")
767    void checkHealthy() {
768      if (states.count(RUNNING) != numberOfServices) {
769        IllegalStateException exception =
770            new IllegalStateException(
771                "Expected to be healthy after starting. The following services are not running: "
772                    + Multimaps.filterKeys(servicesByState, not(equalTo(RUNNING))));
773        for (Service service : servicesByState.get(State.FAILED)) {
774          exception.addSuppressed(new FailedService(service));
775        }
776        throw exception;
777      }
778    }
779  }
780
781  /**
782   * A {@link Service} that wraps another service and times how long it takes for it to start and
783   * also calls the {@link ServiceManagerState#transitionService(Service, State, State)}, to record
784   * the state transitions.
785   */
786  private static final class ServiceListener extends Service.Listener {
787    final Service service;
788    // We store the state in a weak reference to ensure that if something went wrong while
789    // constructing the ServiceManager we don't pointlessly keep updating the state.
790    final WeakReference<ServiceManagerState> state;
791
792    ServiceListener(Service service, WeakReference<ServiceManagerState> state) {
793      this.service = service;
794      this.state = state;
795    }
796
797    @Override
798    public void starting() {
799      ServiceManagerState state = this.state.get();
800      if (state != null) {
801        state.transitionService(service, NEW, STARTING);
802        if (!(service instanceof NoOpService)) {
803          logger.get().log(Level.FINE, "Starting {0}.", service);
804        }
805      }
806    }
807
808    @Override
809    public void running() {
810      ServiceManagerState state = this.state.get();
811      if (state != null) {
812        state.transitionService(service, STARTING, RUNNING);
813      }
814    }
815
816    @Override
817    public void stopping(State from) {
818      ServiceManagerState state = this.state.get();
819      if (state != null) {
820        state.transitionService(service, from, STOPPING);
821      }
822    }
823
824    @Override
825    public void terminated(State from) {
826      ServiceManagerState state = this.state.get();
827      if (state != null) {
828        if (!(service instanceof NoOpService)) {
829          logger
830              .get()
831              .log(
832                  Level.FINE,
833                  "Service {0} has terminated. Previous state was: {1}",
834                  new Object[] {service, from});
835        }
836        state.transitionService(service, from, TERMINATED);
837      }
838    }
839
840    @Override
841    public void failed(State from, Throwable failure) {
842      ServiceManagerState state = this.state.get();
843      if (state != null) {
844        // Log before the transition, so that if the process exits in response to server failure,
845        // there is a higher likelihood that the cause will be in the logs.
846        boolean log = !(service instanceof NoOpService);
847        /*
848         * We have already exposed startup exceptions to the user in the form of suppressed
849         * exceptions. We don't need to log those exceptions again.
850         */
851        log &= from != State.STARTING;
852        if (log) {
853          logger
854              .get()
855              .log(
856                  Level.SEVERE,
857                  "Service " + service + " has failed in the " + from + " state.",
858                  failure);
859        }
860        state.transitionService(service, from, FAILED);
861      }
862    }
863  }
864
865  /**
866   * A {@link Service} instance that does nothing. This is only useful as a placeholder to ensure
867   * that the {@link ServiceManager} functions properly even when it is managing no services.
868   *
869   * <p>The use of this class is considered an implementation detail of ServiceManager and as such
870   * it is excluded from {@link #servicesByState}, {@link #startupTimes}, {@link #toString} and all
871   * logging statements.
872   */
873  private static final class NoOpService extends AbstractService {
874    @Override
875    protected void doStart() {
876      notifyStarted();
877    }
878
879    @Override
880    protected void doStop() {
881      notifyStopped();
882    }
883  }
884
885  /** This is never thrown but only used for logging. */
886  private static final class EmptyServiceManagerWarning extends Throwable {}
887
888  private static final class FailedService extends Throwable {
889    FailedService(Service service) {
890      super(
891          service.toString(),
892          service.failureCause(),
893          false /* don't enable suppression */,
894          false /* don't calculate a stack trace. */);
895    }
896  }
897}