001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.util.concurrent;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.base.Predicates.equalTo;
021import static com.google.common.base.Predicates.in;
022import static com.google.common.base.Predicates.instanceOf;
023import static com.google.common.base.Predicates.not;
024import static com.google.common.util.concurrent.Internal.toNanosSaturated;
025import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
026import static com.google.common.util.concurrent.Service.State.FAILED;
027import static com.google.common.util.concurrent.Service.State.NEW;
028import static com.google.common.util.concurrent.Service.State.RUNNING;
029import static com.google.common.util.concurrent.Service.State.STARTING;
030import static com.google.common.util.concurrent.Service.State.STOPPING;
031import static com.google.common.util.concurrent.Service.State.TERMINATED;
032import static java.util.concurrent.TimeUnit.MILLISECONDS;
033
034import com.google.common.annotations.GwtIncompatible;
035import com.google.common.base.Function;
036import com.google.common.base.MoreObjects;
037import com.google.common.base.Stopwatch;
038import com.google.common.collect.Collections2;
039import com.google.common.collect.ImmutableCollection;
040import com.google.common.collect.ImmutableList;
041import com.google.common.collect.ImmutableMap;
042import com.google.common.collect.ImmutableSet;
043import com.google.common.collect.ImmutableSetMultimap;
044import com.google.common.collect.Lists;
045import com.google.common.collect.Maps;
046import com.google.common.collect.MultimapBuilder;
047import com.google.common.collect.Multimaps;
048import com.google.common.collect.Multiset;
049import com.google.common.collect.Ordering;
050import com.google.common.collect.SetMultimap;
051import com.google.common.util.concurrent.Service.State;
052import com.google.errorprone.annotations.CanIgnoreReturnValue;
053import com.google.errorprone.annotations.concurrent.GuardedBy;
054import com.google.j2objc.annotations.J2ObjCIncompatible;
055import com.google.j2objc.annotations.WeakOuter;
056import java.lang.ref.WeakReference;
057import java.time.Duration;
058import java.util.Collections;
059import java.util.EnumSet;
060import java.util.List;
061import java.util.Map;
062import java.util.Map.Entry;
063import java.util.concurrent.Executor;
064import java.util.concurrent.TimeUnit;
065import java.util.concurrent.TimeoutException;
066import java.util.logging.Level;
067import java.util.logging.Logger;
068
069/**
070 * A manager for monitoring and controlling a set of {@linkplain Service services}. This class
071 * provides methods for {@linkplain #startAsync() starting}, {@linkplain #stopAsync() stopping} and
072 * {@linkplain #servicesByState inspecting} a collection of {@linkplain Service services}.
073 * Additionally, users can monitor state transitions with the {@linkplain Listener listener}
074 * mechanism.
075 *
076 * <p>While it is recommended that service lifecycles be managed via this class, state transitions
077 * initiated via other mechanisms do not impact the correctness of its methods. For example, if the
078 * services are started by some mechanism besides {@link #startAsync}, the listeners will be invoked
079 * when appropriate and {@link #awaitHealthy} will still work as expected.
080 *
081 * <p>Here is a simple example of how to use a {@code ServiceManager} to start a server.
082 *
083 * <pre>{@code
084 * class Server {
085 *   public static void main(String[] args) {
086 *     Set<Service> services = ...;
087 *     ServiceManager manager = new ServiceManager(services);
088 *     manager.addListener(new Listener() {
089 *         public void stopped() {}
090 *         public void healthy() {
091 *           // Services have been initialized and are healthy, start accepting requests...
092 *         }
093 *         public void failure(Service service) {
094 *           // Something failed, at this point we could log it, notify a load balancer, or take
095 *           // some other action.  For now we will just exit.
096 *           System.exit(1);
097 *         }
098 *       },
099 *       MoreExecutors.directExecutor());
100 *
101 *     Runtime.getRuntime().addShutdownHook(new Thread() {
102 *       public void run() {
103 *         // Give the services 5 seconds to stop to ensure that we are responsive to shutdown
104 *         // requests.
105 *         try {
106 *           manager.stopAsync().awaitStopped(5, TimeUnit.SECONDS);
107 *         } catch (TimeoutException timeout) {
108 *           // stopping timed out
109 *         }
110 *       }
111 *     });
112 *     manager.startAsync();  // start all the services asynchronously
113 *   }
114 * }
115 * }</pre>
116 *
117 * <p>This class uses the ServiceManager's methods to start all of its services, to respond to
118 * service failure and to ensure that when the JVM is shutting down all the services are stopped.
119 *
120 * @author Luke Sandberg
121 * @since 14.0
122 */
123@GwtIncompatible
124@ElementTypesAreNonnullByDefault
125public final class ServiceManager implements ServiceManagerBridge {
  /** Logger used by the manager, its state object and its per-service listeners. */
  private static final Logger logger = Logger.getLogger(ServiceManager.class.getName());

  /**
   * Event that invokes {@link Listener#healthy()} on a listener. It is enqueued by {@code
   * ServiceManagerState.enqueueHealthyEvent()}.
   */
  private static final ListenerCallQueue.Event<Listener> HEALTHY_EVENT =
      new ListenerCallQueue.Event<Listener>() {
        @Override
        public void call(Listener listener) {
          listener.healthy();
        }

        // Fixed label naming the callback this event triggers.
        @Override
        public String toString() {
          return "healthy()";
        }
      };

  /**
   * Event that invokes {@link Listener#stopped()} on a listener. It is enqueued by {@code
   * ServiceManagerState.enqueueStoppedEvent()}.
   */
  private static final ListenerCallQueue.Event<Listener> STOPPED_EVENT =
      new ListenerCallQueue.Event<Listener>() {
        @Override
        public void call(Listener listener) {
          listener.stopped();
        }

        // Fixed label naming the callback this event triggers.
        @Override
        public String toString() {
          return "stopped()";
        }
      };
151
152  /**
153   * A listener for the aggregate state changes of the services that are under management. Users
154   * that need to listen to more fine-grained events (such as when each particular {@linkplain
155   * Service service} starts, or terminates), should attach {@linkplain Service.Listener service
156   * listeners} to each individual service.
157   *
158   * @author Luke Sandberg
159   * @since 15.0 (present as an interface in 14.0)
160   */
161  public abstract static class Listener {
162    /**
163     * Called when the service initially becomes healthy.
164     *
165     * <p>This will be called at most once after all the services have entered the {@linkplain
166     * State#RUNNING running} state. If any services fail during start up or {@linkplain
167     * State#FAILED fail}/{@linkplain State#TERMINATED terminate} before all other services have
168     * started {@linkplain State#RUNNING running} then this method will not be called.
169     */
170    public void healthy() {}
171
172    /**
173     * Called when the all of the component services have reached a terminal state, either
174     * {@linkplain State#TERMINATED terminated} or {@linkplain State#FAILED failed}.
175     */
176    public void stopped() {}
177
178    /**
179     * Called when a component service has {@linkplain State#FAILED failed}.
180     *
181     * @param service The service that failed.
182     */
183    public void failure(Service service) {}
184  }
185
  /**
   * An encapsulation of all of the state that is accessed by the {@linkplain ServiceListener
   * service listeners}. This is extracted into its own object so that {@link ServiceListener} could
   * be made {@code static} and its instances can be safely constructed and added in the {@link
   * ServiceManager} constructor without having to close over the partially constructed {@link
   * ServiceManager} instance (i.e. avoid leaking a pointer to {@code this}).
   */
  private final ServiceManagerState state;

  /**
   * The managed services, copied from the constructor argument. When that argument was empty this
   * list holds a single placeholder {@code NoOpService} instead.
   */
  private final ImmutableList<Service> services;
196
197  /**
198   * Constructs a new instance for managing the given services.
199   *
200   * @param services The services to manage
201   * @throws IllegalArgumentException if not all services are {@linkplain State#NEW new} or if there
202   *     are any duplicate services.
203   */
204  public ServiceManager(Iterable<? extends Service> services) {
205    ImmutableList<Service> copy = ImmutableList.copyOf(services);
206    if (copy.isEmpty()) {
207      // Having no services causes the manager to behave strangely. Notably, listeners are never
208      // fired. To avoid this we substitute a placeholder service.
209      logger.log(
210          Level.WARNING,
211          "ServiceManager configured with no services.  Is your application configured properly?",
212          new EmptyServiceManagerWarning());
213      copy = ImmutableList.<Service>of(new NoOpService());
214    }
215    this.state = new ServiceManagerState(copy);
216    this.services = copy;
217    WeakReference<ServiceManagerState> stateReference = new WeakReference<>(state);
218    for (Service service : copy) {
219      service.addListener(new ServiceListener(service, stateReference), directExecutor());
220      // We check the state after adding the listener as a way to ensure that our listener was added
221      // to a NEW service.
222      checkArgument(service.state() == NEW, "Can only manage NEW services, %s", service);
223    }
224    // We have installed all of our listeners and after this point any state transition should be
225    // correct.
226    this.state.markReady();
227  }
228
229  /**
230   * Registers a {@link Listener} to be {@linkplain Executor#execute executed} on the given
231   * executor. The listener will not have previous state changes replayed, so it is suggested that
232   * listeners are added before any of the managed services are {@linkplain Service#startAsync
233   * started}.
234   *
235   * <p>{@code addListener} guarantees execution ordering across calls to a given listener but not
236   * across calls to multiple listeners. Specifically, a given listener will have its callbacks
237   * invoked in the same order as the underlying service enters those states. Additionally, at most
238   * one of the listener's callbacks will execute at once. However, multiple listeners' callbacks
239   * may execute concurrently, and listeners may execute in an order different from the one in which
240   * they were registered.
241   *
242   * <p>RuntimeExceptions thrown by a listener will be caught and logged. Any exception thrown
243   * during {@code Executor.execute} (e.g., a {@code RejectedExecutionException}) will be caught and
244   * logged.
245   *
246   * <p>When selecting an executor, note that {@code directExecutor} is dangerous in some cases. See
247   * the discussion in the {@link ListenableFuture#addListener ListenableFuture.addListener}
248   * documentation.
249   *
250   * @param listener the listener to run when the manager changes state
251   * @param executor the executor in which the listeners callback methods will be run.
252   */
253  public void addListener(Listener listener, Executor executor) {
254    state.addListener(listener, executor);
255  }
256
257  /**
258   * Initiates service {@linkplain Service#startAsync startup} on all the services being managed. It
259   * is only valid to call this method if all of the services are {@linkplain State#NEW new}.
260   *
261   * @return this
262   * @throws IllegalStateException if any of the Services are not {@link State#NEW new} when the
263   *     method is called.
264   */
265  @CanIgnoreReturnValue
266  public ServiceManager startAsync() {
267    for (Service service : services) {
268      State state = service.state();
269      checkState(state == NEW, "Service %s is %s, cannot start it.", service, state);
270    }
271    for (Service service : services) {
272      try {
273        state.tryStartTiming(service);
274        service.startAsync();
275      } catch (IllegalStateException e) {
276        // This can happen if the service has already been started or stopped (e.g. by another
277        // service or listener). Our contract says it is safe to call this method if
278        // all services were NEW when it was called, and this has already been verified above, so we
279        // don't propagate the exception.
280        logger.log(Level.WARNING, "Unable to start Service " + service, e);
281      }
282    }
283    return this;
284  }
285
286  /**
287   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy}. The manager
288   * will become healthy after all the component services have reached the {@linkplain State#RUNNING
289   * running} state.
290   *
291   * @throws IllegalStateException if the service manager reaches a state from which it cannot
292   *     become {@linkplain #isHealthy() healthy}.
293   */
294  public void awaitHealthy() {
295    state.awaitHealthy();
296  }
297
298  /**
299   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy} for no more
300   * than the given time. The manager will become healthy after all the component services have
301   * reached the {@linkplain State#RUNNING running} state.
302   *
303   * @param timeout the maximum time to wait
304   * @throws TimeoutException if not all of the services have finished starting within the deadline
305   * @throws IllegalStateException if the service manager reaches a state from which it cannot
306   *     become {@linkplain #isHealthy() healthy}.
307   * @since 28.0
308   */
309  public void awaitHealthy(Duration timeout) throws TimeoutException {
310    awaitHealthy(toNanosSaturated(timeout), TimeUnit.NANOSECONDS);
311  }
312
313  /**
314   * Waits for the {@link ServiceManager} to become {@linkplain #isHealthy() healthy} for no more
315   * than the given time. The manager will become healthy after all the component services have
316   * reached the {@linkplain State#RUNNING running} state.
317   *
318   * @param timeout the maximum time to wait
319   * @param unit the time unit of the timeout argument
320   * @throws TimeoutException if not all of the services have finished starting within the deadline
321   * @throws IllegalStateException if the service manager reaches a state from which it cannot
322   *     become {@linkplain #isHealthy() healthy}.
323   */
324  @SuppressWarnings("GoodTime") // should accept a java.time.Duration
325  public void awaitHealthy(long timeout, TimeUnit unit) throws TimeoutException {
326    state.awaitHealthy(timeout, unit);
327  }
328
329  /**
330   * Initiates service {@linkplain Service#stopAsync shutdown} if necessary on all the services
331   * being managed.
332   *
333   * @return this
334   */
335  @CanIgnoreReturnValue
336  public ServiceManager stopAsync() {
337    for (Service service : services) {
338      service.stopAsync();
339    }
340    return this;
341  }
342
343  /**
344   * Waits for the all the services to reach a terminal state. After this method returns all
345   * services will either be {@linkplain Service.State#TERMINATED terminated} or {@linkplain
346   * Service.State#FAILED failed}.
347   */
348  public void awaitStopped() {
349    state.awaitStopped();
350  }
351
352  /**
353   * Waits for the all the services to reach a terminal state for no more than the given time. After
354   * this method returns all services will either be {@linkplain Service.State#TERMINATED
355   * terminated} or {@linkplain Service.State#FAILED failed}.
356   *
357   * @param timeout the maximum time to wait
358   * @throws TimeoutException if not all of the services have stopped within the deadline
359   * @since 28.0
360   */
361  public void awaitStopped(Duration timeout) throws TimeoutException {
362    awaitStopped(toNanosSaturated(timeout), TimeUnit.NANOSECONDS);
363  }
364
365  /**
366   * Waits for the all the services to reach a terminal state for no more than the given time. After
367   * this method returns all services will either be {@linkplain Service.State#TERMINATED
368   * terminated} or {@linkplain Service.State#FAILED failed}.
369   *
370   * @param timeout the maximum time to wait
371   * @param unit the time unit of the timeout argument
372   * @throws TimeoutException if not all of the services have stopped within the deadline
373   */
374  @SuppressWarnings("GoodTime") // should accept a java.time.Duration
375  public void awaitStopped(long timeout, TimeUnit unit) throws TimeoutException {
376    state.awaitStopped(timeout, unit);
377  }
378
379  /**
380   * Returns true if all services are currently in the {@linkplain State#RUNNING running} state.
381   *
382   * <p>Users who want more detailed information should use the {@link #servicesByState} method to
383   * get detailed information about which services are not running.
384   */
385  public boolean isHealthy() {
386    for (Service service : services) {
387      if (!service.isRunning()) {
388        return false;
389      }
390    }
391    return true;
392  }
393
394  /**
395   * Provides a snapshot of the current state of all the services under management.
396   *
397   * <p>N.B. This snapshot is guaranteed to be consistent, i.e. the set of states returned will
398   * correspond to a point in time view of the services.
399   *
400   * @since 29.0 (present with return type {@code ImmutableMultimap} since 14.0)
401   */
402  @Override
403  public ImmutableSetMultimap<State, Service> servicesByState() {
404    return state.servicesByState();
405  }
406
407  /**
408   * Returns the service load times. This value will only return startup times for services that
409   * have finished starting.
410   *
411   * @return Map of services and their corresponding startup time in millis, the map entries will be
412   *     ordered by startup time.
413   */
414  public ImmutableMap<Service, Long> startupTimes() {
415    return state.startupTimes();
416  }
417
418  /**
419   * Returns the service load times. This value will only return startup times for services that
420   * have finished starting.
421   *
422   * @return Map of services and their corresponding startup time, the map entries will be ordered
423   *     by startup time.
424   * @since 31.0
425   */
426  @J2ObjCIncompatible
427  public ImmutableMap<Service, Duration> startupDurations() {
428    return ImmutableMap.copyOf(
429        Maps.<Service, Long, Duration>transformValues(startupTimes(), Duration::ofMillis));
430  }
431
432  @Override
433  public String toString() {
434    return MoreObjects.toStringHelper(ServiceManager.class)
435        .add("services", Collections2.filter(services, not(instanceOf(NoOpService.class))))
436        .toString();
437  }
438
439  /**
440   * An encapsulation of all the mutable state of the {@link ServiceManager} that needs to be
441   * accessed by instances of {@link ServiceListener}.
442   */
443  private static final class ServiceManagerState {
    // Lock protecting the mutable fields below; the two guards declared further down are bound
    // to it.
    final Monitor monitor = new Monitor();

    // The managed services grouped by their last observed state. The constructor files every
    // service under NEW and transitionService moves them between keys.
    @GuardedBy("monitor")
    final SetMultimap<State, Service> servicesByState =
        MultimapBuilder.enumKeys(State.class).linkedHashSetValues().build();

    // The keys() multiset of servicesByState; the guards, transitionService and checkHealthy read
    // per-state counts from it.
    @GuardedBy("monitor")
    final Multiset<State> states = servicesByState.keys();

    // Startup stopwatch for each service, in an identity-based map. Entries are created by
    // tryStartTiming or, if none exists yet, by transitionService.
    @GuardedBy("monitor")
    final Map<Service, Stopwatch> startupTimers = Maps.newIdentityHashMap();

    /**
     * These two booleans are used to mark the state as ready to start.
     *
     * <p>{@link #ready}: is set by {@link #markReady} to indicate that all listeners have been
     * correctly installed
     *
     * <p>{@link #transitioned}: is set by {@link #transitionService} to indicate that some
     * transition has been performed.
     *
     * <p>Together, they allow us to enforce that all services have their listeners installed prior
     * to any service performing a transition, then we can fail in the ServiceManager constructor
     * rather than in a Service.Listener callback.
     */
    @GuardedBy("monitor")
    boolean ready;

    @GuardedBy("monitor")
    boolean transitioned;

    // Size of the collection passed to the constructor; compared against per-state counts to
    // decide whether all services are running or all have stopped.
    final int numberOfServices;
476
477    /**
478     * Controls how long to wait for all the services to either become healthy or reach a state from
479     * which it is guaranteed that it can never become healthy.
480     */
481    final Monitor.Guard awaitHealthGuard = new AwaitHealthGuard();
482
483    @WeakOuter
484    final class AwaitHealthGuard extends Monitor.Guard {
485      AwaitHealthGuard() {
486        super(ServiceManagerState.this.monitor);
487      }
488
489      @Override
490      @GuardedBy("ServiceManagerState.this.monitor")
491      public boolean isSatisfied() {
492        // All services have started or some service has terminated/failed.
493        return states.count(RUNNING) == numberOfServices
494            || states.contains(STOPPING)
495            || states.contains(TERMINATED)
496            || states.contains(FAILED);
497      }
498    }
499
500    /** Controls how long to wait for all services to reach a terminal state. */
501    final Monitor.Guard stoppedGuard = new StoppedGuard();
502
503    @WeakOuter
504    final class StoppedGuard extends Monitor.Guard {
505      StoppedGuard() {
506        super(ServiceManagerState.this.monitor);
507      }
508
509      @Override
510      @GuardedBy("ServiceManagerState.this.monitor")
511      public boolean isSatisfied() {
512        return states.count(TERMINATED) + states.count(FAILED) == numberOfServices;
513      }
514    }
515
    /**
     * The listeners to notify during a state transition. Events are enqueued by the {@code
     * enqueue*Event} methods and delivered by {@link #dispatchListenerEvents()}.
     */
    final ListenerCallQueue<Listener> listeners = new ListenerCallQueue<>();
518
519    /**
520     * It is implicitly assumed that all the services are NEW and that they will all remain NEW
521     * until all the Listeners are installed and {@link #markReady()} is called. It is our caller's
522     * responsibility to only call {@link #markReady()} if all services were new at the time this
523     * method was called and when all the listeners were installed.
524     */
525    ServiceManagerState(ImmutableCollection<Service> services) {
526      this.numberOfServices = services.size();
527      servicesByState.putAll(NEW, services);
528    }
529
530    /**
531     * Attempts to start the timer immediately prior to the service being started via {@link
532     * Service#startAsync()}.
533     */
534    void tryStartTiming(Service service) {
535      monitor.enter();
536      try {
537        Stopwatch stopwatch = startupTimers.get(service);
538        if (stopwatch == null) {
539          startupTimers.put(service, Stopwatch.createStarted());
540        }
541      } finally {
542        monitor.leave();
543      }
544    }
545
546    /**
547     * Marks the {@link State} as ready to receive transitions. Returns true if no transitions have
548     * been observed yet.
549     */
550    void markReady() {
551      monitor.enter();
552      try {
553        if (!transitioned) {
554          // nothing has transitioned since construction, good.
555          ready = true;
556        } else {
557          // This should be an extremely rare race condition.
558          List<Service> servicesInBadStates = Lists.newArrayList();
559          for (Service service : servicesByState().values()) {
560            if (service.state() != NEW) {
561              servicesInBadStates.add(service);
562            }
563          }
564          throw new IllegalArgumentException(
565              "Services started transitioning asynchronously before "
566                  + "the ServiceManager was constructed: "
567                  + servicesInBadStates);
568        }
569      } finally {
570        monitor.leave();
571      }
572    }
573
574    void addListener(Listener listener, Executor executor) {
575      listeners.addListener(listener, executor);
576    }
577
578    void awaitHealthy() {
579      monitor.enterWhenUninterruptibly(awaitHealthGuard);
580      try {
581        checkHealthy();
582      } finally {
583        monitor.leave();
584      }
585    }
586
587    void awaitHealthy(long timeout, TimeUnit unit) throws TimeoutException {
588      monitor.enter();
589      try {
590        if (!monitor.waitForUninterruptibly(awaitHealthGuard, timeout, unit)) {
591          throw new TimeoutException(
592              "Timeout waiting for the services to become healthy. The "
593                  + "following services have not started: "
594                  + Multimaps.filterKeys(servicesByState, in(ImmutableSet.of(NEW, STARTING))));
595        }
596        checkHealthy();
597      } finally {
598        monitor.leave();
599      }
600    }
601
602    void awaitStopped() {
603      monitor.enterWhenUninterruptibly(stoppedGuard);
604      monitor.leave();
605    }
606
607    void awaitStopped(long timeout, TimeUnit unit) throws TimeoutException {
608      monitor.enter();
609      try {
610        if (!monitor.waitForUninterruptibly(stoppedGuard, timeout, unit)) {
611          throw new TimeoutException(
612              "Timeout waiting for the services to stop. The following "
613                  + "services have not stopped: "
614                  + Multimaps.filterKeys(servicesByState, not(in(EnumSet.of(TERMINATED, FAILED)))));
615        }
616      } finally {
617        monitor.leave();
618      }
619    }
620
621    ImmutableSetMultimap<State, Service> servicesByState() {
622      ImmutableSetMultimap.Builder<State, Service> builder = ImmutableSetMultimap.builder();
623      monitor.enter();
624      try {
625        for (Entry<State, Service> entry : servicesByState.entries()) {
626          if (!(entry.getValue() instanceof NoOpService)) {
627            builder.put(entry);
628          }
629        }
630      } finally {
631        monitor.leave();
632      }
633      return builder.build();
634    }
635
636    ImmutableMap<Service, Long> startupTimes() {
637      List<Entry<Service, Long>> loadTimes;
638      monitor.enter();
639      try {
640        loadTimes = Lists.newArrayListWithCapacity(startupTimers.size());
641        // N.B. There will only be an entry in the map if the service has started
642        for (Entry<Service, Stopwatch> entry : startupTimers.entrySet()) {
643          Service service = entry.getKey();
644          Stopwatch stopwatch = entry.getValue();
645          if (!stopwatch.isRunning() && !(service instanceof NoOpService)) {
646            loadTimes.add(Maps.immutableEntry(service, stopwatch.elapsed(MILLISECONDS)));
647          }
648        }
649      } finally {
650        monitor.leave();
651      }
652      Collections.sort(
653          loadTimes,
654          Ordering.natural()
655              .onResultOf(
656                  new Function<Entry<Service, Long>, Long>() {
657                    @Override
658                    public Long apply(Entry<Service, Long> input) {
659                      return input.getValue();
660                    }
661                  }));
662      return ImmutableMap.copyOf(loadTimes);
663    }
664
665    /**
666     * Updates the state with the given service transition.
667     *
668     * <p>This method performs the main logic of ServiceManager in the following steps.
669     *
670     * <ol>
671     *   <li>Update the {@link #servicesByState()}
672     *   <li>Update the {@link #startupTimers}
673     *   <li>Based on the new state queue listeners to run
674     *   <li>Run the listeners (outside of the lock)
675     * </ol>
676     */
677    void transitionService(final Service service, State from, State to) {
678      checkNotNull(service);
679      checkArgument(from != to);
680      monitor.enter();
681      try {
682        transitioned = true;
683        if (!ready) {
684          return;
685        }
686        // Update state.
687        checkState(
688            servicesByState.remove(from, service),
689            "Service %s not at the expected location in the state map %s",
690            service,
691            from);
692        checkState(
693            servicesByState.put(to, service),
694            "Service %s in the state map unexpectedly at %s",
695            service,
696            to);
697        // Update the timer
698        Stopwatch stopwatch = startupTimers.get(service);
699        if (stopwatch == null) {
700          // This means the service was started by some means other than ServiceManager.startAsync
701          stopwatch = Stopwatch.createStarted();
702          startupTimers.put(service, stopwatch);
703        }
704        if (to.compareTo(RUNNING) >= 0 && stopwatch.isRunning()) {
705          // N.B. if we miss the STARTING event then we may never record a startup time.
706          stopwatch.stop();
707          if (!(service instanceof NoOpService)) {
708            logger.log(Level.FINE, "Started {0} in {1}.", new Object[] {service, stopwatch});
709          }
710        }
711        // Queue our listeners
712
713        // Did a service fail?
714        if (to == FAILED) {
715          enqueueFailedEvent(service);
716        }
717
718        if (states.count(RUNNING) == numberOfServices) {
719          // This means that the manager is currently healthy. N.B. If other threads call isHealthy
720          // they are not guaranteed to get 'true', because any service could fail right now.
721          enqueueHealthyEvent();
722        } else if (states.count(TERMINATED) + states.count(FAILED) == numberOfServices) {
723          enqueueStoppedEvent();
724        }
725      } finally {
726        monitor.leave();
727        // Run our executors outside of the lock
728        dispatchListenerEvents();
729      }
730    }
731
732    void enqueueStoppedEvent() {
733      listeners.enqueue(STOPPED_EVENT);
734    }
735
736    void enqueueHealthyEvent() {
737      listeners.enqueue(HEALTHY_EVENT);
738    }
739
740    void enqueueFailedEvent(final Service service) {
741      listeners.enqueue(
742          new ListenerCallQueue.Event<Listener>() {
743            @Override
744            public void call(Listener listener) {
745              listener.failure(service);
746            }
747
748            @Override
749            public String toString() {
750              return "failed({service=" + service + "})";
751            }
752          });
753    }
754
755    /** Attempts to execute all the listeners in {@link #listeners}. */
756    void dispatchListenerEvents() {
757      checkState(
758          !monitor.isOccupiedByCurrentThread(),
759          "It is incorrect to execute listeners with the monitor held.");
760      listeners.dispatch();
761    }
762
763    @GuardedBy("monitor")
764    void checkHealthy() {
765      if (states.count(RUNNING) != numberOfServices) {
766        IllegalStateException exception =
767            new IllegalStateException(
768                "Expected to be healthy after starting. The following services are not running: "
769                    + Multimaps.filterKeys(servicesByState, not(equalTo(RUNNING))));
770        for (Service service : servicesByState.get(State.FAILED)) {
771          exception.addSuppressed(new FailedService(service));
772        }
773        throw exception;
774      }
775    }
776  }
777
778  /**
779   * A {@link Service} that wraps another service and times how long it takes for it to start and
780   * also calls the {@link ServiceManagerState#transitionService(Service, State, State)}, to record
781   * the state transitions.
782   */
783  private static final class ServiceListener extends Service.Listener {
784    final Service service;
785    // We store the state in a weak reference to ensure that if something went wrong while
786    // constructing the ServiceManager we don't pointlessly keep updating the state.
787    final WeakReference<ServiceManagerState> state;
788
789    ServiceListener(Service service, WeakReference<ServiceManagerState> state) {
790      this.service = service;
791      this.state = state;
792    }
793
794    @Override
795    public void starting() {
796      ServiceManagerState state = this.state.get();
797      if (state != null) {
798        state.transitionService(service, NEW, STARTING);
799        if (!(service instanceof NoOpService)) {
800          logger.log(Level.FINE, "Starting {0}.", service);
801        }
802      }
803    }
804
805    @Override
806    public void running() {
807      ServiceManagerState state = this.state.get();
808      if (state != null) {
809        state.transitionService(service, STARTING, RUNNING);
810      }
811    }
812
813    @Override
814    public void stopping(State from) {
815      ServiceManagerState state = this.state.get();
816      if (state != null) {
817        state.transitionService(service, from, STOPPING);
818      }
819    }
820
821    @Override
822    public void terminated(State from) {
823      ServiceManagerState state = this.state.get();
824      if (state != null) {
825        if (!(service instanceof NoOpService)) {
826          logger.log(
827              Level.FINE,
828              "Service {0} has terminated. Previous state was: {1}",
829              new Object[] {service, from});
830        }
831        state.transitionService(service, from, TERMINATED);
832      }
833    }
834
835    @Override
836    public void failed(State from, Throwable failure) {
837      ServiceManagerState state = this.state.get();
838      if (state != null) {
839        // Log before the transition, so that if the process exits in response to server failure,
840        // there is a higher likelihood that the cause will be in the logs.
841        boolean log = !(service instanceof NoOpService);
842        /*
843         * We have already exposed startup exceptions to the user in the form of suppressed
844         * exceptions. We don't need to log those exceptions again.
845         */
846        log &= from != State.STARTING;
847        if (log) {
848          logger.log(
849              Level.SEVERE,
850              "Service " + service + " has failed in the " + from + " state.",
851              failure);
852        }
853        state.transitionService(service, from, FAILED);
854      }
855    }
856  }
857
858  /**
859   * A {@link Service} instance that does nothing. This is only useful as a placeholder to ensure
860   * that the {@link ServiceManager} functions properly even when it is managing no services.
861   *
862   * <p>The use of this class is considered an implementation detail of ServiceManager and as such
863   * it is excluded from {@link #servicesByState}, {@link #startupTimes}, {@link #toString} and all
864   * logging statements.
865   */
866  private static final class NoOpService extends AbstractService {
867    @Override
868    protected void doStart() {
869      notifyStarted();
870    }
871
872    @Override
873    protected void doStop() {
874      notifyStopped();
875    }
876  }
877
878  /** This is never thrown but only used for logging. */
879  private static final class EmptyServiceManagerWarning extends Throwable {}
880
881  private static final class FailedService extends Throwable {
882    FailedService(Service service) {
883      super(
884          service.toString(),
885          service.failureCause(),
886          false /* don't enable suppression */,
887          false /* don't calculate a stack trace. */);
888    }
889  }
890}