001/*
002 * Copyright (C) 2017 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.graph;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021import static java.util.Objects.requireNonNull;
022
023import com.google.common.annotations.Beta;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableSet;
026import com.google.errorprone.annotations.DoNotMock;
027import java.util.ArrayDeque;
028import java.util.Deque;
029import java.util.HashSet;
030import java.util.Iterator;
031import java.util.Set;
032import javax.annotation.CheckForNull;
033
034/**
035 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s)
036 * using a specified {@link SuccessorsFunction}.
037 *
038 * <p>There are two entry points for creating a {@code Traverser}: {@link
039 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one
040 * based on your answers to the following questions:
041 *
042 * <ol>
043 *   <li>Is there only one path to any node that's reachable from any start node? (If so, the graph
044 *       to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.)
045 *   <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a
046 *       href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>?
047 * </ol>
048 *
049 * <p>If your answers are:
050 *
051 * <ul>
052 *   <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}.
053 *   <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}.
054 *   <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient.
055 *   <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node
056 *       objects into a non-recursive form, you can use {@code forGraph()}.
057 * </ul>
058 *
059 * @author Jens Nyman
060 * @param <N> Node parameter type
061 * @since 23.1
062 */
063@Beta
064@DoNotMock(
065    "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with"
066        + " GraphBuilder)")
067@ElementTypesAreNonnullByDefault
068public abstract class Traverser<N> {
069  private final SuccessorsFunction<N> successorFunction;
070
071  private Traverser(SuccessorsFunction<N> successorFunction) {
072    this.successorFunction = checkNotNull(successorFunction);
073  }
074
075  /**
076   * Creates a new traverser for the given general {@code graph}.
077   *
078   * <p>Traversers created using this method are guaranteed to visit each node reachable from the
079   * start node(s) at most once.
080   *
081   * <p>If you know that no node in {@code graph} is reachable by more than one path from the start
082   * node(s), consider using {@link #forTree(SuccessorsFunction)} instead.
083   *
084   * <p><b>Performance notes</b>
085   *
086   * <ul>
087   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
088   *       the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and
089   *       {@code hashCode()} implementations. (See the <a
090   *       href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys">
091   *       notes on element objects</a> for more information.)
092   *   <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number
093   *       of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the
094   *       number of nodes that have been seen but not yet visited, that is, the "horizon").
095   * </ul>
096   *
097   * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles.
098   */
099  public static <N> Traverser<N> forGraph(SuccessorsFunction<N> graph) {
100    return new Traverser<N>(graph) {
101      @Override
102      Traversal<N> newTraversal() {
103        return Traversal.inGraph(graph);
104      }
105    };
106  }
107
108  /**
109   * Creates a new traverser for a directed acyclic graph that has at most one path from the start
110   * node(s) to any node reachable from the start node(s), and has no paths from any start node to
111   * any other start node, such as a tree or forest.
112   *
113   * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data
114   * structure being traversed is, in addition to being a tree/forest, also defined <a
115   * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>.
116   * This is because the {@code forTree()}-based implementations don't keep track of visited nodes,
   * and therefore don't need to call {@code equals()} or {@code hashCode()} on the node objects;
   * this saves both time and space versus traversing the same graph using {@code forGraph()}.
119   *
120   * <p>Providing a graph to be traversed for which there is more than one path from the start
121   * node(s) to any node may lead to:
122   *
123   * <ul>
124   *   <li>Traversal not terminating (if the graph has cycles)
125   *   <li>Nodes being visited multiple times (if multiple paths exist from any start node to any
126   *       node reachable from any start node)
127   * </ul>
128   *
129   * <p><b>Performance notes</b>
130   *
131   * <ul>
132   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
133   *       the start node).
134   *   <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number
135   *       of nodes that have been seen but not yet visited, that is, the "horizon").
136   * </ul>
137   *
138   * <p><b>Examples</b> (all edges are directed facing downwards)
139   *
140   * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code
141   * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and
142   * {@code h}.
143   *
144   * <pre>{@code
145   *    a     b      c
146   *   / \   / \     |
147   *  /   \ /   \    |
148   * d     e     f   g
149   *       |
150   *       |
151   *       h
152   * }</pre>
153   *
154   * <p>.
155   *
156   * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code
157   * b} were a start node, there would be multiple paths to {@code f}.
158   *
159   * <pre>{@code
160   *    a     b
161   *   / \   / \
162   *  /   \ /   \
163   * c     d     e
164   *        \   /
165   *         \ /
166   *          f
167   * }</pre>
168   *
169   * <p><b>Note on binary trees</b>
170   *
171   * <p>This method can be used to traverse over a binary tree. Given methods {@code
172   * leftChild(node)} and {@code rightChild(node)}, this method can be called as
173   *
174   * <pre>{@code
175   * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node)));
176   * }</pre>
177   *
178   * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most
179   *     one path between any two nodes
180   */
181  public static <N> Traverser<N> forTree(SuccessorsFunction<N> tree) {
182    if (tree instanceof BaseGraph) {
183      checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees.");
184    }
185    if (tree instanceof Network) {
186      checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees.");
187    }
188    return new Traverser<N>(tree) {
189      @Override
190      Traversal<N> newTraversal() {
191        return Traversal.inTree(tree);
192      }
193    };
194  }
195
196  /**
197   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
198   * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then
199   * depth 1, then 2, and so on.
200   *
201   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
202   * the order {@code abcdef} (assuming successors are returned in alphabetical order).
203   *
204   * <pre>{@code
205   * b ---- a ---- d
206   * |      |
207   * |      |
208   * e ---- c ---- f
209   * }</pre>
210   *
211   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
212   * while iteration is in progress.
213   *
214   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
215   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
216   * number of nodes as follows:
217   *
218   * <pre>{@code
219   * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes);
220   * }</pre>
221   *
222   * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more
223   * info.
224   *
225   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
226   */
227  public final Iterable<N> breadthFirst(N startNode) {
228    return breadthFirst(ImmutableSet.of(startNode));
229  }
230
231  /**
232   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
233   * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first
234   * traversal of a graph with an additional root node whose successors are the listed {@code
235   * startNodes}.
236   *
237   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
238   * @see #breadthFirst(Object)
239   * @since 24.1
240   */
241  public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) {
242    ImmutableSet<N> validated = validate(startNodes);
243    return new Iterable<N>() {
244      @Override
245      public Iterator<N> iterator() {
246        return newTraversal().breadthFirst(validated.iterator());
247      }
248    };
249  }
250
251  /**
252   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
253   * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the
254   * {@code Iterable} in the order in which they are first visited.
255   *
256   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
257   * the order {@code abecfd} (assuming successors are returned in alphabetical order).
258   *
259   * <pre>{@code
260   * b ---- a ---- d
261   * |      |
262   * |      |
263   * e ---- c ---- f
264   * }</pre>
265   *
266   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
267   * while iteration is in progress.
268   *
269   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
270   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
271   * number of nodes as follows:
272   *
273   * <pre>{@code
274   * Iterables.limit(
275   *     Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes);
276   * }</pre>
277   *
278   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
279   *
280   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
281   */
282  public final Iterable<N> depthFirstPreOrder(N startNode) {
283    return depthFirstPreOrder(ImmutableSet.of(startNode));
284  }
285
286  /**
287   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
288   * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a
289   * depth-first pre-order traversal of a graph with an additional root node whose successors are
290   * the listed {@code startNodes}.
291   *
292   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
293   * @see #depthFirstPreOrder(Object)
294   * @since 24.1
295   */
296  public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) {
297    ImmutableSet<N> validated = validate(startNodes);
298    return new Iterable<N>() {
299      @Override
300      public Iterator<N> iterator() {
301        return newTraversal().preOrder(validated.iterator());
302      }
303    };
304  }
305
306  /**
307   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
308   * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the
309   * {@code Iterable} in the order in which they are visited for the last time.
310   *
311   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
312   * the order {@code fcebda} (assuming successors are returned in alphabetical order).
313   *
314   * <pre>{@code
315   * b ---- a ---- d
316   * |      |
317   * |      |
318   * e ---- c ---- f
319   * }</pre>
320   *
321   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
322   * while iteration is in progress.
323   *
324   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
325   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
326   * number of nodes as follows:
327   *
328   * <pre>{@code
329   * Iterables.limit(
330   *     Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes);
331   * }</pre>
332   *
333   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
334   *
335   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
336   */
337  public final Iterable<N> depthFirstPostOrder(N startNode) {
338    return depthFirstPostOrder(ImmutableSet.of(startNode));
339  }
340
341  /**
342   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
343   * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a
344   * depth-first post-order traversal of a graph with an additional root node whose successors are
345   * the listed {@code startNodes}.
346   *
347   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
348   * @see #depthFirstPostOrder(Object)
349   * @since 24.1
350   */
351  public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) {
352    ImmutableSet<N> validated = validate(startNodes);
353    return new Iterable<N>() {
354      @Override
355      public Iterator<N> iterator() {
356        return newTraversal().postOrder(validated.iterator());
357      }
358    };
359  }
360
361  abstract Traversal<N> newTraversal();
362
363  @SuppressWarnings("CheckReturnValue")
364  private ImmutableSet<N> validate(Iterable<? extends N> startNodes) {
365    ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes);
366    for (N node : copy) {
367      successorFunction.successors(node); // Will throw if node doesn't exist
368    }
369    return copy;
370  }
371
372  /**
373   * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take
   * the next element from the next non-empty iterator; for a graph, we need to loop through the
   * next non-empty iterator to find the first unvisited node.
376   */
377  private abstract static class Traversal<N> {
378    final SuccessorsFunction<N> successorFunction;
379
380    Traversal(SuccessorsFunction<N> successorFunction) {
381      this.successorFunction = successorFunction;
382    }
383
384    static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) {
385      Set<N> visited = new HashSet<>();
386      return new Traversal<N>(graph) {
387        @Override
388        @CheckForNull
389        N visitNext(Deque<Iterator<? extends N>> horizon) {
390          Iterator<? extends N> top = horizon.getFirst();
391          while (top.hasNext()) {
392            N element = top.next();
393            // requireNonNull is safe because horizon contains only graph nodes.
394            /*
395             * TODO(cpovirk): Replace these two statements with one (`N element =
396             * requireNonNull(top.next())`) once our checker supports it.
397             *
398             * (The problem is likely
399             * https://github.com/jspecify/jspecify-reference-checker/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896)
400             */
401            requireNonNull(element);
402            if (visited.add(element)) {
403              return element;
404            }
405          }
406          horizon.removeFirst();
407          return null;
408        }
409      };
410    }
411
412    static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) {
413      return new Traversal<N>(tree) {
414        @CheckForNull
415        @Override
416        N visitNext(Deque<Iterator<? extends N>> horizon) {
417          Iterator<? extends N> top = horizon.getFirst();
418          if (top.hasNext()) {
419            return checkNotNull(top.next());
420          }
421          horizon.removeFirst();
422          return null;
423        }
424      };
425    }
426
427    final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) {
428      return topDown(startNodes, InsertionOrder.BACK);
429    }
430
431    final Iterator<N> preOrder(Iterator<? extends N> startNodes) {
432      return topDown(startNodes, InsertionOrder.FRONT);
433    }
434
435    /**
436     * In top-down traversal, an ancestor node is always traversed before any of its descendant
     * nodes. The traversal order among descendant nodes (particularly aunts and nieces) is
438     * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before
439     * aunts for pre-order; while in BFS they are placed at the BACK after aunts.
440     */
441    private Iterator<N> topDown(Iterator<? extends N> startNodes, InsertionOrder order) {
442      Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
443      horizon.add(startNodes);
444      return new AbstractIterator<N>() {
445        @Override
446        @CheckForNull
447        protected N computeNext() {
448          do {
449            N next = visitNext(horizon);
450            if (next != null) {
451              Iterator<? extends N> successors = successorFunction.successors(next).iterator();
452              if (successors.hasNext()) {
453                // BFS: horizon.addLast(successors)
454                // Pre-order: horizon.addFirst(successors)
455                order.insertInto(horizon, successors);
456              }
457              return next;
458            }
459          } while (!horizon.isEmpty());
460          return endOfData();
461        }
462      };
463    }
464
465    final Iterator<N> postOrder(Iterator<? extends N> startNodes) {
466      Deque<N> ancestorStack = new ArrayDeque<>();
467      Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
468      horizon.add(startNodes);
469      return new AbstractIterator<N>() {
470        @Override
471        @CheckForNull
472        protected N computeNext() {
473          for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) {
474            Iterator<? extends N> successors = successorFunction.successors(next).iterator();
475            if (!successors.hasNext()) {
476              return next;
477            }
478            horizon.addFirst(successors);
479            ancestorStack.push(next);
480          }
481          // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker.
482          if (!ancestorStack.isEmpty()) {
483            return ancestorStack.pop();
484          }
485          return endOfData();
486        }
487      };
488    }
489
490    /**
491     * Visits the next node from the top iterator of {@code horizon} and returns the visited node.
492     * Null is returned to indicate reaching the end of the top iterator.
493     *
494     * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return
495     * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure.
496     * (Note, however, that the callers of {@code visitNext()} often insert additional iterators
497     * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive
498     * additional values interleaved with those shown above.)
499     */
500    @CheckForNull
501    abstract N visitNext(Deque<Iterator<? extends N>> horizon);
502  }
503
504  /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */
505  private enum InsertionOrder {
506    FRONT {
507      @Override
508      <T> void insertInto(Deque<T> deque, T value) {
509        deque.addFirst(value);
510      }
511    },
512    BACK {
513      @Override
514      <T> void insertInto(Deque<T> deque, T value) {
515        deque.addLast(value);
516      }
517    };
518
519    abstract <T> void insertInto(Deque<T> deque, T value);
520  }
521}