001/*
002 * Copyright (C) 2017 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.graph;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021import static java.util.Objects.requireNonNull;
022
023import com.google.common.annotations.Beta;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableSet;
026import com.google.errorprone.annotations.DoNotMock;
027import java.util.ArrayDeque;
028import java.util.Deque;
029import java.util.HashSet;
030import java.util.Iterator;
031import java.util.Set;
032import org.checkerframework.checker.nullness.qual.Nullable;
033
034/**
035 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s)
036 * using a specified {@link SuccessorsFunction}.
037 *
038 * <p>There are two entry points for creating a {@code Traverser}: {@link
039 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one
040 * based on your answers to the following questions:
041 *
042 * <ol>
043 *   <li>Is there only one path to any node that's reachable from any start node? (If so, the graph
044 *       to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.)
045 *   <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a
046 *       href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>?
047 * </ol>
048 *
049 * <p>If your answers are:
050 *
051 * <ul>
052 *   <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}.
053 *   <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}.
054 *   <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient.
055 *   <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node
056 *       objects into a non-recursive form, you can use {@code forGraph()}.
057 * </ul>
058 *
059 * @author Jens Nyman
060 * @param <N> Node parameter type
061 * @since 23.1
062 */
063@Beta
064@DoNotMock(
065    "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with"
066        + " GraphBuilder)")
067public abstract class Traverser<N> {
068  private final SuccessorsFunction<N> successorFunction;
069
070  private Traverser(SuccessorsFunction<N> successorFunction) {
071    this.successorFunction = checkNotNull(successorFunction);
072  }
073
074  /**
075   * Creates a new traverser for the given general {@code graph}.
076   *
077   * <p>Traversers created using this method are guaranteed to visit each node reachable from the
078   * start node(s) at most once.
079   *
080   * <p>If you know that no node in {@code graph} is reachable by more than one path from the start
081   * node(s), consider using {@link #forTree(SuccessorsFunction)} instead.
082   *
083   * <p><b>Performance notes</b>
084   *
085   * <ul>
086   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
087   *       the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and
088   *       {@code hashCode()} implementations. (See the <a
089   *       href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys">
090   *       notes on element objects</a> for more information.)
091   *   <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number
092   *       of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the
093   *       number of nodes that have been seen but not yet visited, that is, the "horizon").
094   * </ul>
095   *
096   * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles.
097   */
098  public static <N> Traverser<N> forGraph(SuccessorsFunction<N> graph) {
099    return new Traverser<N>(graph) {
100      @Override
101      Traversal<N> newTraversal() {
102        return Traversal.inGraph(graph);
103      }
104    };
105  }
106
107  /**
108   * Creates a new traverser for a directed acyclic graph that has at most one path from the start
109   * node(s) to any node reachable from the start node(s), and has no paths from any start node to
110   * any other start node, such as a tree or forest.
111   *
112   * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data
113   * structure being traversed is, in addition to being a tree/forest, also defined <a
114   * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>.
115   * This is because the {@code forTree()}-based implementations don't keep track of visited nodes,
116   * and therefore don't need to call {@code equals()} or {@code hashCode()} on the node objects;
117   * this saves both time and space versus traversing the same graph using {@code forGraph()}.
118   *
119   * <p>Providing a graph to be traversed for which there is more than one path from the start
120   * node(s) to any node may lead to:
121   *
122   * <ul>
123   *   <li>Traversal not terminating (if the graph has cycles)
124   *   <li>Nodes being visited multiple times (if multiple paths exist from any start node to any
125   *       node reachable from any start node)
126   * </ul>
127   *
128   * <p><b>Performance notes</b>
129   *
130   * <ul>
131   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
132   *       the start node).
133   *   <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number
134   *       of nodes that have been seen but not yet visited, that is, the "horizon").
135   * </ul>
136   *
137   * <p><b>Examples</b> (all edges are directed facing downwards)
138   *
139   * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code
140   * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and
141   * {@code h}.
142   *
143   * <pre>{@code
144   *    a     b      c
145   *   / \   / \     |
146   *  /   \ /   \    |
147   * d     e     f   g
148   *       |
149   *       |
150   *       h
151   * }</pre>
152   *
153   * <p>.
154   *
155   * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code
156   * b} were a start node, there would be multiple paths to {@code f}.
157   *
158   * <pre>{@code
159   *    a     b
160   *   / \   / \
161   *  /   \ /   \
162   * c     d     e
163   *        \   /
164   *         \ /
165   *          f
166   * }</pre>
167   *
168   * <p><b>Note on binary trees</b>
169   *
170   * <p>This method can be used to traverse over a binary tree. Given methods {@code
171   * leftChild(node)} and {@code rightChild(node)}, this method can be called as
172   *
173   * <pre>{@code
174   * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node)));
175   * }</pre>
176   *
177   * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most
178   *     one path between any two nodes
179   */
180  public static <N> Traverser<N> forTree(SuccessorsFunction<N> tree) {
181    if (tree instanceof BaseGraph) {
182      checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees.");
183    }
184    if (tree instanceof Network) {
185      checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees.");
186    }
187    return new Traverser<N>(tree) {
188      @Override
189      Traversal<N> newTraversal() {
190        return Traversal.inTree(tree);
191      }
192    };
193  }
194
195  /**
196   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
197   * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then
198   * depth 1, then 2, and so on.
199   *
200   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
201   * the order {@code abcdef} (assuming successors are returned in alphabetical order).
202   *
203   * <pre>{@code
204   * b ---- a ---- d
205   * |      |
206   * |      |
207   * e ---- c ---- f
208   * }</pre>
209   *
210   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
211   * while iteration is in progress.
212   *
213   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
214   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
215   * number of nodes as follows:
216   *
217   * <pre>{@code
218   * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes);
219   * }</pre>
220   *
221   * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more
222   * info.
223   *
224   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
225   */
226  public final Iterable<N> breadthFirst(N startNode) {
227    return breadthFirst(ImmutableSet.of(startNode));
228  }
229
230  /**
231   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
232   * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first
233   * traversal of a graph with an additional root node whose successors are the listed {@code
234   * startNodes}.
235   *
236   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
237   * @see #breadthFirst(Object)
238   * @since 24.1
239   */
240  public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) {
241    ImmutableSet<N> validated = validate(startNodes);
242    return new Iterable<N>() {
243      @Override
244      public Iterator<N> iterator() {
245        return newTraversal().breadthFirst(validated.iterator());
246      }
247    };
248  }
249
250  /**
251   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
252   * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the
253   * {@code Iterable} in the order in which they are first visited.
254   *
255   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
256   * the order {@code abecfd} (assuming successors are returned in alphabetical order).
257   *
258   * <pre>{@code
259   * b ---- a ---- d
260   * |      |
261   * |      |
262   * e ---- c ---- f
263   * }</pre>
264   *
265   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
266   * while iteration is in progress.
267   *
268   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
269   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
270   * number of nodes as follows:
271   *
272   * <pre>{@code
273   * Iterables.limit(
274   *     Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes);
275   * }</pre>
276   *
277   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
278   *
279   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
280   */
281  public final Iterable<N> depthFirstPreOrder(N startNode) {
282    return depthFirstPreOrder(ImmutableSet.of(startNode));
283  }
284
285  /**
286   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
287   * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a
288   * depth-first pre-order traversal of a graph with an additional root node whose successors are
289   * the listed {@code startNodes}.
290   *
291   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
292   * @see #depthFirstPreOrder(Object)
293   * @since 24.1
294   */
295  public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) {
296    ImmutableSet<N> validated = validate(startNodes);
297    return new Iterable<N>() {
298      @Override
299      public Iterator<N> iterator() {
300        return newTraversal().preOrder(validated.iterator());
301      }
302    };
303  }
304
305  /**
306   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
307   * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the
308   * {@code Iterable} in the order in which they are visited for the last time.
309   *
310   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
311   * the order {@code fcebda} (assuming successors are returned in alphabetical order).
312   *
313   * <pre>{@code
314   * b ---- a ---- d
315   * |      |
316   * |      |
317   * e ---- c ---- f
318   * }</pre>
319   *
320   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
321   * while iteration is in progress.
322   *
323   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
324   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
325   * number of nodes as follows:
326   *
327   * <pre>{@code
328   * Iterables.limit(
329   *     Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes);
330   * }</pre>
331   *
332   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
333   *
334   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
335   */
336  public final Iterable<N> depthFirstPostOrder(N startNode) {
337    return depthFirstPostOrder(ImmutableSet.of(startNode));
338  }
339
340  /**
341   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
342   * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a
343   * depth-first post-order traversal of a graph with an additional root node whose successors are
344   * the listed {@code startNodes}.
345   *
346   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
347   * @see #depthFirstPostOrder(Object)
348   * @since 24.1
349   */
350  public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) {
351    ImmutableSet<N> validated = validate(startNodes);
352    return new Iterable<N>() {
353      @Override
354      public Iterator<N> iterator() {
355        return newTraversal().postOrder(validated.iterator());
356      }
357    };
358  }
359
360  abstract Traversal<N> newTraversal();
361
362  @SuppressWarnings("CheckReturnValue")
363  private ImmutableSet<N> validate(Iterable<? extends N> startNodes) {
364    ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes);
365    for (N node : copy) {
366      successorFunction.successors(node); // Will throw if node doesn't exist
367    }
368    return copy;
369  }
370
371  /**
372   * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take
373   * the next element from the next non-empty iterator; for graph, we need to loop through the next
374   * non-empty iterator to find first unvisited node.
375   */
376  private abstract static class Traversal<N> {
377    final SuccessorsFunction<N> successorFunction;
378
379    Traversal(SuccessorsFunction<N> successorFunction) {
380      this.successorFunction = successorFunction;
381    }
382
383    static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) {
384      Set<N> visited = new HashSet<>();
385      return new Traversal<N>(graph) {
386        @Override
387        @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) {
388          Iterator<? extends N> top = horizon.getFirst();
389          while (top.hasNext()) {
390            N element = top.next();
391            // requireNonNull is safe because horizon contains only graph nodes.
392            /*
393             * TODO(cpovirk): Replace these two statements with one (`N element =
394             * requireNonNull(top.next())`) once our checker supports it.
395             *
396             * (The problem is likely
397             * https://github.com/jspecify/jspecify-reference-checker/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896)
398             */
399            requireNonNull(element);
400            if (visited.add(element)) {
401              return element;
402            }
403          }
404          horizon.removeFirst();
405          return null;
406        }
407      };
408    }
409
410    static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) {
411      return new Traversal<N>(tree) {
412        @Override
413        @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) {
414          Iterator<? extends N> top = horizon.getFirst();
415          if (top.hasNext()) {
416            return checkNotNull(top.next());
417          }
418          horizon.removeFirst();
419          return null;
420        }
421      };
422    }
423
424    final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) {
425      return topDown(startNodes, InsertionOrder.BACK);
426    }
427
428    final Iterator<N> preOrder(Iterator<? extends N> startNodes) {
429      return topDown(startNodes, InsertionOrder.FRONT);
430    }
431
432    /**
433     * In top-down traversal, an ancestor node is always traversed before any of its descendant
434     * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are
435     * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before
436     * aunts for pre-order; while in BFS they are placed at the BACK after aunts.
437     */
438    private Iterator<N> topDown(Iterator<? extends N> startNodes, InsertionOrder order) {
439      Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
440      horizon.add(startNodes);
441      return new AbstractIterator<N>() {
442        @Override
443        protected @Nullable N computeNext() {
444          do {
445            N next = visitNext(horizon);
446            if (next != null) {
447              Iterator<? extends N> successors = successorFunction.successors(next).iterator();
448              if (successors.hasNext()) {
449                // BFS: horizon.addLast(successors)
450                // Pre-order: horizon.addFirst(successors)
451                order.insertInto(horizon, successors);
452              }
453              return next;
454            }
455          } while (!horizon.isEmpty());
456          return endOfData();
457        }
458      };
459    }
460
461    final Iterator<N> postOrder(Iterator<? extends N> startNodes) {
462      Deque<N> ancestorStack = new ArrayDeque<>();
463      Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
464      horizon.add(startNodes);
465      return new AbstractIterator<N>() {
466        @Override
467        protected @Nullable N computeNext() {
468          for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) {
469            Iterator<? extends N> successors = successorFunction.successors(next).iterator();
470            if (!successors.hasNext()) {
471              return next;
472            }
473            horizon.addFirst(successors);
474            ancestorStack.push(next);
475          }
476          // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker.
477          if (!ancestorStack.isEmpty()) {
478            return ancestorStack.pop();
479          }
480          return endOfData();
481        }
482      };
483    }
484
485    /**
486     * Visits the next node from the top iterator of {@code horizon} and returns the visited node.
487     * Null is returned to indicate reaching the end of the top iterator.
488     *
489     * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return
490     * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure.
491     * (Note, however, that the callers of {@code visitNext()} often insert additional iterators
492     * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive
493     * additional values interleaved with those shown above.)
494     */
495    abstract @Nullable N visitNext(Deque<Iterator<? extends N>> horizon);
496  }
497
498  /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */
499  private enum InsertionOrder {
500    FRONT {
501      @Override
502      <T> void insertInto(Deque<T> deque, T value) {
503        deque.addFirst(value);
504      }
505    },
506    BACK {
507      @Override
508      <T> void insertInto(Deque<T> deque, T value) {
509        deque.addLast(value);
510      }
511    };
512
513    abstract <T> void insertInto(Deque<T> deque, T value);
514  }
515}