001/*
002 * Copyright (C) 2017 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.graph;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021
022import com.google.common.annotations.Beta;
023import com.google.common.collect.AbstractIterator;
024import com.google.common.collect.ImmutableSet;
025import com.google.errorprone.annotations.DoNotMock;
026import java.util.ArrayDeque;
027import java.util.Deque;
028import java.util.HashSet;
029import java.util.Iterator;
030import java.util.Set;
031import org.checkerframework.checker.nullness.qual.Nullable;
032
033/**
034 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s)
035 * using a specified {@link SuccessorsFunction}.
036 *
037 * <p>There are two entry points for creating a {@code Traverser}: {@link
038 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one
039 * based on your answers to the following questions:
040 *
041 * <ol>
042 *   <li>Is there only one path to any node that's reachable from any start node? (If so, the graph
043 *       to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.)
044 *   <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a
045 *       href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>?
046 * </ol>
047 *
048 * <p>If your answers are:
049 *
050 * <ul>
051 *   <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}.
052 *   <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}.
053 *   <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient.
054 *   <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node
055 *       objects into a non-recursive form, you can use {@code forGraph()}.
056 * </ul>
057 *
058 * @author Jens Nyman
059 * @param <N> Node parameter type
060 * @since 23.1
061 */
062@Beta
063@DoNotMock(
064    "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with"
065        + " GraphBuilder)")
066public abstract class Traverser<N> {
067  private final SuccessorsFunction<N> successorFunction;
068
069  private Traverser(SuccessorsFunction<N> successorFunction) {
070    this.successorFunction = checkNotNull(successorFunction);
071  }
072
073  /**
074   * Creates a new traverser for the given general {@code graph}.
075   *
076   * <p>Traversers created using this method are guaranteed to visit each node reachable from the
077   * start node(s) at most once.
078   *
079   * <p>If you know that no node in {@code graph} is reachable by more than one path from the start
080   * node(s), consider using {@link #forTree(SuccessorsFunction)} instead.
081   *
082   * <p><b>Performance notes</b>
083   *
084   * <ul>
085   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
086   *       the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and
087   *       {@code hashCode()} implementations. (See the <a
088   *       href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys">
089   *       notes on element objects</a> for more information.)
090   *   <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number
091   *       of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the
092   *       number of nodes that have been seen but not yet visited, that is, the "horizon").
093   * </ul>
094   *
095   * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles.
096   */
097  public static <N> Traverser<N> forGraph(final SuccessorsFunction<N> graph) {
098    return new Traverser<N>(graph) {
099      @Override
100      Traversal<N> newTraversal() {
101        return Traversal.inGraph(graph);
102      }
103    };
104  }
105
106  /**
107   * Creates a new traverser for a directed acyclic graph that has at most one path from the start
108   * node(s) to any node reachable from the start node(s), and has no paths from any start node to
109   * any other start node, such as a tree or forest.
110   *
111   * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data
112   * structure being traversed is, in addition to being a tree/forest, also defined <a
113   * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>.
114   * This is because the {@code forTree()}-based implementations don't keep track of visited nodes,
115   * and therefore don't need to call `equals()` or `hashCode()` on the node objects; this saves
116   * both time and space versus traversing the same graph using {@code forGraph()}.
117   *
118   * <p>Providing a graph to be traversed for which there is more than one path from the start
119   * node(s) to any node may lead to:
120   *
121   * <ul>
122   *   <li>Traversal not terminating (if the graph has cycles)
123   *   <li>Nodes being visited multiple times (if multiple paths exist from any start node to any
124   *       node reachable from any start node)
125   * </ul>
126   *
127   * <p><b>Performance notes</b>
128   *
129   * <ul>
130   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
131   *       the start node).
132   *   <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number
133   *       of nodes that have been seen but not yet visited, that is, the "horizon").
134   * </ul>
135   *
136   * <p><b>Examples</b> (all edges are directed facing downwards)
137   *
138   * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code
139   * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and
140   * {@code h}.
141   *
142   * <pre>{@code
143   *    a     b      c
144   *   / \   / \     |
145   *  /   \ /   \    |
146   * d     e     f   g
147   *       |
148   *       |
149   *       h
150   * }</pre>
151   *
152   * <p>.
153   *
154   * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code
155   * b} were a start node, there would be multiple paths to {@code f}.
156   *
157   * <pre>{@code
158   *    a     b
159   *   / \   / \
160   *  /   \ /   \
161   * c     d     e
162   *        \   /
163   *         \ /
164   *          f
165   * }</pre>
166   *
167   * <p><b>Note on binary trees</b>
168   *
169   * <p>This method can be used to traverse over a binary tree. Given methods {@code
170   * leftChild(node)} and {@code rightChild(node)}, this method can be called as
171   *
172   * <pre>{@code
173   * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node)));
174   * }</pre>
175   *
176   * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most
177   *     one path between any two nodes
178   */
179  public static <N> Traverser<N> forTree(final SuccessorsFunction<N> tree) {
180    if (tree instanceof BaseGraph) {
181      checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees.");
182    }
183    if (tree instanceof Network) {
184      checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees.");
185    }
186    return new Traverser<N>(tree) {
187      @Override
188      Traversal<N> newTraversal() {
189        return Traversal.inTree(tree);
190      }
191    };
192  }
193
194  /**
195   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
196   * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then
197   * depth 1, then 2, and so on.
198   *
199   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
200   * the order {@code abcdef} (assuming successors are returned in alphabetical order).
201   *
202   * <pre>{@code
203   * b ---- a ---- d
204   * |      |
205   * |      |
206   * e ---- c ---- f
207   * }</pre>
208   *
209   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
210   * while iteration is in progress.
211   *
212   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
213   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
214   * number of nodes as follows:
215   *
216   * <pre>{@code
217   * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes);
218   * }</pre>
219   *
220   * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more
221   * info.
222   *
223   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
224   */
225  public final Iterable<N> breadthFirst(N startNode) {
226    return breadthFirst(ImmutableSet.of(startNode));
227  }
228
229  /**
230   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
231   * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first
232   * traversal of a graph with an additional root node whose successors are the listed {@code
233   * startNodes}.
234   *
235   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
236   * @see #breadthFirst(Object)
237   * @since 24.1
238   */
239  public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) {
240    final ImmutableSet<N> validated = validate(startNodes);
241    return new Iterable<N>() {
242      @Override
243      public Iterator<N> iterator() {
244        return newTraversal().breadthFirst(validated.iterator());
245      }
246    };
247  }
248
249  /**
250   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
251   * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the
252   * {@code Iterable} in the order in which they are first visited.
253   *
254   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
255   * the order {@code abecfd} (assuming successors are returned in alphabetical order).
256   *
257   * <pre>{@code
258   * b ---- a ---- d
259   * |      |
260   * |      |
261   * e ---- c ---- f
262   * }</pre>
263   *
264   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
265   * while iteration is in progress.
266   *
267   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
268   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
269   * number of nodes as follows:
270   *
271   * <pre>{@code
272   * Iterables.limit(
273   *     Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes);
274   * }</pre>
275   *
276   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
277   *
278   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
279   */
280  public final Iterable<N> depthFirstPreOrder(N startNode) {
281    return depthFirstPreOrder(ImmutableSet.of(startNode));
282  }
283
284  /**
285   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
286   * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a
287   * depth-first pre-order traversal of a graph with an additional root node whose successors are
288   * the listed {@code startNodes}.
289   *
290   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
291   * @see #depthFirstPreOrder(Object)
292   * @since 24.1
293   */
294  public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) {
295    final ImmutableSet<N> validated = validate(startNodes);
296    return new Iterable<N>() {
297      @Override
298      public Iterator<N> iterator() {
299        return newTraversal().preOrder(validated.iterator());
300      }
301    };
302  }
303
304  /**
305   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
306   * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the
307   * {@code Iterable} in the order in which they are visited for the last time.
308   *
309   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
310   * the order {@code fcebda} (assuming successors are returned in alphabetical order).
311   *
312   * <pre>{@code
313   * b ---- a ---- d
314   * |      |
315   * |      |
316   * e ---- c ---- f
317   * }</pre>
318   *
319   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
320   * while iteration is in progress.
321   *
322   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
323   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
324   * number of nodes as follows:
325   *
326   * <pre>{@code
327   * Iterables.limit(
328   *     Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes);
329   * }</pre>
330   *
331   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
332   *
333   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
334   */
335  public final Iterable<N> depthFirstPostOrder(N startNode) {
336    return depthFirstPostOrder(ImmutableSet.of(startNode));
337  }
338
339  /**
340   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
341   * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a
342   * depth-first post-order traversal of a graph with an additional root node whose successors are
343   * the listed {@code startNodes}.
344   *
345   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
346   * @see #depthFirstPostOrder(Object)
347   * @since 24.1
348   */
349  public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) {
350    final ImmutableSet<N> validated = validate(startNodes);
351    return new Iterable<N>() {
352      @Override
353      public Iterator<N> iterator() {
354        return newTraversal().postOrder(validated.iterator());
355      }
356    };
357  }
358
359  abstract Traversal<N> newTraversal();
360
361  @SuppressWarnings("CheckReturnValue")
362  private ImmutableSet<N> validate(Iterable<? extends N> startNodes) {
363    ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes);
364    for (N node : copy) {
365      successorFunction.successors(node); // Will throw if node doesn't exist
366    }
367    return copy;
368  }
369
370  /**
371   * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take
372   * the next element from the next non-empty iterator; for graph, we need to loop through the next
373   * non-empty iterator to find first unvisited node.
374   */
375  private abstract static class Traversal<N> {
376    final SuccessorsFunction<N> successorFunction;
377
378    Traversal(SuccessorsFunction<N> successorFunction) {
379      this.successorFunction = successorFunction;
380    }
381
382    static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) {
383      final Set<N> visited = new HashSet<>();
384      return new Traversal<N>(graph) {
385        @Override
386        N visitNext(Deque<Iterator<? extends N>> horizon) {
387          Iterator<? extends N> top = horizon.getFirst();
388          while (top.hasNext()) {
389            N element = checkNotNull(top.next());
390            if (visited.add(element)) {
391              return element;
392            }
393          }
394          horizon.removeFirst();
395          return null;
396        }
397      };
398    }
399
400    static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) {
401      return new Traversal<N>(tree) {
402        @Override
403        N visitNext(Deque<Iterator<? extends N>> horizon) {
404          Iterator<? extends N> top = horizon.getFirst();
405          if (top.hasNext()) {
406            return checkNotNull(top.next());
407          }
408          horizon.removeFirst();
409          return null;
410        }
411      };
412    }
413
414    final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) {
415      return topDown(startNodes, InsertionOrder.BACK);
416    }
417
418    final Iterator<N> preOrder(Iterator<? extends N> startNodes) {
419      return topDown(startNodes, InsertionOrder.FRONT);
420    }
421
422    /**
423     * In top-down traversal, an ancestor node is always traversed before any of its descendant
424     * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are
425     * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before
426     * aunts for pre-order; while in BFS they are placed at the BACK after aunts.
427     */
428    private Iterator<N> topDown(Iterator<? extends N> startNodes, final InsertionOrder order) {
429      final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
430      horizon.add(startNodes);
431      return new AbstractIterator<N>() {
432        @Override
433        protected N computeNext() {
434          do {
435            N next = visitNext(horizon);
436            if (next != null) {
437              Iterator<? extends N> successors = successorFunction.successors(next).iterator();
438              if (successors.hasNext()) {
439                // BFS: horizon.addLast(successors)
440                // Pre-order: horizon.addFirst(successors)
441                order.insertInto(horizon, successors);
442              }
443              return next;
444            }
445          } while (!horizon.isEmpty());
446          return endOfData();
447        }
448      };
449    }
450
451    final Iterator<N> postOrder(Iterator<? extends N> startNodes) {
452      final Deque<N> ancestorStack = new ArrayDeque<>();
453      final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
454      horizon.add(startNodes);
455      return new AbstractIterator<N>() {
456        @Override
457        protected N computeNext() {
458          for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) {
459            Iterator<? extends N> successors = successorFunction.successors(next).iterator();
460            if (!successors.hasNext()) {
461              return next;
462            }
463            horizon.addFirst(successors);
464            ancestorStack.push(next);
465          }
466          return ancestorStack.isEmpty() ? endOfData() : ancestorStack.pop();
467        }
468      };
469    }
470
471    /**
472     * Visits the next node from the top iterator of {@code horizon} and returns the visited node.
473     * Null is returned to indicate reaching the end of the top iterator.
474     *
475     * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return
476     * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure.
477     * (Note, however, that the callers of {@code visitNext()} often insert additional iterators
478     * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive
479     * additional values interleaved with those shown above.)
480     */
481    @Nullable
482    abstract N visitNext(Deque<Iterator<? extends N>> horizon);
483  }
484
485  /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */
486  private enum InsertionOrder {
487    FRONT {
488      @Override
489      <T> void insertInto(Deque<T> deque, T value) {
490        deque.addFirst(value);
491      }
492    },
493    BACK {
494      @Override
495      <T> void insertInto(Deque<T> deque, T value) {
496        deque.addLast(value);
497      }
498    };
499
500    abstract <T> void insertInto(Deque<T> deque, T value);
501  }
502}