001/* 002 * Copyright (C) 2017 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.graph; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021import static java.util.Objects.requireNonNull; 022 023import com.google.common.annotations.Beta; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableSet; 026import com.google.errorprone.annotations.DoNotMock; 027import java.util.ArrayDeque; 028import java.util.Deque; 029import java.util.HashSet; 030import java.util.Iterator; 031import java.util.Set; 032import org.jspecify.annotations.Nullable; 033 034/** 035 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s) 036 * using a specified {@link SuccessorsFunction}. 037 * 038 * <p>There are two entry points for creating a {@code Traverser}: {@link 039 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one 040 * based on your answers to the following questions: 041 * 042 * <ol> 043 * <li>Is there only one path to any node that's reachable from any start node? (If so, the graph 044 * to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.) 045 * <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a 046 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>? 047 * </ol> 048 * 049 * <p>If your answers are: 050 * 051 * <ul> 052 * <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}. 053 * <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}. 054 * <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient. 055 * <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node 056 * objects into a non-recursive form, you can use {@code forGraph()}. 057 * </ul> 058 * 059 * @author Jens Nyman 060 * @param <N> Node parameter type 061 * @since 23.1 062 */ 063@Beta 064@DoNotMock( 065 "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with" 066 + " GraphBuilder)") 067public abstract class Traverser<N> { 068 private final SuccessorsFunction<N> successorFunction; 069 070 private Traverser(SuccessorsFunction<N> successorFunction) { 071 this.successorFunction = checkNotNull(successorFunction); 072 } 073 074 /** 075 * Creates a new traverser for the given general {@code graph}. 076 * 077 * <p>Traversers created using this method are guaranteed to visit each node reachable from the 078 * start node(s) at most once. 079 * 080 * <p>If you know that no node in {@code graph} is reachable by more than one path from the start 081 * node(s), consider using {@link #forTree(SuccessorsFunction)} instead. 082 * 083 * <p><b>Performance notes</b> 084 * 085 * <ul> 086 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 087 * the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and 088 * {@code hashCode()} implementations. (See the <a 089 * href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys"> 090 * notes on element objects</a> for more information.) 091 * <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number 092 * of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the 093 * number of nodes that have been seen but not yet visited, that is, the "horizon"). 094 * </ul> 095 * 096 * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles. 097 */ 098 public static <N> Traverser<N> forGraph(SuccessorsFunction<N> graph) { 099 return new Traverser<N>(graph) { 100 @Override 101 Traversal<N> newTraversal() { 102 return Traversal.inGraph(graph); 103 } 104 }; 105 } 106 107 /** 108 * Creates a new traverser for a directed acyclic graph that has at most one path from the start 109 * node(s) to any node reachable from the start node(s), and has no paths from any start node to 110 * any other start node, such as a tree or forest. 111 * 112 * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data 113 * structure being traversed is, in addition to being a tree/forest, also defined <a 114 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>. 115 * This is because the {@code forTree()}-based implementations don't keep track of visited nodes, 116 * and therefore don't need to call {@code equals()} or {@code hashCode()} on the node objects; 117 * this saves both time and space versus traversing the same graph using {@code forGraph()}. 118 * 119 * <p>Providing a graph to be traversed for which there is more than one path from the start 120 * node(s) to any node may lead to: 121 * 122 * <ul> 123 * <li>Traversal not terminating (if the graph has cycles) 124 * <li>Nodes being visited multiple times (if multiple paths exist from any start node to any 125 * node reachable from any start node) 126 * </ul> 127 * 128 * <p><b>Performance notes</b> 129 * 130 * <ul> 131 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 132 * the start node). 133 * <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number 134 * of nodes that have been seen but not yet visited, that is, the "horizon"). 135 * </ul> 136 * 137 * <p><b>Examples</b> (all edges are directed facing downwards) 138 * 139 * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code 140 * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and 141 * {@code h}. 142 * 143 * <pre>{@code 144 * a b c 145 * / \ / \ | 146 * / \ / \ | 147 * d e f g 148 * | 149 * | 150 * h 151 * }</pre> 152 * 153 * <p>. 154 * 155 * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code 156 * b} were a start node, there would be multiple paths to {@code f}. 157 * 158 * <pre>{@code 159 * a b 160 * / \ / \ 161 * / \ / \ 162 * c d e 163 * \ / 164 * \ / 165 * f 166 * }</pre> 167 * 168 * <p><b>Note on binary trees</b> 169 * 170 * <p>This method can be used to traverse over a binary tree. Given methods {@code 171 * leftChild(node)} and {@code rightChild(node)}, this method can be called as 172 * 173 * <pre>{@code 174 * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node))); 175 * }</pre> 176 * 177 * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most 178 * one path between any two nodes 179 */ 180 public static <N> Traverser<N> forTree(SuccessorsFunction<N> tree) { 181 if (tree instanceof BaseGraph) { 182 checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees."); 183 } 184 if (tree instanceof Network) { 185 checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees."); 186 } 187 return new Traverser<N>(tree) { 188 @Override 189 Traversal<N> newTraversal() { 190 return Traversal.inTree(tree); 191 } 192 }; 193 } 194 195 /** 196 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 197 * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then 198 * depth 1, then 2, and so on. 199 * 200 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 201 * the order {@code abcdef} (assuming successors are returned in alphabetical order). 202 * 203 * <pre>{@code 204 * b ---- a ---- d 205 * | | 206 * | | 207 * e ---- c ---- f 208 * }</pre> 209 * 210 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 211 * while iteration is in progress. 212 * 213 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 214 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 215 * number of nodes as follows: 216 * 217 * <pre>{@code 218 * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes); 219 * }</pre> 220 * 221 * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more 222 * info. 223 * 224 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 225 */ 226 public final Iterable<N> breadthFirst(N startNode) { 227 return breadthFirst(ImmutableSet.of(startNode)); 228 } 229 230 /** 231 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 232 * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first 233 * traversal of a graph with an additional root node whose successors are the listed {@code 234 * startNodes}. 235 * 236 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 237 * @see #breadthFirst(Object) 238 * @since 24.1 239 */ 240 public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) { 241 ImmutableSet<N> validated = validate(startNodes); 242 return () -> newTraversal().breadthFirst(validated.iterator()); 243 } 244 245 /** 246 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 247 * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the 248 * {@code Iterable} in the order in which they are first visited. 249 * 250 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 251 * the order {@code abecfd} (assuming successors are returned in alphabetical order). 252 * 253 * <pre>{@code 254 * b ---- a ---- d 255 * | | 256 * | | 257 * e ---- c ---- f 258 * }</pre> 259 * 260 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 261 * while iteration is in progress. 262 * 263 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 264 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 265 * number of nodes as follows: 266 * 267 * <pre>{@code 268 * Iterables.limit( 269 * Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes); 270 * }</pre> 271 * 272 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 273 * 274 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 275 */ 276 public final Iterable<N> depthFirstPreOrder(N startNode) { 277 return depthFirstPreOrder(ImmutableSet.of(startNode)); 278 } 279 280 /** 281 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 282 * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a 283 * depth-first pre-order traversal of a graph with an additional root node whose successors are 284 * the listed {@code startNodes}. 285 * 286 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 287 * @see #depthFirstPreOrder(Object) 288 * @since 24.1 289 */ 290 public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) { 291 ImmutableSet<N> validated = validate(startNodes); 292 return () -> newTraversal().preOrder(validated.iterator()); 293 } 294 295 /** 296 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 297 * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the 298 * {@code Iterable} in the order in which they are visited for the last time. 299 * 300 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 301 * the order {@code fcebda} (assuming successors are returned in alphabetical order). 302 * 303 * <pre>{@code 304 * b ---- a ---- d 305 * | | 306 * | | 307 * e ---- c ---- f 308 * }</pre> 309 * 310 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 311 * while iteration is in progress. 312 * 313 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 314 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 315 * number of nodes as follows: 316 * 317 * <pre>{@code 318 * Iterables.limit( 319 * Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes); 320 * }</pre> 321 * 322 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 323 * 324 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 325 */ 326 public final Iterable<N> depthFirstPostOrder(N startNode) { 327 return depthFirstPostOrder(ImmutableSet.of(startNode)); 328 } 329 330 /** 331 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 332 * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a 333 * depth-first post-order traversal of a graph with an additional root node whose successors are 334 * the listed {@code startNodes}. 335 * 336 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 337 * @see #depthFirstPostOrder(Object) 338 * @since 24.1 339 */ 340 public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) { 341 ImmutableSet<N> validated = validate(startNodes); 342 return () -> newTraversal().postOrder(validated.iterator()); 343 } 344 345 abstract Traversal<N> newTraversal(); 346 347 @SuppressWarnings("CheckReturnValue") 348 private ImmutableSet<N> validate(Iterable<? extends N> startNodes) { 349 ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes); 350 for (N node : copy) { 351 successorFunction.successors(node); // Will throw if node doesn't exist 352 } 353 return copy; 354 } 355 356 /** 357 * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take 358 * the next element from the next non-empty iterator; for graph, we need to loop through the next 359 * non-empty iterator to find first unvisited node. 360 */ 361 private abstract static class Traversal<N> { 362 final SuccessorsFunction<N> successorFunction; 363 364 Traversal(SuccessorsFunction<N> successorFunction) { 365 this.successorFunction = successorFunction; 366 } 367 368 static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) { 369 Set<N> visited = new HashSet<>(); 370 return new Traversal<N>(graph) { 371 @Override 372 @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) { 373 Iterator<? extends N> top = horizon.getFirst(); 374 while (top.hasNext()) { 375 N element = top.next(); 376 // requireNonNull is safe because horizon contains only graph nodes. 377 /* 378 * TODO(cpovirk): Replace these two statements with one (`N element = 379 * requireNonNull(top.next())`) once our checker supports it. 380 * 381 * (The problem is likely 382 * https://github.com/jspecify/jspecify-reference-checker/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896) 383 */ 384 requireNonNull(element); 385 if (visited.add(element)) { 386 return element; 387 } 388 } 389 horizon.removeFirst(); 390 return null; 391 } 392 }; 393 } 394 395 static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) { 396 return new Traversal<N>(tree) { 397 @Override 398 @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) { 399 Iterator<? extends N> top = horizon.getFirst(); 400 if (top.hasNext()) { 401 return checkNotNull(top.next()); 402 } 403 horizon.removeFirst(); 404 return null; 405 } 406 }; 407 } 408 409 final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) { 410 return topDown(startNodes, InsertionOrder.BACK); 411 } 412 413 final Iterator<N> preOrder(Iterator<? extends N> startNodes) { 414 return topDown(startNodes, InsertionOrder.FRONT); 415 } 416 417 /** 418 * In top-down traversal, an ancestor node is always traversed before any of its descendant 419 * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are 420 * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before 421 * aunts for pre-order; while in BFS they are placed at the BACK after aunts. 422 */ 423 private Iterator<N> topDown(Iterator<? extends N> startNodes, InsertionOrder order) { 424 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 425 horizon.add(startNodes); 426 return new AbstractIterator<N>() { 427 @Override 428 protected @Nullable N computeNext() { 429 do { 430 N next = visitNext(horizon); 431 if (next != null) { 432 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 433 if (successors.hasNext()) { 434 // BFS: horizon.addLast(successors) 435 // Pre-order: horizon.addFirst(successors) 436 order.insertInto(horizon, successors); 437 } 438 return next; 439 } 440 } while (!horizon.isEmpty()); 441 return endOfData(); 442 } 443 }; 444 } 445 446 final Iterator<N> postOrder(Iterator<? extends N> startNodes) { 447 Deque<N> ancestorStack = new ArrayDeque<>(); 448 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 449 horizon.add(startNodes); 450 return new AbstractIterator<N>() { 451 @Override 452 protected @Nullable N computeNext() { 453 for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) { 454 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 455 if (!successors.hasNext()) { 456 return next; 457 } 458 horizon.addFirst(successors); 459 ancestorStack.push(next); 460 } 461 // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker. 462 if (!ancestorStack.isEmpty()) { 463 return ancestorStack.pop(); 464 } 465 return endOfData(); 466 } 467 }; 468 } 469 470 /** 471 * Visits the next node from the top iterator of {@code horizon} and returns the visited node. 472 * Null is returned to indicate reaching the end of the top iterator. 473 * 474 * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return 475 * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure. 476 * (Note, however, that the callers of {@code visitNext()} often insert additional iterators 477 * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive 478 * additional values interleaved with those shown above.) 479 */ 480 abstract @Nullable N visitNext(Deque<Iterator<? extends N>> horizon); 481 } 482 483 /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */ 484 private enum InsertionOrder { 485 FRONT { 486 @Override 487 <T> void insertInto(Deque<T> deque, T value) { 488 deque.addFirst(value); 489 } 490 }, 491 BACK { 492 @Override 493 <T> void insertInto(Deque<T> deque, T value) { 494 deque.addLast(value); 495 } 496 }; 497 498 abstract <T> void insertInto(Deque<T> deque, T value); 499 } 500}