001/* 002 * Copyright (C) 2017 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.graph; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021import static java.util.Objects.requireNonNull; 022 023import com.google.common.annotations.Beta; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableSet; 026import com.google.errorprone.annotations.DoNotMock; 027import java.util.ArrayDeque; 028import java.util.Deque; 029import java.util.HashSet; 030import java.util.Iterator; 031import java.util.Set; 032import javax.annotation.CheckForNull; 033 034/** 035 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s) 036 * using a specified {@link SuccessorsFunction}. 037 * 038 * <p>There are two entry points for creating a {@code Traverser}: {@link 039 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one 040 * based on your answers to the following questions: 041 * 042 * <ol> 043 * <li>Is there only one path to any node that's reachable from any start node? (If so, the graph 044 * to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.) 045 * <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a 046 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>? 047 * </ol> 048 * 049 * <p>If your answers are: 050 * 051 * <ul> 052 * <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}. 053 * <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}. 054 * <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient. 055 * <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node 056 * objects into a non-recursive form, you can use {@code forGraph()}. 057 * </ul> 058 * 059 * @author Jens Nyman 060 * @param <N> Node parameter type 061 * @since 23.1 062 */ 063@Beta 064@DoNotMock( 065 "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with" 066 + " GraphBuilder)") 067@ElementTypesAreNonnullByDefault 068public abstract class Traverser<N> { 069 private final SuccessorsFunction<N> successorFunction; 070 071 private Traverser(SuccessorsFunction<N> successorFunction) { 072 this.successorFunction = checkNotNull(successorFunction); 073 } 074 075 /** 076 * Creates a new traverser for the given general {@code graph}. 077 * 078 * <p>Traversers created using this method are guaranteed to visit each node reachable from the 079 * start node(s) at most once. 080 * 081 * <p>If you know that no node in {@code graph} is reachable by more than one path from the start 082 * node(s), consider using {@link #forTree(SuccessorsFunction)} instead. 083 * 084 * <p><b>Performance notes</b> 085 * 086 * <ul> 087 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 088 * the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and 089 * {@code hashCode()} implementations. (See the <a 090 * href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys"> 091 * notes on element objects</a> for more information.) 092 * <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number 093 * of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the 094 * number of nodes that have been seen but not yet visited, that is, the "horizon"). 095 * </ul> 096 * 097 * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles. 098 */ 099 public static <N> Traverser<N> forGraph(SuccessorsFunction<N> graph) { 100 return new Traverser<N>(graph) { 101 @Override 102 Traversal<N> newTraversal() { 103 return Traversal.inGraph(graph); 104 } 105 }; 106 } 107 108 /** 109 * Creates a new traverser for a directed acyclic graph that has at most one path from the start 110 * node(s) to any node reachable from the start node(s), and has no paths from any start node to 111 * any other start node, such as a tree or forest. 112 * 113 * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data 114 * structure being traversed is, in addition to being a tree/forest, also defined <a 115 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>. 116 * This is because the {@code forTree()}-based implementations don't keep track of visited nodes, 117 * and therefore don't need to call `equals()` or `hashCode()` on the node objects; this saves 118 * both time and space versus traversing the same graph using {@code forGraph()}. 119 * 120 * <p>Providing a graph to be traversed for which there is more than one path from the start 121 * node(s) to any node may lead to: 122 * 123 * <ul> 124 * <li>Traversal not terminating (if the graph has cycles) 125 * <li>Nodes being visited multiple times (if multiple paths exist from any start node to any 126 * node reachable from any start node) 127 * </ul> 128 * 129 * <p><b>Performance notes</b> 130 * 131 * <ul> 132 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 133 * the start node). 134 * <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number 135 * of nodes that have been seen but not yet visited, that is, the "horizon"). 136 * </ul> 137 * 138 * <p><b>Examples</b> (all edges are directed facing downwards) 139 * 140 * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code 141 * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and 142 * {@code h}. 143 * 144 * <pre>{@code 145 * a b c 146 * / \ / \ | 147 * / \ / \ | 148 * d e f g 149 * | 150 * | 151 * h 152 * }</pre> 153 * 154 * <p>. 155 * 156 * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code 157 * b} were a start node, there would be multiple paths to {@code f}. 158 * 159 * <pre>{@code 160 * a b 161 * / \ / \ 162 * / \ / \ 163 * c d e 164 * \ / 165 * \ / 166 * f 167 * }</pre> 168 * 169 * <p><b>Note on binary trees</b> 170 * 171 * <p>This method can be used to traverse over a binary tree. Given methods {@code 172 * leftChild(node)} and {@code rightChild(node)}, this method can be called as 173 * 174 * <pre>{@code 175 * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node))); 176 * }</pre> 177 * 178 * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most 179 * one path between any two nodes 180 */ 181 public static <N> Traverser<N> forTree(SuccessorsFunction<N> tree) { 182 if (tree instanceof BaseGraph) { 183 checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees."); 184 } 185 if (tree instanceof Network) { 186 checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees."); 187 } 188 return new Traverser<N>(tree) { 189 @Override 190 Traversal<N> newTraversal() { 191 return Traversal.inTree(tree); 192 } 193 }; 194 } 195 196 /** 197 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 198 * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then 199 * depth 1, then 2, and so on. 200 * 201 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 202 * the order {@code abcdef} (assuming successors are returned in alphabetical order). 203 * 204 * <pre>{@code 205 * b ---- a ---- d 206 * | | 207 * | | 208 * e ---- c ---- f 209 * }</pre> 210 * 211 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 212 * while iteration is in progress. 213 * 214 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 215 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 216 * number of nodes as follows: 217 * 218 * <pre>{@code 219 * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes); 220 * }</pre> 221 * 222 * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more 223 * info. 224 * 225 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 226 */ 227 public final Iterable<N> breadthFirst(N startNode) { 228 return breadthFirst(ImmutableSet.of(startNode)); 229 } 230 231 /** 232 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 233 * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first 234 * traversal of a graph with an additional root node whose successors are the listed {@code 235 * startNodes}. 236 * 237 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 238 * @see #breadthFirst(Object) 239 * @since 24.1 240 */ 241 public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) { 242 ImmutableSet<N> validated = validate(startNodes); 243 return new Iterable<N>() { 244 @Override 245 public Iterator<N> iterator() { 246 return newTraversal().breadthFirst(validated.iterator()); 247 } 248 }; 249 } 250 251 /** 252 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 253 * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the 254 * {@code Iterable} in the order in which they are first visited. 255 * 256 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 257 * the order {@code abecfd} (assuming successors are returned in alphabetical order). 258 * 259 * <pre>{@code 260 * b ---- a ---- d 261 * | | 262 * | | 263 * e ---- c ---- f 264 * }</pre> 265 * 266 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 267 * while iteration is in progress. 268 * 269 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 270 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 271 * number of nodes as follows: 272 * 273 * <pre>{@code 274 * Iterables.limit( 275 * Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes); 276 * }</pre> 277 * 278 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 279 * 280 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 281 */ 282 public final Iterable<N> depthFirstPreOrder(N startNode) { 283 return depthFirstPreOrder(ImmutableSet.of(startNode)); 284 } 285 286 /** 287 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 288 * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a 289 * depth-first pre-order traversal of a graph with an additional root node whose successors are 290 * the listed {@code startNodes}. 291 * 292 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 293 * @see #depthFirstPreOrder(Object) 294 * @since 24.1 295 */ 296 public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) { 297 ImmutableSet<N> validated = validate(startNodes); 298 return new Iterable<N>() { 299 @Override 300 public Iterator<N> iterator() { 301 return newTraversal().preOrder(validated.iterator()); 302 } 303 }; 304 } 305 306 /** 307 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 308 * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the 309 * {@code Iterable} in the order in which they are visited for the last time. 310 * 311 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 312 * the order {@code fcebda} (assuming successors are returned in alphabetical order). 313 * 314 * <pre>{@code 315 * b ---- a ---- d 316 * | | 317 * | | 318 * e ---- c ---- f 319 * }</pre> 320 * 321 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 322 * while iteration is in progress. 323 * 324 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 325 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 326 * number of nodes as follows: 327 * 328 * <pre>{@code 329 * Iterables.limit( 330 * Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes); 331 * }</pre> 332 * 333 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 334 * 335 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 336 */ 337 public final Iterable<N> depthFirstPostOrder(N startNode) { 338 return depthFirstPostOrder(ImmutableSet.of(startNode)); 339 } 340 341 /** 342 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 343 * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a 344 * depth-first post-order traversal of a graph with an additional root node whose successors are 345 * the listed {@code startNodes}. 346 * 347 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 348 * @see #depthFirstPostOrder(Object) 349 * @since 24.1 350 */ 351 public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) { 352 ImmutableSet<N> validated = validate(startNodes); 353 return new Iterable<N>() { 354 @Override 355 public Iterator<N> iterator() { 356 return newTraversal().postOrder(validated.iterator()); 357 } 358 }; 359 } 360 361 abstract Traversal<N> newTraversal(); 362 363 @SuppressWarnings("CheckReturnValue") 364 private ImmutableSet<N> validate(Iterable<? extends N> startNodes) { 365 ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes); 366 for (N node : copy) { 367 successorFunction.successors(node); // Will throw if node doesn't exist 368 } 369 return copy; 370 } 371 372 /** 373 * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take 374 * the next element from the next non-empty iterator; for graph, we need to loop through the next 375 * non-empty iterator to find first unvisited node. 376 */ 377 private abstract static class Traversal<N> { 378 final SuccessorsFunction<N> successorFunction; 379 380 Traversal(SuccessorsFunction<N> successorFunction) { 381 this.successorFunction = successorFunction; 382 } 383 384 static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) { 385 Set<N> visited = new HashSet<>(); 386 return new Traversal<N>(graph) { 387 @Override 388 @CheckForNull 389 N visitNext(Deque<Iterator<? extends N>> horizon) { 390 Iterator<? extends N> top = horizon.getFirst(); 391 while (top.hasNext()) { 392 N element = top.next(); 393 // requireNonNull is safe because horizon contains only graph nodes. 394 /* 395 * TODO(cpovirk): Replace these two statements with one (`N element = 396 * requireNonNull(top.next())`) once our checker supports it. 397 * 398 * (The problem is likely 399 * https://github.com/jspecify/jspecify-reference-checker/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896) 400 */ 401 requireNonNull(element); 402 if (visited.add(element)) { 403 return element; 404 } 405 } 406 horizon.removeFirst(); 407 return null; 408 } 409 }; 410 } 411 412 static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) { 413 return new Traversal<N>(tree) { 414 @CheckForNull 415 @Override 416 N visitNext(Deque<Iterator<? extends N>> horizon) { 417 Iterator<? extends N> top = horizon.getFirst(); 418 if (top.hasNext()) { 419 return checkNotNull(top.next()); 420 } 421 horizon.removeFirst(); 422 return null; 423 } 424 }; 425 } 426 427 final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) { 428 return topDown(startNodes, InsertionOrder.BACK); 429 } 430 431 final Iterator<N> preOrder(Iterator<? extends N> startNodes) { 432 return topDown(startNodes, InsertionOrder.FRONT); 433 } 434 435 /** 436 * In top-down traversal, an ancestor node is always traversed before any of its descendant 437 * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are 438 * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before 439 * aunts for pre-order; while in BFS they are placed at the BACK after aunts. 440 */ 441 private Iterator<N> topDown(Iterator<? extends N> startNodes, InsertionOrder order) { 442 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 443 horizon.add(startNodes); 444 return new AbstractIterator<N>() { 445 @Override 446 @CheckForNull 447 protected N computeNext() { 448 do { 449 N next = visitNext(horizon); 450 if (next != null) { 451 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 452 if (successors.hasNext()) { 453 // BFS: horizon.addLast(successors) 454 // Pre-order: horizon.addFirst(successors) 455 order.insertInto(horizon, successors); 456 } 457 return next; 458 } 459 } while (!horizon.isEmpty()); 460 return endOfData(); 461 } 462 }; 463 } 464 465 final Iterator<N> postOrder(Iterator<? extends N> startNodes) { 466 Deque<N> ancestorStack = new ArrayDeque<>(); 467 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 468 horizon.add(startNodes); 469 return new AbstractIterator<N>() { 470 @Override 471 @CheckForNull 472 protected N computeNext() { 473 for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) { 474 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 475 if (!successors.hasNext()) { 476 return next; 477 } 478 horizon.addFirst(successors); 479 ancestorStack.push(next); 480 } 481 // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker. 482 if (!ancestorStack.isEmpty()) { 483 return ancestorStack.pop(); 484 } 485 return endOfData(); 486 } 487 }; 488 } 489 490 /** 491 * Visits the next node from the top iterator of {@code horizon} and returns the visited node. 492 * Null is returned to indicate reaching the end of the top iterator. 493 * 494 * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return 495 * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure. 496 * (Note, however, that the callers of {@code visitNext()} often insert additional iterators 497 * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive 498 * additional values interleaved with those shown above.) 499 */ 500 @CheckForNull 501 abstract N visitNext(Deque<Iterator<? extends N>> horizon); 502 } 503 504 /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */ 505 private enum InsertionOrder { 506 FRONT { 507 @Override 508 <T> void insertInto(Deque<T> deque, T value) { 509 deque.addFirst(value); 510 } 511 }, 512 BACK { 513 @Override 514 <T> void insertInto(Deque<T> deque, T value) { 515 deque.addLast(value); 516 } 517 }; 518 519 abstract <T> void insertInto(Deque<T> deque, T value); 520 } 521}