001/* 002 * Copyright (C) 2017 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.graph; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021import static java.util.Objects.requireNonNull; 022 023import com.google.common.annotations.Beta; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableSet; 026import com.google.errorprone.annotations.DoNotMock; 027import java.util.ArrayDeque; 028import java.util.Deque; 029import java.util.HashSet; 030import java.util.Iterator; 031import java.util.Set; 032import org.jspecify.annotations.Nullable; 033 034/** 035 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s) 036 * using a specified {@link SuccessorsFunction}. 037 * 038 * <p>There are two entry points for creating a {@code Traverser}: {@link 039 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one 040 * based on your answers to the following questions: 041 * 042 * <ol> 043 * <li>Is there only one path to any node that's reachable from any start node? (If so, the graph 044 * to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.) 045 * <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a 046 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>? 047 * </ol> 048 * 049 * <p>If your answers are: 050 * 051 * <ul> 052 * <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}. 053 * <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}. 054 * <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient. 055 * <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node 056 * objects into a non-recursive form, you can use {@code forGraph()}. 057 * </ul> 058 * 059 * @author Jens Nyman 060 * @param <N> Node parameter type 061 * @since 23.1 062 */ 063@Beta 064@DoNotMock( 065 "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with" 066 + " GraphBuilder)") 067public abstract class Traverser<N> { 068 private final SuccessorsFunction<N> successorFunction; 069 070 private Traverser(SuccessorsFunction<N> successorFunction) { 071 this.successorFunction = checkNotNull(successorFunction); 072 } 073 074 /** 075 * Creates a new traverser for the given general {@code graph}. 076 * 077 * <p>Traversers created using this method are guaranteed to visit each node reachable from the 078 * start node(s) at most once. 079 * 080 * <p>If you know that no node in {@code graph} is reachable by more than one path from the start 081 * node(s), consider using {@link #forTree(SuccessorsFunction)} instead. 082 * 083 * <p><b>Performance notes</b> 084 * 085 * <ul> 086 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 087 * the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and 088 * {@code hashCode()} implementations. (See the <a 089 * href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys"> 090 * notes on element objects</a> for more information.) 091 * <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number 092 * of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the 093 * number of nodes that have been seen but not yet visited, that is, the "horizon"). 094 * </ul> 095 * 096 * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles. 097 */ 098 public static <N> Traverser<N> forGraph(SuccessorsFunction<N> graph) { 099 return new Traverser<N>(graph) { 100 @Override 101 Traversal<N> newTraversal() { 102 return Traversal.inGraph(graph); 103 } 104 }; 105 } 106 107 /** 108 * Creates a new traverser for a directed acyclic graph that has at most one path from the start 109 * node(s) to any node reachable from the start node(s), and has no paths from any start node to 110 * any other start node, such as a tree or forest. 111 * 112 * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data 113 * structure being traversed is, in addition to being a tree/forest, also defined <a 114 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>. 115 * This is because the {@code forTree()}-based implementations don't keep track of visited nodes, 116 * and therefore don't need to call {@code equals()} or {@code hashCode()} on the node objects; 117 * this saves both time and space versus traversing the same graph using {@code forGraph()}. 118 * 119 * <p>Providing a graph to be traversed for which there is more than one path from the start 120 * node(s) to any node may lead to: 121 * 122 * <ul> 123 * <li>Traversal not terminating (if the graph has cycles) 124 * <li>Nodes being visited multiple times (if multiple paths exist from any start node to any 125 * node reachable from any start node) 126 * </ul> 127 * 128 * <p><b>Performance notes</b> 129 * 130 * <ul> 131 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 132 * the start node). 133 * <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number 134 * of nodes that have been seen but not yet visited, that is, the "horizon"). 135 * </ul> 136 * 137 * <p><b>Examples</b> (all edges are directed facing downwards) 138 * 139 * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code 140 * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and 141 * {@code h}. 142 * 143 * <pre>{@code 144 * a b c 145 * / \ / \ | 146 * / \ / \ | 147 * d e f g 148 * | 149 * | 150 * h 151 * }</pre> 152 * 153 * <p>. 154 * 155 * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code 156 * b} were a start node, there would be multiple paths to {@code f}. 157 * 158 * <pre>{@code 159 * a b 160 * / \ / \ 161 * / \ / \ 162 * c d e 163 * \ / 164 * \ / 165 * f 166 * }</pre> 167 * 168 * <p><b>Note on binary trees</b> 169 * 170 * <p>This method can be used to traverse over a binary tree. Given methods {@code 171 * leftChild(node)} and {@code rightChild(node)}, this method can be called as 172 * 173 * <pre>{@code 174 * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node))); 175 * }</pre> 176 * 177 * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most 178 * one path between any two nodes 179 */ 180 public static <N> Traverser<N> forTree(SuccessorsFunction<N> tree) { 181 if (tree instanceof BaseGraph) { 182 checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees."); 183 } 184 if (tree instanceof Network) { 185 checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees."); 186 } 187 return new Traverser<N>(tree) { 188 @Override 189 Traversal<N> newTraversal() { 190 return Traversal.inTree(tree); 191 } 192 }; 193 } 194 195 /** 196 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 197 * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then 198 * depth 1, then 2, and so on. 199 * 200 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 201 * the order {@code abcdef} (assuming successors are returned in alphabetical order). 202 * 203 * <pre>{@code 204 * b ---- a ---- d 205 * | | 206 * | | 207 * e ---- c ---- f 208 * }</pre> 209 * 210 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 211 * while iteration is in progress. 212 * 213 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 214 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 215 * number of nodes as follows: 216 * 217 * <pre>{@code 218 * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes); 219 * }</pre> 220 * 221 * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more 222 * info. 223 * 224 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 225 */ 226 public final Iterable<N> breadthFirst(N startNode) { 227 return breadthFirst(ImmutableSet.of(startNode)); 228 } 229 230 /** 231 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 232 * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first 233 * traversal of a graph with an additional root node whose successors are the listed {@code 234 * startNodes}. 235 * 236 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 237 * @see #breadthFirst(Object) 238 * @since 24.1 239 */ 240 public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) { 241 ImmutableSet<N> validated = validate(startNodes); 242 return new Iterable<N>() { 243 @Override 244 public Iterator<N> iterator() { 245 return newTraversal().breadthFirst(validated.iterator()); 246 } 247 }; 248 } 249 250 /** 251 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 252 * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the 253 * {@code Iterable} in the order in which they are first visited. 254 * 255 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 256 * the order {@code abecfd} (assuming successors are returned in alphabetical order). 257 * 258 * <pre>{@code 259 * b ---- a ---- d 260 * | | 261 * | | 262 * e ---- c ---- f 263 * }</pre> 264 * 265 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 266 * while iteration is in progress. 267 * 268 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 269 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 270 * number of nodes as follows: 271 * 272 * <pre>{@code 273 * Iterables.limit( 274 * Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes); 275 * }</pre> 276 * 277 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 278 * 279 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 280 */ 281 public final Iterable<N> depthFirstPreOrder(N startNode) { 282 return depthFirstPreOrder(ImmutableSet.of(startNode)); 283 } 284 285 /** 286 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 287 * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a 288 * depth-first pre-order traversal of a graph with an additional root node whose successors are 289 * the listed {@code startNodes}. 290 * 291 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 292 * @see #depthFirstPreOrder(Object) 293 * @since 24.1 294 */ 295 public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) { 296 ImmutableSet<N> validated = validate(startNodes); 297 return new Iterable<N>() { 298 @Override 299 public Iterator<N> iterator() { 300 return newTraversal().preOrder(validated.iterator()); 301 } 302 }; 303 } 304 305 /** 306 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 307 * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the 308 * {@code Iterable} in the order in which they are visited for the last time. 309 * 310 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 311 * the order {@code fcebda} (assuming successors are returned in alphabetical order). 312 * 313 * <pre>{@code 314 * b ---- a ---- d 315 * | | 316 * | | 317 * e ---- c ---- f 318 * }</pre> 319 * 320 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 321 * while iteration is in progress. 322 * 323 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 324 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 325 * number of nodes as follows: 326 * 327 * <pre>{@code 328 * Iterables.limit( 329 * Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes); 330 * }</pre> 331 * 332 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 333 * 334 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 335 */ 336 public final Iterable<N> depthFirstPostOrder(N startNode) { 337 return depthFirstPostOrder(ImmutableSet.of(startNode)); 338 } 339 340 /** 341 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 342 * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a 343 * depth-first post-order traversal of a graph with an additional root node whose successors are 344 * the listed {@code startNodes}. 345 * 346 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 347 * @see #depthFirstPostOrder(Object) 348 * @since 24.1 349 */ 350 public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) { 351 ImmutableSet<N> validated = validate(startNodes); 352 return new Iterable<N>() { 353 @Override 354 public Iterator<N> iterator() { 355 return newTraversal().postOrder(validated.iterator()); 356 } 357 }; 358 } 359 360 abstract Traversal<N> newTraversal(); 361 362 @SuppressWarnings("CheckReturnValue") 363 private ImmutableSet<N> validate(Iterable<? extends N> startNodes) { 364 ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes); 365 for (N node : copy) { 366 successorFunction.successors(node); // Will throw if node doesn't exist 367 } 368 return copy; 369 } 370 371 /** 372 * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take 373 * the next element from the next non-empty iterator; for graph, we need to loop through the next 374 * non-empty iterator to find first unvisited node. 375 */ 376 private abstract static class Traversal<N> { 377 final SuccessorsFunction<N> successorFunction; 378 379 Traversal(SuccessorsFunction<N> successorFunction) { 380 this.successorFunction = successorFunction; 381 } 382 383 static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) { 384 Set<N> visited = new HashSet<>(); 385 return new Traversal<N>(graph) { 386 @Override 387 @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) { 388 Iterator<? extends N> top = horizon.getFirst(); 389 while (top.hasNext()) { 390 N element = top.next(); 391 // requireNonNull is safe because horizon contains only graph nodes. 392 /* 393 * TODO(cpovirk): Replace these two statements with one (`N element = 394 * requireNonNull(top.next())`) once our checker supports it. 395 * 396 * (The problem is likely 397 * https://github.com/jspecify/jspecify-reference-checker/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896) 398 */ 399 requireNonNull(element); 400 if (visited.add(element)) { 401 return element; 402 } 403 } 404 horizon.removeFirst(); 405 return null; 406 } 407 }; 408 } 409 410 static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) { 411 return new Traversal<N>(tree) { 412 @Override 413 @Nullable N visitNext(Deque<Iterator<? extends N>> horizon) { 414 Iterator<? extends N> top = horizon.getFirst(); 415 if (top.hasNext()) { 416 return checkNotNull(top.next()); 417 } 418 horizon.removeFirst(); 419 return null; 420 } 421 }; 422 } 423 424 final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) { 425 return topDown(startNodes, InsertionOrder.BACK); 426 } 427 428 final Iterator<N> preOrder(Iterator<? extends N> startNodes) { 429 return topDown(startNodes, InsertionOrder.FRONT); 430 } 431 432 /** 433 * In top-down traversal, an ancestor node is always traversed before any of its descendant 434 * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are 435 * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before 436 * aunts for pre-order; while in BFS they are placed at the BACK after aunts. 437 */ 438 private Iterator<N> topDown(Iterator<? extends N> startNodes, InsertionOrder order) { 439 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 440 horizon.add(startNodes); 441 return new AbstractIterator<N>() { 442 @Override 443 protected @Nullable N computeNext() { 444 do { 445 N next = visitNext(horizon); 446 if (next != null) { 447 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 448 if (successors.hasNext()) { 449 // BFS: horizon.addLast(successors) 450 // Pre-order: horizon.addFirst(successors) 451 order.insertInto(horizon, successors); 452 } 453 return next; 454 } 455 } while (!horizon.isEmpty()); 456 return endOfData(); 457 } 458 }; 459 } 460 461 final Iterator<N> postOrder(Iterator<? extends N> startNodes) { 462 Deque<N> ancestorStack = new ArrayDeque<>(); 463 Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 464 horizon.add(startNodes); 465 return new AbstractIterator<N>() { 466 @Override 467 protected @Nullable N computeNext() { 468 for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) { 469 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 470 if (!successors.hasNext()) { 471 return next; 472 } 473 horizon.addFirst(successors); 474 ancestorStack.push(next); 475 } 476 // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker. 477 if (!ancestorStack.isEmpty()) { 478 return ancestorStack.pop(); 479 } 480 return endOfData(); 481 } 482 }; 483 } 484 485 /** 486 * Visits the next node from the top iterator of {@code horizon} and returns the visited node. 487 * Null is returned to indicate reaching the end of the top iterator. 488 * 489 * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return 490 * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure. 491 * (Note, however, that the callers of {@code visitNext()} often insert additional iterators 492 * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive 493 * additional values interleaved with those shown above.) 494 */ 495 abstract @Nullable N visitNext(Deque<Iterator<? extends N>> horizon); 496 } 497 498 /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */ 499 private enum InsertionOrder { 500 FRONT { 501 @Override 502 <T> void insertInto(Deque<T> deque, T value) { 503 deque.addFirst(value); 504 } 505 }, 506 BACK { 507 @Override 508 <T> void insertInto(Deque<T> deque, T value) { 509 deque.addLast(value); 510 } 511 }; 512 513 abstract <T> void insertInto(Deque<T> deque, T value); 514 } 515}