001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.io; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021 022import com.google.common.annotations.Beta; 023import com.google.common.base.Optional; 024import com.google.common.base.Splitter; 025import com.google.common.collect.AbstractIterator; 026import com.google.common.collect.ImmutableList; 027import com.google.common.collect.Lists; 028 029import java.io.BufferedReader; 030import java.io.IOException; 031import java.io.Reader; 032import java.io.Writer; 033import java.nio.charset.Charset; 034import java.util.Iterator; 035import java.util.List; 036import java.util.regex.Pattern; 037 038import javax.annotation.Nullable; 039 040/** 041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a 042 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed. 043 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances. 044 * 045 * <p>{@code CharSource} provides two kinds of methods: 046 * <ul> 047 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 048 * instance each time they are called. The caller is responsible for ensuring that the returned 049 * reader is closed. 050 * <li><b>Convenience methods:</b> These are implementations of common operations that are 051 * typically implemented by opening a reader using one of the methods in the first category, 052 * doing something and finally closing the reader that was opened. 053 * </ul> 054 * 055 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the 056 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, 057 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider 058 * there to be an empty line at the end if the contents are terminated with a line separator. 059 * 060 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 061 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 062 * 063 * @since 14.0 064 * @author Colin Decker 065 */ 066public abstract class CharSource { 067 068 /** 069 * Constructor for use by subclasses. 070 */ 071 protected CharSource() {} 072 073 /** 074 * Opens a new {@link Reader} for reading from this source. This method should return a new, 075 * independent reader each time it is called. 076 * 077 * <p>The caller is responsible for ensuring that the returned reader is closed. 078 * 079 * @throws IOException if an I/O error occurs in the process of opening the reader 080 */ 081 public abstract Reader openStream() throws IOException; 082 083 /** 084 * Opens a new {@link BufferedReader} for reading from this source. This method should return a 085 * new, independent reader each time it is called. 086 * 087 * <p>The caller is responsible for ensuring that the returned reader is closed. 088 * 089 * @throws IOException if an I/O error occurs in the process of opening the reader 090 */ 091 public BufferedReader openBufferedStream() throws IOException { 092 Reader reader = openStream(); 093 return (reader instanceof BufferedReader) 094 ? (BufferedReader) reader 095 : new BufferedReader(reader); 096 } 097 098 /** 099 * Returns the size of this source in chars, if the size can be easily determined without 100 * actually opening the data stream. 101 * 102 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a 103 * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is 104 * <i>possible</i> that this method will return a different number of chars than would be 105 * returned by reading all of the chars. 106 * 107 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read 108 * may return a different number of chars if the contents are changed. 109 * 110 * @since 19.0 111 */ 112 @Beta 113 public Optional<Long> lengthIfKnown() { 114 return Optional.absent(); 115 } 116 117 /** 118 * Returns the length of this source in chars, even if doing so requires opening and traversing 119 * an entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 120 * 121 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. 122 * If absent, it will fall back to a heavyweight operation that will open a stream, 123 * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars 124 * that were skipped. 125 * 126 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 127 * implementation, it is <i>possible</i> that this method will return a different number of chars 128 * than would be returned by reading all of the chars. 129 * 130 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 131 * number of chars if the contents are changed. 132 * 133 * @throws IOException if an I/O error occurs in the process of reading the length of this source 134 * @since 19.0 135 */ 136 @Beta 137 public long length() throws IOException { 138 Optional<Long> lengthIfKnown = lengthIfKnown(); 139 if (lengthIfKnown.isPresent()) { 140 return lengthIfKnown.get(); 141 } 142 143 Closer closer = Closer.create(); 144 try { 145 Reader reader = closer.register(openStream()); 146 return countBySkipping(reader); 147 } catch (Throwable e) { 148 throw closer.rethrow(e); 149 } finally { 150 closer.close(); 151 } 152 } 153 154 private long countBySkipping(Reader reader) throws IOException { 155 long count = 0; 156 long read; 157 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 158 count += read; 159 } 160 return count; 161 } 162 163 /** 164 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 165 * Does not close {@code appendable} if it is {@code Closeable}. 166 * 167 * @throws IOException if an I/O error occurs in the process of reading from this source or 168 * writing to {@code appendable} 169 */ 170 public long copyTo(Appendable appendable) throws IOException { 171 checkNotNull(appendable); 172 173 Closer closer = Closer.create(); 174 try { 175 Reader reader = closer.register(openStream()); 176 return CharStreams.copy(reader, appendable); 177 } catch (Throwable e) { 178 throw closer.rethrow(e); 179 } finally { 180 closer.close(); 181 } 182 } 183 184 /** 185 * Copies the contents of this source to the given sink. 186 * 187 * @throws IOException if an I/O error occurs in the process of reading from this source or 188 * writing to {@code sink} 189 */ 190 public long copyTo(CharSink sink) throws IOException { 191 checkNotNull(sink); 192 193 Closer closer = Closer.create(); 194 try { 195 Reader reader = closer.register(openStream()); 196 Writer writer = closer.register(sink.openStream()); 197 return CharStreams.copy(reader, writer); 198 } catch (Throwable e) { 199 throw closer.rethrow(e); 200 } finally { 201 closer.close(); 202 } 203 } 204 205 /** 206 * Reads the contents of this source as a string. 207 * 208 * @throws IOException if an I/O error occurs in the process of reading from this source 209 */ 210 public String read() throws IOException { 211 Closer closer = Closer.create(); 212 try { 213 Reader reader = closer.register(openStream()); 214 return CharStreams.toString(reader); 215 } catch (Throwable e) { 216 throw closer.rethrow(e); 217 } finally { 218 closer.close(); 219 } 220 } 221 222 /** 223 * Reads the first link of this source as a string. Returns {@code null} if this source is empty. 224 * 225 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 226 * {@code \r\n}, does not include the line separator in the returned line and does not consider 227 * there to be an extra empty line at the end if the content is terminated with a line separator. 228 * 229 * @throws IOException if an I/O error occurs in the process of reading from this source 230 */ 231 @Nullable public String readFirstLine() throws IOException { 232 Closer closer = Closer.create(); 233 try { 234 BufferedReader reader = closer.register(openBufferedStream()); 235 return reader.readLine(); 236 } catch (Throwable e) { 237 throw closer.rethrow(e); 238 } finally { 239 closer.close(); 240 } 241 } 242 243 /** 244 * Reads all the lines of this source as a list of strings. The returned list will be empty if 245 * this source is empty. 246 * 247 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 248 * {@code \r\n}, does not include the line separator in the returned lines and does not consider 249 * there to be an extra empty line at the end if the content is terminated with a line separator. 250 * 251 * @throws IOException if an I/O error occurs in the process of reading from this source 252 */ 253 public ImmutableList<String> readLines() throws IOException { 254 Closer closer = Closer.create(); 255 try { 256 BufferedReader reader = closer.register(openBufferedStream()); 257 List<String> result = Lists.newArrayList(); 258 String line; 259 while ((line = reader.readLine()) != null) { 260 result.add(line); 261 } 262 return ImmutableList.copyOf(result); 263 } catch (Throwable e) { 264 throw closer.rethrow(e); 265 } finally { 266 closer.close(); 267 } 268 } 269 270 /** 271 * Reads lines of text from this source, processing each line as it is read using the given 272 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor 273 * returns {@code false} and returns the result produced by the processor. 274 * 275 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 276 * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor} 277 * and does not consider there to be an extra empty line at the end if the content is terminated 278 * with a line separator. 279 * 280 * @throws IOException if an I/O error occurs in the process of reading from this source or if 281 * {@code processor} throws an {@code IOException} 282 * @since 16.0 283 */ 284 @Beta 285 public <T> T readLines(LineProcessor<T> processor) throws IOException { 286 checkNotNull(processor); 287 288 Closer closer = Closer.create(); 289 try { 290 Reader reader = closer.register(openStream()); 291 return CharStreams.readLines(reader, processor); 292 } catch (Throwable e) { 293 throw closer.rethrow(e); 294 } finally { 295 closer.close(); 296 } 297 } 298 299 /** 300 * Returns whether the source has zero chars. The default implementation returns true if 301 * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking 302 * for EOF if the length is not known. 303 * 304 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 305 * chars are actually available for reading. This means that a source may return {@code true} from 306 * {@code isEmpty()} despite having readable content. 307 * 308 * @throws IOException if an I/O error occurs 309 * @since 15.0 310 */ 311 public boolean isEmpty() throws IOException { 312 Optional<Long> lengthIfKnown = lengthIfKnown(); 313 if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) { 314 return true; 315 } 316 Closer closer = Closer.create(); 317 try { 318 Reader reader = closer.register(openStream()); 319 return reader.read() == -1; 320 } catch (Throwable e) { 321 throw closer.rethrow(e); 322 } finally { 323 closer.close(); 324 } 325 } 326 327 /** 328 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 329 * the source will contain the concatenated data from the streams of the underlying sources. 330 * 331 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 332 * close the open underlying stream. 333 * 334 * @param sources the sources to concatenate 335 * @return a {@code CharSource} containing the concatenated data 336 * @since 15.0 337 */ 338 public static CharSource concat(Iterable<? extends CharSource> sources) { 339 return new ConcatenatedCharSource(sources); 340 } 341 342 /** 343 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 344 * the source will contain the concatenated data from the streams of the underlying sources. 345 * 346 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 347 * close the open underlying stream. 348 * 349 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this 350 * method is called. This will fail if the iterator is infinite and may cause problems if the 351 * iterator eagerly fetches data for each source when iterated (rather than producing sources 352 * that only load data through their streams). Prefer using the {@link #concat(Iterable)} 353 * overload if possible. 354 * 355 * @param sources the sources to concatenate 356 * @return a {@code CharSource} containing the concatenated data 357 * @throws NullPointerException if any of {@code sources} is {@code null} 358 * @since 15.0 359 */ 360 public static CharSource concat(Iterator<? extends CharSource> sources) { 361 return concat(ImmutableList.copyOf(sources)); 362 } 363 364 /** 365 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 366 * the source will contain the concatenated data from the streams of the underlying sources. 367 * 368 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 369 * close the open underlying stream. 370 * 371 * @param sources the sources to concatenate 372 * @return a {@code CharSource} containing the concatenated data 373 * @throws NullPointerException if any of {@code sources} is {@code null} 374 * @since 15.0 375 */ 376 public static CharSource concat(CharSource... sources) { 377 return concat(ImmutableList.copyOf(sources)); 378 } 379 380 /** 381 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 382 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 383 * the {@code charSequence} is mutated while it is being read, so don't do that. 384 * 385 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 386 */ 387 public static CharSource wrap(CharSequence charSequence) { 388 return new CharSequenceCharSource(charSequence); 389 } 390 391 /** 392 * Returns an immutable {@link CharSource} that contains no characters. 393 * 394 * @since 15.0 395 */ 396 public static CharSource empty() { 397 return EmptyCharSource.INSTANCE; 398 } 399 400 private static class CharSequenceCharSource extends CharSource { 401 402 private static final Splitter LINE_SPLITTER 403 = Splitter.on(Pattern.compile("\r\n|\n|\r")); 404 405 private final CharSequence seq; 406 407 protected CharSequenceCharSource(CharSequence seq) { 408 this.seq = checkNotNull(seq); 409 } 410 411 @Override 412 public Reader openStream() { 413 return new CharSequenceReader(seq); 414 } 415 416 @Override 417 public String read() { 418 return seq.toString(); 419 } 420 421 @Override 422 public boolean isEmpty() { 423 return seq.length() == 0; 424 } 425 426 @Override 427 public long length() { 428 return seq.length(); 429 } 430 431 @Override 432 public Optional<Long> lengthIfKnown() { 433 return Optional.of((long) seq.length()); 434 } 435 436 /** 437 * Returns an iterable over the lines in the string. If the string ends in 438 * a newline, a final empty string is not included to match the behavior of 439 * BufferedReader/LineReader.readLine(). 440 */ 441 private Iterable<String> lines() { 442 return new Iterable<String>() { 443 @Override 444 public Iterator<String> iterator() { 445 return new AbstractIterator<String>() { 446 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 447 448 @Override 449 protected String computeNext() { 450 if (lines.hasNext()) { 451 String next = lines.next(); 452 // skip last line if it's empty 453 if (lines.hasNext() || !next.isEmpty()) { 454 return next; 455 } 456 } 457 return endOfData(); 458 } 459 }; 460 } 461 }; 462 } 463 464 @Override 465 public String readFirstLine() { 466 Iterator<String> lines = lines().iterator(); 467 return lines.hasNext() ? lines.next() : null; 468 } 469 470 @Override 471 public ImmutableList<String> readLines() { 472 return ImmutableList.copyOf(lines()); 473 } 474 475 @Override 476 public <T> T readLines(LineProcessor<T> processor) throws IOException { 477 for (String line : lines()) { 478 if (!processor.processLine(line)) { 479 break; 480 } 481 } 482 return processor.getResult(); 483 } 484 485 @Override 486 public String toString() { 487 return "CharSource.wrap(" + truncate(seq, 30, "...") + ")"; 488 } 489 490 /** 491 * Truncates the given character sequence to the given maximum length. If the length of the 492 * sequence is greater than {@code maxLength}, the returned string will be exactly 493 * {@code maxLength} chars in length and will end with the given {@code truncationIndicator}. 494 * Otherwise, the sequence will be returned as a string with no changes to the content. 495 * 496 * <p>Examples: 497 * 498 * <pre> {@code 499 * truncate("foobar", 7, "..."); // returns "foobar" 500 * truncate("foobar", 5, "..."); // returns "fo..." }</pre> 501 * 502 * <p><b>Note:</b> This method <i>may</i> work with certain non-ASCII text but is not safe for 503 * use with arbitrary Unicode text. It is mostly intended for use with text that is known to be 504 * safe for use with it (such as all-ASCII text) and for simple debugging text. When using this 505 * method, consider the following: 506 * 507 * <ul> 508 * <li>it may split surrogate pairs</li> 509 * <li>it may split characters and combining characters</li> 510 * <li>it does not consider word boundaries</li> 511 * <li>if truncating for display to users, there are other considerations that must be taken 512 * into account</li> 513 * <li>the appropriate truncation indicator may be locale-dependent</li> 514 * <li>it is safe to use non-ASCII characters in the truncation indicator</li> 515 * </ul> 516 * 517 * 518 * @throws IllegalArgumentException if {@code maxLength} is less than the length of 519 * {@code truncationIndicator} 520 */ 521 /* 522 * <p>TODO(user, cpovirk): Use Ascii.truncate once it is available in our internal copy of 523 * guava_jdk5. 524 */ 525 private static String truncate(CharSequence seq, int maxLength, String truncationIndicator) { 526 checkNotNull(seq); 527 528 // length to truncate the sequence to, not including the truncation indicator 529 int truncationLength = maxLength - truncationIndicator.length(); 530 531 // in this worst case, this allows a maxLength equal to the length of the truncationIndicator, 532 // meaning that a string will be truncated to just the truncation indicator itself 533 checkArgument(truncationLength >= 0, 534 "maxLength (%s) must be >= length of the truncation indicator (%s)", 535 maxLength, truncationIndicator.length()); 536 537 if (seq.length() <= maxLength) { 538 String string = seq.toString(); 539 if (string.length() <= maxLength) { 540 return string; 541 } 542 // if the length of the toString() result was > maxLength for some reason, truncate that 543 seq = string; 544 } 545 546 return new StringBuilder(maxLength) 547 .append(seq, 0, truncationLength) 548 .append(truncationIndicator) 549 .toString(); 550 } 551 } 552 553 private static final class EmptyCharSource extends CharSequenceCharSource { 554 555 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 556 557 private EmptyCharSource() { 558 super(""); 559 } 560 561 @Override 562 public String toString() { 563 return "CharSource.empty()"; 564 } 565 } 566 567 private static final class ConcatenatedCharSource extends CharSource { 568 569 private final Iterable<? extends CharSource> sources; 570 571 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 572 this.sources = checkNotNull(sources); 573 } 574 575 @Override 576 public Reader openStream() throws IOException { 577 return new MultiReader(sources.iterator()); 578 } 579 580 @Override 581 public boolean isEmpty() throws IOException { 582 for (CharSource source : sources) { 583 if (!source.isEmpty()) { 584 return false; 585 } 586 } 587 return true; 588 } 589 590 @Override 591 public Optional<Long> lengthIfKnown() { 592 long result = 0L; 593 for (CharSource source : sources) { 594 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 595 if (!lengthIfKnown.isPresent()) { 596 return Optional.absent(); 597 } 598 result += lengthIfKnown.get(); 599 } 600 return Optional.of(result); 601 } 602 603 @Override 604 public long length() throws IOException { 605 long result = 0L; 606 for (CharSource source : sources) { 607 result += source.length(); 608 } 609 return result; 610 } 611 612 @Override 613 public String toString() { 614 return "CharSource.concat(" + sources + ")"; 615 } 616 } 617}