001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.errorprone.annotations.CanIgnoreReturnValue; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.Reader; 032import java.io.Writer; 033import java.nio.charset.Charset; 034import java.util.Iterator; 035import java.util.List; 036import javax.annotation.Nullable; 037 038/** 039 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a 040 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed. 041 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances. 042 * 043 * <p>{@code CharSource} provides two kinds of methods: 044 * <ul> 045 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 046 * instance each time they are called. The caller is responsible for ensuring that the returned 047 * reader is closed. 048 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically 049 * implemented by opening a reader using one of the methods in the first category, doing 050 * something and finally closing the reader that was opened. 051 * </ul> 052 * 053 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 054 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, 055 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider 056 * there to be an empty line at the end if the contents are terminated with a line separator. 057 * 058 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 059 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 060 * 061 * @since 14.0 062 * @author Colin Decker 063 */ 064@GwtIncompatible 065public abstract class CharSource { 066 067 /** 068 * Constructor for use by subclasses. 069 */ 070 protected CharSource() {} 071 072 /** 073 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 074 * as bytes using the given {@link Charset}. 075 * 076 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 077 * the default implementation of this method will ensure that the original {@code CharSource} is 078 * returned, rather than round-trip encoding. Subclasses that override this method should behave 079 * the same way. 080 * 081 * @since 20.0 082 */ 083 @Beta 084 public ByteSource asByteSource(Charset charset) { 085 return new AsByteSource(charset); 086 } 087 088 /** 089 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 090 * reader each time it is called. 091 * 092 * <p>The caller is responsible for ensuring that the returned reader is closed. 093 * 094 * @throws IOException if an I/O error occurs while opening the reader 095 */ 096 public abstract Reader openStream() throws IOException; 097 098 /** 099 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 100 * independent reader each time it is called. 101 * 102 * <p>The caller is responsible for ensuring that the returned reader is closed. 103 * 104 * @throws IOException if an I/O error occurs while of opening the reader 105 */ 106 public BufferedReader openBufferedStream() throws IOException { 107 Reader reader = openStream(); 108 return (reader instanceof BufferedReader) 109 ? (BufferedReader) reader 110 : new BufferedReader(reader); 111 } 112 113 /** 114 * Returns the size of this source in chars, if the size can be easily determined without actually 115 * opening the data stream. 116 * 117 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a 118 * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is 119 * <i>possible</i> that this method will return a different number of chars than would be returned 120 * by reading all of the chars. 121 * 122 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 123 * return a different number of chars if the contents are changed. 124 * 125 * @since 19.0 126 */ 127 @Beta 128 public Optional<Long> lengthIfKnown() { 129 return Optional.absent(); 130 } 131 132 /** 133 * Returns the length of this source in chars, even if doing so requires opening and traversing an 134 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 135 * 136 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 137 * absent, it will fall back to a heavyweight operation that will open a stream, 138 * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars 139 * that were skipped. 140 * 141 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 142 * implementation, it is <i>possible</i> that this method will return a different number of chars 143 * than would be returned by reading all of the chars. 144 * 145 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 146 * number of chars if the contents are changed. 147 * 148 * @throws IOException if an I/O error occurs while reading the length of this source 149 * @since 19.0 150 */ 151 @Beta 152 public long length() throws IOException { 153 Optional<Long> lengthIfKnown = lengthIfKnown(); 154 if (lengthIfKnown.isPresent()) { 155 return lengthIfKnown.get(); 156 } 157 158 Closer closer = Closer.create(); 159 try { 160 Reader reader = closer.register(openStream()); 161 return countBySkipping(reader); 162 } catch (Throwable e) { 163 throw closer.rethrow(e); 164 } finally { 165 closer.close(); 166 } 167 } 168 169 private long countBySkipping(Reader reader) throws IOException { 170 long count = 0; 171 long read; 172 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 173 count += read; 174 } 175 return count; 176 } 177 178 /** 179 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 180 * Does not close {@code appendable} if it is {@code Closeable}. 181 * 182 * @return the number of characters copied 183 * @throws IOException if an I/O error occurs while reading from this source or writing to 184 * {@code appendable} 185 */ 186 @CanIgnoreReturnValue 187 public long copyTo(Appendable appendable) throws IOException { 188 checkNotNull(appendable); 189 190 Closer closer = Closer.create(); 191 try { 192 Reader reader = closer.register(openStream()); 193 return CharStreams.copy(reader, appendable); 194 } catch (Throwable e) { 195 throw closer.rethrow(e); 196 } finally { 197 closer.close(); 198 } 199 } 200 201 /** 202 * Copies the contents of this source to the given sink. 203 * 204 * @return the number of characters copied 205 * @throws IOException if an I/O error occurs while reading from this source or writing to 206 * {@code sink} 207 */ 208 @CanIgnoreReturnValue 209 public long copyTo(CharSink sink) throws IOException { 210 checkNotNull(sink); 211 212 Closer closer = Closer.create(); 213 try { 214 Reader reader = closer.register(openStream()); 215 Writer writer = closer.register(sink.openStream()); 216 return CharStreams.copy(reader, writer); 217 } catch (Throwable e) { 218 throw closer.rethrow(e); 219 } finally { 220 closer.close(); 221 } 222 } 223 224 /** 225 * Reads the contents of this source as a string. 226 * 227 * @throws IOException if an I/O error occurs while reading from this source 228 */ 229 public String read() throws IOException { 230 Closer closer = Closer.create(); 231 try { 232 Reader reader = closer.register(openStream()); 233 return CharStreams.toString(reader); 234 } catch (Throwable e) { 235 throw closer.rethrow(e); 236 } finally { 237 closer.close(); 238 } 239 } 240 241 /** 242 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 243 * 244 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 245 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 246 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 247 * as if it does. 248 * 249 * @throws IOException if an I/O error occurs while reading from this source 250 */ 251 @Nullable 252 public String readFirstLine() throws IOException { 253 Closer closer = Closer.create(); 254 try { 255 BufferedReader reader = closer.register(openBufferedStream()); 256 return reader.readLine(); 257 } catch (Throwable e) { 258 throw closer.rethrow(e); 259 } finally { 260 closer.close(); 261 } 262 } 263 264 /** 265 * Reads all the lines of this source as a list of strings. The returned list will be empty if 266 * this source is empty. 267 * 268 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 269 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 270 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 271 * as if it does. 272 * 273 * @throws IOException if an I/O error occurs while reading from this source 274 */ 275 public ImmutableList<String> readLines() throws IOException { 276 Closer closer = Closer.create(); 277 try { 278 BufferedReader reader = closer.register(openBufferedStream()); 279 List<String> result = Lists.newArrayList(); 280 String line; 281 while ((line = reader.readLine()) != null) { 282 result.add(line); 283 } 284 return ImmutableList.copyOf(result); 285 } catch (Throwable e) { 286 throw closer.rethrow(e); 287 } finally { 288 closer.close(); 289 } 290 } 291 292 /** 293 * Reads lines of text from this source, processing each line as it is read using the given 294 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor 295 * returns {@code false} and returns the result produced by the processor. 296 * 297 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 298 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 299 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 300 * as if it does. 301 * 302 * @throws IOException if an I/O error occurs while reading from this source or if 303 * {@code processor} throws an {@code IOException} 304 * @since 16.0 305 */ 306 @Beta 307 @CanIgnoreReturnValue // some processors won't return a useful result 308 public <T> T readLines(LineProcessor<T> processor) throws IOException { 309 checkNotNull(processor); 310 311 Closer closer = Closer.create(); 312 try { 313 Reader reader = closer.register(openStream()); 314 return CharStreams.readLines(reader, processor); 315 } catch (Throwable e) { 316 throw closer.rethrow(e); 317 } finally { 318 closer.close(); 319 } 320 } 321 322 /** 323 * Returns whether the source has zero chars. The default implementation returns true if 324 * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if 325 * the length is not known. 326 * 327 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 328 * chars are actually available for reading. This means that a source may return {@code true} from 329 * {@code isEmpty()} despite having readable content. 330 * 331 * @throws IOException if an I/O error occurs 332 * @since 15.0 333 */ 334 public boolean isEmpty() throws IOException { 335 Optional<Long> lengthIfKnown = lengthIfKnown(); 336 if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) { 337 return true; 338 } 339 Closer closer = Closer.create(); 340 try { 341 Reader reader = closer.register(openStream()); 342 return reader.read() == -1; 343 } catch (Throwable e) { 344 throw closer.rethrow(e); 345 } finally { 346 closer.close(); 347 } 348 } 349 350 /** 351 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 352 * the source will contain the concatenated data from the streams of the underlying sources. 353 * 354 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 355 * close the open underlying stream. 356 * 357 * @param sources the sources to concatenate 358 * @return a {@code CharSource} containing the concatenated data 359 * @since 15.0 360 */ 361 public static CharSource concat(Iterable<? extends CharSource> sources) { 362 return new ConcatenatedCharSource(sources); 363 } 364 365 /** 366 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 367 * the source will contain the concatenated data from the streams of the underlying sources. 368 * 369 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 370 * close the open underlying stream. 371 * 372 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 373 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 374 * eagerly fetches data for each source when iterated (rather than producing sources that only 375 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 376 * possible. 377 * 378 * @param sources the sources to concatenate 379 * @return a {@code CharSource} containing the concatenated data 380 * @throws NullPointerException if any of {@code sources} is {@code null} 381 * @since 15.0 382 */ 383 public static CharSource concat(Iterator<? extends CharSource> sources) { 384 return concat(ImmutableList.copyOf(sources)); 385 } 386 387 /** 388 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 389 * the source will contain the concatenated data from the streams of the underlying sources. 390 * 391 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 392 * close the open underlying stream. 393 * 394 * @param sources the sources to concatenate 395 * @return a {@code CharSource} containing the concatenated data 396 * @throws NullPointerException if any of {@code sources} is {@code null} 397 * @since 15.0 398 */ 399 public static CharSource concat(CharSource... sources) { 400 return concat(ImmutableList.copyOf(sources)); 401 } 402 403 /** 404 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 405 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 406 * the {@code charSequence} is mutated while it is being read, so don't do that. 407 * 408 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 409 */ 410 public static CharSource wrap(CharSequence charSequence) { 411 return new CharSequenceCharSource(charSequence); 412 } 413 414 /** 415 * Returns an immutable {@link CharSource} that contains no characters. 416 * 417 * @since 15.0 418 */ 419 public static CharSource empty() { 420 return EmptyCharSource.INSTANCE; 421 } 422 423 /** 424 * A byte source that reads chars from this source and encodes them as bytes using a charset. 425 */ 426 private final class AsByteSource extends ByteSource { 427 428 final Charset charset; 429 430 AsByteSource(Charset charset) { 431 this.charset = checkNotNull(charset); 432 } 433 434 @Override 435 public CharSource asCharSource(Charset charset) { 436 if (charset.equals(this.charset)) { 437 return CharSource.this; 438 } 439 return super.asCharSource(charset); 440 } 441 442 @Override 443 public InputStream openStream() throws IOException { 444 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 445 } 446 447 @Override 448 public String toString() { 449 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 450 } 451 } 452 453 private static class CharSequenceCharSource extends CharSource { 454 455 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 456 457 private final CharSequence seq; 458 459 protected CharSequenceCharSource(CharSequence seq) { 460 this.seq = checkNotNull(seq); 461 } 462 463 @Override 464 public Reader openStream() { 465 return new CharSequenceReader(seq); 466 } 467 468 @Override 469 public String read() { 470 return seq.toString(); 471 } 472 473 @Override 474 public boolean isEmpty() { 475 return seq.length() == 0; 476 } 477 478 @Override 479 public long length() { 480 return seq.length(); 481 } 482 483 @Override 484 public Optional<Long> lengthIfKnown() { 485 return Optional.of((long) seq.length()); 486 } 487 488 /** 489 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 490 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 491 */ 492 private Iterator<String> linesIterator() { 493 return new AbstractIterator<String>() { 494 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 495 496 @Override 497 protected String computeNext() { 498 if (lines.hasNext()) { 499 String next = lines.next(); 500 // skip last line if it's empty 501 if (lines.hasNext() || !next.isEmpty()) { 502 return next; 503 } 504 } 505 return endOfData(); 506 } 507 }; 508 } 509 510 @Override 511 public String readFirstLine() { 512 Iterator<String> lines = linesIterator(); 513 return lines.hasNext() ? lines.next() : null; 514 } 515 516 @Override 517 public ImmutableList<String> readLines() { 518 return ImmutableList.copyOf(linesIterator()); 519 } 520 521 @Override 522 public <T> T readLines(LineProcessor<T> processor) throws IOException { 523 Iterator<String> lines = linesIterator(); 524 while (lines.hasNext()) { 525 if (!processor.processLine(lines.next())) { 526 break; 527 } 528 } 529 return processor.getResult(); 530 } 531 532 @Override 533 public String toString() { 534 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 535 } 536 } 537 538 private static final class EmptyCharSource extends CharSequenceCharSource { 539 540 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 541 542 private EmptyCharSource() { 543 super(""); 544 } 545 546 @Override 547 public String toString() { 548 return "CharSource.empty()"; 549 } 550 } 551 552 private static final class ConcatenatedCharSource extends CharSource { 553 554 private final Iterable<? extends CharSource> sources; 555 556 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 557 this.sources = checkNotNull(sources); 558 } 559 560 @Override 561 public Reader openStream() throws IOException { 562 return new MultiReader(sources.iterator()); 563 } 564 565 @Override 566 public boolean isEmpty() throws IOException { 567 for (CharSource source : sources) { 568 if (!source.isEmpty()) { 569 return false; 570 } 571 } 572 return true; 573 } 574 575 @Override 576 public Optional<Long> lengthIfKnown() { 577 long result = 0L; 578 for (CharSource source : sources) { 579 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 580 if (!lengthIfKnown.isPresent()) { 581 return Optional.absent(); 582 } 583 result += lengthIfKnown.get(); 584 } 585 return Optional.of(result); 586 } 587 588 @Override 589 public long length() throws IOException { 590 long result = 0L; 591 for (CharSource source : sources) { 592 result += source.length(); 593 } 594 return result; 595 } 596 597 @Override 598 public String toString() { 599 return "CharSource.concat(" + sources + ")"; 600 } 601 } 602}