001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.errorprone.annotations.CanIgnoreReturnValue; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.Reader; 032import java.io.Writer; 033import java.nio.charset.Charset; 034import java.util.Iterator; 035import java.util.List; 036import javax.annotation.Nullable; 037 038/** 039 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a 040 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed. 041 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances. 042 * 043 * <p>{@code CharSource} provides two kinds of methods: 044 * <ul> 045 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 046 * instance each time they are called. The caller is responsible for ensuring that the returned 047 * reader is closed. 048 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically 049 * implemented by opening a reader using one of the methods in the first category, doing 050 * something and finally closing the reader that was opened. 051 * </ul> 052 * 053 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 054 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, 055 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider 056 * there to be an empty line at the end if the contents are terminated with a line separator. 057 * 058 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 059 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 060 * 061 * @since 14.0 062 * @author Colin Decker 063 */ 064@GwtIncompatible 065public abstract class CharSource { 066 067 /** 068 * Constructor for use by subclasses. 069 */ 070 protected CharSource() {} 071 072 /** 073 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 074 * as bytes using the given {@link Charset}. 075 * 076 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 077 * the default implementation of this method will ensure that the original {@code CharSource} is 078 * returned, rather than round-trip encoding. Subclasses that override this method should behave 079 * the same way. 080 * 081 * @since 20.0 082 */ 083 @Beta 084 public ByteSource asByteSource(Charset charset) { 085 return new AsByteSource(charset); 086 } 087 088 /** 089 * Opens a new {@link Reader} for reading from this source. This method should return a new, 090 * independent reader each time it is called. 091 * 092 * <p>The caller is responsible for ensuring that the returned reader is closed. 093 * 094 * @throws IOException if an I/O error occurs in the process of opening the reader 095 */ 096 public abstract Reader openStream() throws IOException; 097 098 /** 099 * Opens a new {@link BufferedReader} for reading from this source. This method should return a 100 * new, independent reader each time it is called. 101 * 102 * <p>The caller is responsible for ensuring that the returned reader is closed. 103 * 104 * @throws IOException if an I/O error occurs in the process of opening the reader 105 */ 106 public BufferedReader openBufferedStream() throws IOException { 107 Reader reader = openStream(); 108 return (reader instanceof BufferedReader) 109 ? (BufferedReader) reader 110 : new BufferedReader(reader); 111 } 112 113 /** 114 * Returns the size of this source in chars, if the size can be easily determined without actually 115 * opening the data stream. 116 * 117 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a 118 * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is 119 * <i>possible</i> that this method will return a different number of chars than would be returned 120 * by reading all of the chars. 121 * 122 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 123 * return a different number of chars if the contents are changed. 124 * 125 * @since 19.0 126 */ 127 @Beta 128 public Optional<Long> lengthIfKnown() { 129 return Optional.absent(); 130 } 131 132 /** 133 * Returns the length of this source in chars, even if doing so requires opening and traversing an 134 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 135 * 136 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 137 * absent, it will fall back to a heavyweight operation that will open a stream, 138 * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars 139 * that were skipped. 140 * 141 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 142 * implementation, it is <i>possible</i> that this method will return a different number of chars 143 * than would be returned by reading all of the chars. 144 * 145 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 146 * number of chars if the contents are changed. 147 * 148 * @throws IOException if an I/O error occurs in the process of reading the length of this source 149 * @since 19.0 150 */ 151 @Beta 152 public long length() throws IOException { 153 Optional<Long> lengthIfKnown = lengthIfKnown(); 154 if (lengthIfKnown.isPresent()) { 155 return lengthIfKnown.get(); 156 } 157 158 Closer closer = Closer.create(); 159 try { 160 Reader reader = closer.register(openStream()); 161 return countBySkipping(reader); 162 } catch (Throwable e) { 163 throw closer.rethrow(e); 164 } finally { 165 closer.close(); 166 } 167 } 168 169 private long countBySkipping(Reader reader) throws IOException { 170 long count = 0; 171 long read; 172 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 173 count += read; 174 } 175 return count; 176 } 177 178 /** 179 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 180 * Does not close {@code appendable} if it is {@code Closeable}. 181 * 182 * @return the number of characters copied 183 * @throws IOException if an I/O error occurs in the process of reading from this source or 184 * writing to {@code appendable} 185 */ 186 @CanIgnoreReturnValue 187 public long copyTo(Appendable appendable) throws IOException { 188 checkNotNull(appendable); 189 190 Closer closer = Closer.create(); 191 try { 192 Reader reader = closer.register(openStream()); 193 return CharStreams.copy(reader, appendable); 194 } catch (Throwable e) { 195 throw closer.rethrow(e); 196 } finally { 197 closer.close(); 198 } 199 } 200 201 /** 202 * Copies the contents of this source to the given sink. 203 * 204 * @return the number of characters copied 205 * @throws IOException if an I/O error occurs in the process of reading from this source or 206 * writing to {@code sink} 207 */ 208 @CanIgnoreReturnValue 209 public long copyTo(CharSink sink) throws IOException { 210 checkNotNull(sink); 211 212 Closer closer = Closer.create(); 213 try { 214 Reader reader = closer.register(openStream()); 215 Writer writer = closer.register(sink.openStream()); 216 return CharStreams.copy(reader, writer); 217 } catch (Throwable e) { 218 throw closer.rethrow(e); 219 } finally { 220 closer.close(); 221 } 222 } 223 224 /** 225 * Reads the contents of this source as a string. 226 * 227 * @throws IOException if an I/O error occurs in the process of reading from this source 228 */ 229 public String read() throws IOException { 230 Closer closer = Closer.create(); 231 try { 232 Reader reader = closer.register(openStream()); 233 return CharStreams.toString(reader); 234 } catch (Throwable e) { 235 throw closer.rethrow(e); 236 } finally { 237 closer.close(); 238 } 239 } 240 241 /** 242 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 243 * 244 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 245 * {@code \r\n}, does not include the line separator in the returned line and does not consider 246 * there to be an extra empty line at the end if the content is terminated with a line separator. 247 * 248 * @throws IOException if an I/O error occurs in the process of reading from this source 249 */ 250 @Nullable 251 public String readFirstLine() throws IOException { 252 Closer closer = Closer.create(); 253 try { 254 BufferedReader reader = closer.register(openBufferedStream()); 255 return reader.readLine(); 256 } catch (Throwable e) { 257 throw closer.rethrow(e); 258 } finally { 259 closer.close(); 260 } 261 } 262 263 /** 264 * Reads all the lines of this source as a list of strings. The returned list will be empty if 265 * this source is empty. 266 * 267 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 268 * {@code \r\n}, does not include the line separator in the returned lines and does not consider 269 * there to be an extra empty line at the end if the content is terminated with a line separator. 270 * 271 * @throws IOException if an I/O error occurs in the process of reading from this source 272 */ 273 public ImmutableList<String> readLines() throws IOException { 274 Closer closer = Closer.create(); 275 try { 276 BufferedReader reader = closer.register(openBufferedStream()); 277 List<String> result = Lists.newArrayList(); 278 String line; 279 while ((line = reader.readLine()) != null) { 280 result.add(line); 281 } 282 return ImmutableList.copyOf(result); 283 } catch (Throwable e) { 284 throw closer.rethrow(e); 285 } finally { 286 closer.close(); 287 } 288 } 289 290 /** 291 * Reads lines of text from this source, processing each line as it is read using the given 292 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor 293 * returns {@code false} and returns the result produced by the processor. 294 * 295 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 296 * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor} 297 * and does not consider there to be an extra empty line at the end if the content is terminated 298 * with a line separator. 299 * 300 * @throws IOException if an I/O error occurs in the process of reading from this source or if 301 * {@code processor} throws an {@code IOException} 302 * @since 16.0 303 */ 304 @Beta 305 @CanIgnoreReturnValue // some processors won't return a useful result 306 public <T> T readLines(LineProcessor<T> processor) throws IOException { 307 checkNotNull(processor); 308 309 Closer closer = Closer.create(); 310 try { 311 Reader reader = closer.register(openStream()); 312 return CharStreams.readLines(reader, processor); 313 } catch (Throwable e) { 314 throw closer.rethrow(e); 315 } finally { 316 closer.close(); 317 } 318 } 319 320 /** 321 * Returns whether the source has zero chars. The default implementation returns true if 322 * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if 323 * the length is not known. 324 * 325 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 326 * chars are actually available for reading. This means that a source may return {@code true} from 327 * {@code isEmpty()} despite having readable content. 328 * 329 * @throws IOException if an I/O error occurs 330 * @since 15.0 331 */ 332 public boolean isEmpty() throws IOException { 333 Optional<Long> lengthIfKnown = lengthIfKnown(); 334 if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) { 335 return true; 336 } 337 Closer closer = Closer.create(); 338 try { 339 Reader reader = closer.register(openStream()); 340 return reader.read() == -1; 341 } catch (Throwable e) { 342 throw closer.rethrow(e); 343 } finally { 344 closer.close(); 345 } 346 } 347 348 /** 349 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 350 * the source will contain the concatenated data from the streams of the underlying sources. 351 * 352 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 353 * close the open underlying stream. 354 * 355 * @param sources the sources to concatenate 356 * @return a {@code CharSource} containing the concatenated data 357 * @since 15.0 358 */ 359 public static CharSource concat(Iterable<? extends CharSource> sources) { 360 return new ConcatenatedCharSource(sources); 361 } 362 363 /** 364 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 365 * the source will contain the concatenated data from the streams of the underlying sources. 366 * 367 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 368 * close the open underlying stream. 369 * 370 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 371 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 372 * eagerly fetches data for each source when iterated (rather than producing sources that only 373 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 374 * possible. 375 * 376 * @param sources the sources to concatenate 377 * @return a {@code CharSource} containing the concatenated data 378 * @throws NullPointerException if any of {@code sources} is {@code null} 379 * @since 15.0 380 */ 381 public static CharSource concat(Iterator<? extends CharSource> sources) { 382 return concat(ImmutableList.copyOf(sources)); 383 } 384 385 /** 386 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 387 * the source will contain the concatenated data from the streams of the underlying sources. 388 * 389 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 390 * close the open underlying stream. 391 * 392 * @param sources the sources to concatenate 393 * @return a {@code CharSource} containing the concatenated data 394 * @throws NullPointerException if any of {@code sources} is {@code null} 395 * @since 15.0 396 */ 397 public static CharSource concat(CharSource... sources) { 398 return concat(ImmutableList.copyOf(sources)); 399 } 400 401 /** 402 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 403 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 404 * the {@code charSequence} is mutated while it is being read, so don't do that. 405 * 406 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 407 */ 408 public static CharSource wrap(CharSequence charSequence) { 409 return new CharSequenceCharSource(charSequence); 410 } 411 412 /** 413 * Returns an immutable {@link CharSource} that contains no characters. 414 * 415 * @since 15.0 416 */ 417 public static CharSource empty() { 418 return EmptyCharSource.INSTANCE; 419 } 420 421 /** 422 * A byte source that reads chars from this source and encodes them as bytes using a charset. 423 */ 424 private final class AsByteSource extends ByteSource { 425 426 final Charset charset; 427 428 AsByteSource(Charset charset) { 429 this.charset = checkNotNull(charset); 430 } 431 432 @Override 433 public CharSource asCharSource(Charset charset) { 434 if (charset.equals(this.charset)) { 435 return CharSource.this; 436 } 437 return super.asCharSource(charset); 438 } 439 440 @Override 441 public InputStream openStream() throws IOException { 442 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 443 } 444 445 @Override 446 public String toString() { 447 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 448 } 449 } 450 451 private static class CharSequenceCharSource extends CharSource { 452 453 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 454 455 private final CharSequence seq; 456 457 protected CharSequenceCharSource(CharSequence seq) { 458 this.seq = checkNotNull(seq); 459 } 460 461 @Override 462 public Reader openStream() { 463 return new CharSequenceReader(seq); 464 } 465 466 @Override 467 public String read() { 468 return seq.toString(); 469 } 470 471 @Override 472 public boolean isEmpty() { 473 return seq.length() == 0; 474 } 475 476 @Override 477 public long length() { 478 return seq.length(); 479 } 480 481 @Override 482 public Optional<Long> lengthIfKnown() { 483 return Optional.of((long) seq.length()); 484 } 485 486 /** 487 * Returns an iterable over the lines in the string. If the string ends in a newline, a final 488 * empty string is not included to match the behavior of BufferedReader/LineReader.readLine(). 489 */ 490 private Iterable<String> lines() { 491 return new Iterable<String>() { 492 @Override 493 public Iterator<String> iterator() { 494 return new AbstractIterator<String>() { 495 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 496 497 @Override 498 protected String computeNext() { 499 if (lines.hasNext()) { 500 String next = lines.next(); 501 // skip last line if it's empty 502 if (lines.hasNext() || !next.isEmpty()) { 503 return next; 504 } 505 } 506 return endOfData(); 507 } 508 }; 509 } 510 }; 511 } 512 513 @Override 514 public String readFirstLine() { 515 Iterator<String> lines = lines().iterator(); 516 return lines.hasNext() ? lines.next() : null; 517 } 518 519 @Override 520 public ImmutableList<String> readLines() { 521 return ImmutableList.copyOf(lines()); 522 } 523 524 @Override 525 public <T> T readLines(LineProcessor<T> processor) throws IOException { 526 for (String line : lines()) { 527 if (!processor.processLine(line)) { 528 break; 529 } 530 } 531 return processor.getResult(); 532 } 533 534 @Override 535 public String toString() { 536 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 537 } 538 } 539 540 private static final class EmptyCharSource extends CharSequenceCharSource { 541 542 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 543 544 private EmptyCharSource() { 545 super(""); 546 } 547 548 @Override 549 public String toString() { 550 return "CharSource.empty()"; 551 } 552 } 553 554 private static final class ConcatenatedCharSource extends CharSource { 555 556 private final Iterable<? extends CharSource> sources; 557 558 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 559 this.sources = checkNotNull(sources); 560 } 561 562 @Override 563 public Reader openStream() throws IOException { 564 return new MultiReader(sources.iterator()); 565 } 566 567 @Override 568 public boolean isEmpty() throws IOException { 569 for (CharSource source : sources) { 570 if (!source.isEmpty()) { 571 return false; 572 } 573 } 574 return true; 575 } 576 577 @Override 578 public Optional<Long> lengthIfKnown() { 579 long result = 0L; 580 for (CharSource source : sources) { 581 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 582 if (!lengthIfKnown.isPresent()) { 583 return Optional.absent(); 584 } 585 result += lengthIfKnown.get(); 586 } 587 return Optional.of(result); 588 } 589 590 @Override 591 public long length() throws IOException { 592 long result = 0L; 593 for (CharSource source : sources) { 594 result += source.length(); 595 } 596 return result; 597 } 598 599 @Override 600 public String toString() { 601 return "CharSource.concat(" + sources + ")"; 602 } 603 } 604}