/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
 *
 * <p>{@code CharSource} provides two kinds of methods:
 * <ul>
 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
 *   instance each time they are called. The caller is responsible for ensuring that the returned
 *   reader is closed.
 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
 *   typically implemented by opening a reader using one of the methods in the first category,
 *   doing something and finally closing the reader that was opened.
 * </ul>
 *
 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
 * there to be an empty line at the end if the contents are terminated with a line separator.
 *
 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
 *
 * @since 14.0
 * @author Colin Decker
 */
public abstract class CharSource {

  /**
   * Constructor for use by subclasses.
   */
  protected CharSource() {}

  /**
   * Opens a new {@link Reader} for reading from this source. This method should return a new,
   * independent reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs in the process of opening the reader
   */
  public abstract Reader openStream() throws IOException;

  /**
   * Opens a new {@link BufferedReader} for reading from this source. This method should return a
   * new, independent reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs in the process of opening the reader
   */
  public BufferedReader openBufferedStream() throws IOException {
    Reader reader = openStream();
    // Avoid double buffering when the underlying stream is already a BufferedReader.
    return (reader instanceof BufferedReader)
        ? (BufferedReader) reader
        : new BufferedReader(reader);
  }

  /**
   * Returns the size of this source in chars, if the size can be easily determined without
   * actually opening the data stream.
   *
   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
   * <i>possible</i> that this method will return a different number of chars than would be
   * returned by reading all of the chars.
   *
   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read
   * may return a different number of chars if the contents are changed.
   *
   * @since 19.0
   */
  @Beta
  public Optional<Long> lengthIfKnown() {
    return Optional.absent();
  }

  /**
   * Returns the length of this source in chars, even if doing so requires opening and traversing
   * an entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
   *
   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present.
   * If absent, it will fall back to a heavyweight operation that will open a stream,
   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
   * that were skipped.
   *
   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
   * implementation, it is <i>possible</i> that this method will return a different number of chars
   * than would be returned by reading all of the chars.
   *
   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
   * number of chars if the contents are changed.
   *
   * @throws IOException if an I/O error occurs in the process of reading the length of this source
   * @since 19.0
   */
  @Beta
  public long length() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      return lengthIfKnown.get();
    }

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return countBySkipping(reader);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Counts the chars remaining in {@code reader} by calling {@link Reader#skip(long)} repeatedly
   * and summing the reported amounts until a call reports that nothing was skipped.
   *
   * @param reader the reader to exhaust; it is not closed by this method
   * @return the total number of chars skipped
   * @throws IOException if {@code reader.skip} throws
   */
  private long countBySkipping(Reader reader) throws IOException {
    long count = 0;
    long read;
    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
      count += read;
    }
    return count;
  }

  /**
   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
   * Does not close {@code appendable} if it is {@code Closeable}.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or
   *     writing to {@code appendable}
   */
  public long copyTo(Appendable appendable) throws IOException {
    checkNotNull(appendable);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.copy(reader, appendable);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Copies the contents of this source to the given sink.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or
   *     writing to {@code sink}
   */
  public long copyTo(CharSink sink) throws IOException {
    checkNotNull(sink);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      Writer writer = closer.register(sink.openStream());
      return CharStreams.copy(reader, writer);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the contents of this source as a string.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source
   */
  public String read() throws IOException {
    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.toString(reader);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
   *
   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
   * {@code \r\n}, does not include the line separator in the returned line and does not consider
   * there to be an extra empty line at the end if the content is terminated with a line separator.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source
   */
  @Nullable public String readFirstLine() throws IOException {
    Closer closer = Closer.create();
    try {
      BufferedReader reader = closer.register(openBufferedStream());
      return reader.readLine();
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads all the lines of this source as a list of strings. The returned list will be empty if
   * this source is empty.
   *
   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
   * {@code \r\n}, does not include the line separator in the returned lines and does not consider
   * there to be an extra empty line at the end if the content is terminated with a line separator.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source
   */
  public ImmutableList<String> readLines() throws IOException {
    Closer closer = Closer.create();
    try {
      BufferedReader reader = closer.register(openBufferedStream());
      List<String> result = Lists.newArrayList();
      String line;
      while ((line = reader.readLine()) != null) {
        result.add(line);
      }
      return ImmutableList.copyOf(result);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads lines of text from this source, processing each line as it is read using the given
   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
   * returns {@code false} and returns the result produced by the processor.
   *
   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
   * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
   * and does not consider there to be an extra empty line at the end if the content is terminated
   * with a line separator.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or if
   *     {@code processor} throws an {@code IOException}
   * @since 16.0
   */
  @Beta
  public <T> T readLines(LineProcessor<T> processor) throws IOException {
    checkNotNull(processor);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.readLines(reader, processor);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Returns whether the source has zero chars. The default implementation uses
   * {@link #lengthIfKnown} when it returns a value, reporting the source as empty exactly when
   * that value is zero, and falls back to opening a stream and checking for EOF if the length is
   * not known.
   *
   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
   * chars are actually available for reading. This means that a source may return {@code true} from
   * {@code isEmpty()} despite having readable content. Conversely, a source whose
   * {@code lengthIfKnown} reports a non-zero value is reported as non-empty without being opened.
   *
   * @throws IOException if an I/O error occurs
   * @since 15.0
   */
  public boolean isEmpty() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      // A known length answers the question without the cost of opening a stream.
      return lengthIfKnown.get() == 0L;
    }
    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return reader.read() == -1;
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @since 15.0
   */
  public static CharSource concat(Iterable<? extends CharSource> sources) {
    return new ConcatenatedCharSource(sources);
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
   * method is called. This will fail if the iterator is infinite and may cause problems if the
   * iterator eagerly fetches data for each source when iterated (rather than producing sources
   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
   * overload if possible.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(Iterator<? extends CharSource> sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(CharSource... sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
   * the {@code charSequence} is mutated while it is being read, so don't do that.
   *
   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
   */
  public static CharSource wrap(CharSequence charSequence) {
    return new CharSequenceCharSource(charSequence);
  }

  /**
   * Returns an immutable {@link CharSource} that contains no characters.
   *
   * @since 15.0
   */
  public static CharSource empty() {
    return EmptyCharSource.INSTANCE;
  }

  /**
   * A {@code CharSource} backed directly by a {@link CharSequence}. Most convenience methods are
   * overridden to operate on the sequence itself rather than going through a {@code Reader}.
   */
  private static class CharSequenceCharSource extends CharSource {

    // "\r\n" is listed first so that a CRLF pair is consumed as a single separator rather than
    // being matched as a lone "\r" followed by a lone "\n".
    private static final Splitter LINE_SPLITTER
        = Splitter.on(Pattern.compile("\r\n|\n|\r"));

    private final CharSequence seq;

    protected CharSequenceCharSource(CharSequence seq) {
      this.seq = checkNotNull(seq);
    }

    @Override
    public Reader openStream() {
      return new CharSequenceReader(seq);
    }

    @Override
    public String read() {
      return seq.toString();
    }

    @Override
    public boolean isEmpty() {
      return seq.length() == 0;
    }

    @Override
    public long length() {
      return seq.length();
    }

    @Override
    public Optional<Long> lengthIfKnown() {
      return Optional.of((long) seq.length());
    }

    /**
     * Returns an iterable over the lines in the string. If the string ends in
     * a newline, a final empty string is not included to match the behavior of
     * BufferedReader/LineReader.readLine().
     */
    private Iterable<String> lines() {
      return new Iterable<String>() {
        @Override
        public Iterator<String> iterator() {
          return new AbstractIterator<String>() {
            Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();

            @Override
            protected String computeNext() {
              if (lines.hasNext()) {
                String next = lines.next();
                // skip last line if it's empty
                if (lines.hasNext() || !next.isEmpty()) {
                  return next;
                }
              }
              return endOfData();
            }
          };
        }
      };
    }

    @Override
    public String readFirstLine() {
      Iterator<String> lines = lines().iterator();
      return lines.hasNext() ? lines.next() : null;
    }

    @Override
    public ImmutableList<String> readLines() {
      return ImmutableList.copyOf(lines());
    }

    @Override
    public <T> T readLines(LineProcessor<T> processor) throws IOException {
      for (String line : lines()) {
        // The processor signals that it wants no further lines by returning false.
        if (!processor.processLine(line)) {
          break;
        }
      }
      return processor.getResult();
    }

    @Override
    public String toString() {
      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
    }
  }

  /**
   * Singleton {@code CharSource} over the empty string, returned by {@link CharSource#empty()}.
   */
  private static final class EmptyCharSource extends CharSequenceCharSource {

    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }

  /**
   * A {@code CharSource} that presents a sequence of other sources as one, in iteration order.
   */
  private static final class ConcatenatedCharSource extends CharSource {

    private final Iterable<? extends CharSource> sources;

    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
      this.sources = checkNotNull(sources);
    }

    @Override
    public Reader openStream() throws IOException {
      return new MultiReader(sources.iterator());
    }

    @Override
    public boolean isEmpty() throws IOException {
      // Empty only if every underlying source is empty; stop at the first non-empty one.
      for (CharSource source : sources) {
        if (!source.isEmpty()) {
          return false;
        }
      }
      return true;
    }

    @Override
    public Optional<Long> lengthIfKnown() {
      long result = 0L;
      for (CharSource source : sources) {
        Optional<Long> lengthIfKnown = source.lengthIfKnown();
        if (!lengthIfKnown.isPresent()) {
          // One unknown length makes the total unknown.
          return Optional.absent();
        }
        result += lengthIfKnown.get();
      }
      return Optional.of(result);
    }

    @Override
    public long length() throws IOException {
      long result = 0L;
      for (CharSource source : sources) {
        result += source.length();
      }
      return result;
    }

    @Override
    public String toString() {
      return "CharSource.concat(" + sources + ")";
    }
  }
}