001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.io; 018 019import static com.google.common.base.Preconditions.checkNotNull; 020 021import com.google.common.annotations.Beta; 022import com.google.common.base.Ascii; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.Reader; 031import java.io.Writer; 032import java.nio.charset.Charset; 033import java.util.Iterator; 034import java.util.List; 035import java.util.regex.Pattern; 036 037import javax.annotation.Nullable; 038 039/** 040 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a 041 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed. 042 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances. 043 * 044 * <p>{@code CharSource} provides two kinds of methods: 045 * <ul> 046 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 047 * instance each time they are called. The caller is responsible for ensuring that the returned 048 * reader is closed. 049 * <li><b>Convenience methods:</b> These are implementations of common operations that are 050 * typically implemented by opening a reader using one of the methods in the first category, 051 * doing something and finally closing the reader that was opened. 052 * </ul> 053 * 054 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the 055 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, 056 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider 057 * there to be an empty line at the end if the contents are terminated with a line separator. 058 * 059 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 060 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 061 * 062 * @since 14.0 063 * @author Colin Decker 064 */ 065public abstract class CharSource { 066 067 /** 068 * Constructor for use by subclasses. 069 */ 070 protected CharSource() {} 071 072 /** 073 * Opens a new {@link Reader} for reading from this source. This method should return a new, 074 * independent reader each time it is called. 075 * 076 * <p>The caller is responsible for ensuring that the returned reader is closed. 077 * 078 * @throws IOException if an I/O error occurs in the process of opening the reader 079 */ 080 public abstract Reader openStream() throws IOException; 081 082 /** 083 * Opens a new {@link BufferedReader} for reading from this source. This method should return a 084 * new, independent reader each time it is called. 085 * 086 * <p>The caller is responsible for ensuring that the returned reader is closed. 087 * 088 * @throws IOException if an I/O error occurs in the process of opening the reader 089 */ 090 public BufferedReader openBufferedStream() throws IOException { 091 Reader reader = openStream(); 092 return (reader instanceof BufferedReader) 093 ? (BufferedReader) reader 094 : new BufferedReader(reader); 095 } 096 097 /** 098 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 099 * Does not close {@code appendable} if it is {@code Closeable}. 100 * 101 * @throws IOException if an I/O error occurs in the process of reading from this source or 102 * writing to {@code appendable} 103 */ 104 public long copyTo(Appendable appendable) throws IOException { 105 checkNotNull(appendable); 106 107 Closer closer = Closer.create(); 108 try { 109 Reader reader = closer.register(openStream()); 110 return CharStreams.copy(reader, appendable); 111 } catch (Throwable e) { 112 throw closer.rethrow(e); 113 } finally { 114 closer.close(); 115 } 116 } 117 118 /** 119 * Copies the contents of this source to the given sink. 120 * 121 * @throws IOException if an I/O error occurs in the process of reading from this source or 122 * writing to {@code sink} 123 */ 124 public long copyTo(CharSink sink) throws IOException { 125 checkNotNull(sink); 126 127 Closer closer = Closer.create(); 128 try { 129 Reader reader = closer.register(openStream()); 130 Writer writer = closer.register(sink.openStream()); 131 return CharStreams.copy(reader, writer); 132 } catch (Throwable e) { 133 throw closer.rethrow(e); 134 } finally { 135 closer.close(); 136 } 137 } 138 139 /** 140 * Reads the contents of this source as a string. 141 * 142 * @throws IOException if an I/O error occurs in the process of reading from this source 143 */ 144 public String read() throws IOException { 145 Closer closer = Closer.create(); 146 try { 147 Reader reader = closer.register(openStream()); 148 return CharStreams.toString(reader); 149 } catch (Throwable e) { 150 throw closer.rethrow(e); 151 } finally { 152 closer.close(); 153 } 154 } 155 156 /** 157 * Reads the first link of this source as a string. Returns {@code null} if this source is empty. 158 * 159 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 160 * {@code \r\n}, does not include the line separator in the returned line and does not consider 161 * there to be an extra empty line at the end if the content is terminated with a line separator. 162 * 163 * @throws IOException if an I/O error occurs in the process of reading from this source 164 */ 165 public @Nullable String readFirstLine() throws IOException { 166 Closer closer = Closer.create(); 167 try { 168 BufferedReader reader = closer.register(openBufferedStream()); 169 return reader.readLine(); 170 } catch (Throwable e) { 171 throw closer.rethrow(e); 172 } finally { 173 closer.close(); 174 } 175 } 176 177 /** 178 * Reads all the lines of this source as a list of strings. The returned list will be empty if 179 * this source is empty. 180 * 181 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 182 * {@code \r\n}, does not include the line separator in the returned lines and does not consider 183 * there to be an extra empty line at the end if the content is terminated with a line separator. 184 * 185 * @throws IOException if an I/O error occurs in the process of reading from this source 186 */ 187 public ImmutableList<String> readLines() throws IOException { 188 Closer closer = Closer.create(); 189 try { 190 BufferedReader reader = closer.register(openBufferedStream()); 191 List<String> result = Lists.newArrayList(); 192 String line; 193 while ((line = reader.readLine()) != null) { 194 result.add(line); 195 } 196 return ImmutableList.copyOf(result); 197 } catch (Throwable e) { 198 throw closer.rethrow(e); 199 } finally { 200 closer.close(); 201 } 202 } 203 204 /** 205 * Reads lines of text from this source, processing each line as it is read using the given 206 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor 207 * returns {@code false} and returns the result produced by the processor. 208 * 209 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or 210 * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor} 211 * and does not consider there to be an extra empty line at the end if the content is terminated 212 * with a line separator. 213 * 214 * @throws IOException if an I/O error occurs in the process of reading from this source or if 215 * {@code processor} throws an {@code IOException} 216 * @since 16.0 217 */ 218 @Beta 219 public <T> T readLines(LineProcessor<T> processor) throws IOException { 220 checkNotNull(processor); 221 222 Closer closer = Closer.create(); 223 try { 224 Reader reader = closer.register(openStream()); 225 return CharStreams.readLines(reader, processor); 226 } catch (Throwable e) { 227 throw closer.rethrow(e); 228 } finally { 229 closer.close(); 230 } 231 } 232 233 /** 234 * Returns whether the source has zero chars. The default implementation is to open a stream and 235 * check for EOF. 236 * 237 * @throws IOException if an I/O error occurs 238 * @since 15.0 239 */ 240 public boolean isEmpty() throws IOException { 241 Closer closer = Closer.create(); 242 try { 243 Reader reader = closer.register(openStream()); 244 return reader.read() == -1; 245 } catch (Throwable e) { 246 throw closer.rethrow(e); 247 } finally { 248 closer.close(); 249 } 250 } 251 252 /** 253 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 254 * the source will contain the concatenated data from the streams of the underlying sources. 255 * 256 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 257 * close the open underlying stream. 258 * 259 * @param sources the sources to concatenate 260 * @return a {@code CharSource} containing the concatenated data 261 * @since 15.0 262 */ 263 public static CharSource concat(Iterable<? extends CharSource> sources) { 264 return new ConcatenatedCharSource(sources); 265 } 266 267 /** 268 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 269 * the source will contain the concatenated data from the streams of the underlying sources. 270 * 271 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 272 * close the open underlying stream. 273 * 274 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this 275 * method is called. This will fail if the iterator is infinite and may cause problems if the 276 * iterator eagerly fetches data for each source when iterated (rather than producing sources 277 * that only load data through their streams). Prefer using the {@link #concat(Iterable)} 278 * overload if possible. 279 * 280 * @param sources the sources to concatenate 281 * @return a {@code CharSource} containing the concatenated data 282 * @throws NullPointerException if any of {@code sources} is {@code null} 283 * @since 15.0 284 */ 285 public static CharSource concat(Iterator<? extends CharSource> sources) { 286 return concat(ImmutableList.copyOf(sources)); 287 } 288 289 /** 290 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 291 * the source will contain the concatenated data from the streams of the underlying sources. 292 * 293 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 294 * close the open underlying stream. 295 * 296 * @param sources the sources to concatenate 297 * @return a {@code CharSource} containing the concatenated data 298 * @throws NullPointerException if any of {@code sources} is {@code null} 299 * @since 15.0 300 */ 301 public static CharSource concat(CharSource... sources) { 302 return concat(ImmutableList.copyOf(sources)); 303 } 304 305 /** 306 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 307 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 308 * the {@code charSequence} is mutated while it is being read, so don't do that. 309 * 310 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 311 */ 312 public static CharSource wrap(CharSequence charSequence) { 313 return new CharSequenceCharSource(charSequence); 314 } 315 316 /** 317 * Returns an immutable {@link CharSource} that contains no characters. 318 * 319 * @since 15.0 320 */ 321 public static CharSource empty() { 322 return EmptyCharSource.INSTANCE; 323 } 324 325 private static class CharSequenceCharSource extends CharSource { 326 327 private static final Splitter LINE_SPLITTER 328 = Splitter.on(Pattern.compile("\r\n|\n|\r")); 329 330 private final CharSequence seq; 331 332 protected CharSequenceCharSource(CharSequence seq) { 333 this.seq = checkNotNull(seq); 334 } 335 336 @Override 337 public Reader openStream() { 338 return new CharSequenceReader(seq); 339 } 340 341 @Override 342 public String read() { 343 return seq.toString(); 344 } 345 346 @Override 347 public boolean isEmpty() { 348 return seq.length() == 0; 349 } 350 351 /** 352 * Returns an iterable over the lines in the string. If the string ends in 353 * a newline, a final empty string is not included to match the behavior of 354 * BufferedReader/LineReader.readLine(). 355 */ 356 private Iterable<String> lines() { 357 return new Iterable<String>() { 358 @Override 359 public Iterator<String> iterator() { 360 return new AbstractIterator<String>() { 361 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 362 363 @Override 364 protected String computeNext() { 365 if (lines.hasNext()) { 366 String next = lines.next(); 367 // skip last line if it's empty 368 if (lines.hasNext() || !next.isEmpty()) { 369 return next; 370 } 371 } 372 return endOfData(); 373 } 374 }; 375 } 376 }; 377 } 378 379 @Override 380 public String readFirstLine() { 381 Iterator<String> lines = lines().iterator(); 382 return lines.hasNext() ? lines.next() : null; 383 } 384 385 @Override 386 public ImmutableList<String> readLines() { 387 return ImmutableList.copyOf(lines()); 388 } 389 390 @Override 391 public <T> T readLines(LineProcessor<T> processor) throws IOException { 392 for (String line : lines()) { 393 if (!processor.processLine(line)) { 394 break; 395 } 396 } 397 return processor.getResult(); 398 } 399 400 @Override 401 public String toString() { 402 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 403 } 404 } 405 406 private static final class EmptyCharSource extends CharSequenceCharSource { 407 408 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 409 410 private EmptyCharSource() { 411 super(""); 412 } 413 414 @Override 415 public String toString() { 416 return "CharSource.empty()"; 417 } 418 } 419 420 private static final class ConcatenatedCharSource extends CharSource { 421 422 private final Iterable<? extends CharSource> sources; 423 424 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 425 this.sources = checkNotNull(sources); 426 } 427 428 @Override 429 public Reader openStream() throws IOException { 430 return new MultiReader(sources.iterator()); 431 } 432 433 @Override 434 public boolean isEmpty() throws IOException { 435 for (CharSource source : sources) { 436 if (!source.isEmpty()) { 437 return false; 438 } 439 } 440 return true; 441 } 442 443 @Override 444 public String toString() { 445 return "CharSource.concat(" + sources + ")"; 446 } 447 } 448}