001/* 002 * Copyright (C) 2008 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.base; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndex; 020 021import com.google.common.annotations.GwtCompatible; 022import com.google.common.annotations.GwtIncompatible; 023import com.google.common.annotations.VisibleForTesting; 024import java.util.Arrays; 025import java.util.BitSet; 026 027/** 028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does 029 * for any {@link Object}. Also offers basic text processing methods based on this function. 030 * Implementations are strongly encouraged to be side-effect-free and immutable. 031 * 032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean 033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}". 034 * 035 * <p><b>Warning:</b> This class deals only with {@code char} values; it does not understand 036 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical 037 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher} 038 * treats these just as two separate characters. 
039 * 040 * <p>Example usages: 041 * 042 * <pre> 043 * String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput); 044 * if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> 045 * 046 * <p>See the Guava User Guide article on <a 047 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher} 048 * </a>. 049 * 050 * @author Kevin Bourrillion 051 * @since 1.0 052 */ 053@GwtCompatible(emulated = true) 054public abstract class CharMatcher implements Predicate<Character> { 055 /* 056 * N777777777NO 057 * N7777777777777N 058 * M777777777777777N 059 * $N877777777D77777M 060 * N M77777777ONND777M 061 * MN777777777NN D777 062 * N7ZN777777777NN ~M7778 063 * N777777777777MMNN88777N 064 * N777777777777MNZZZ7777O 065 * DZN7777O77777777777777 066 * N7OONND7777777D77777N 067 * 8$M++++?N???$77777$ 068 * M7++++N+M77777777N 069 * N77O777777777777$ M 070 * DNNM$$$$777777N D 071 * N$N:=N$777N7777M NZ 072 * 77Z::::N777777777 ODZZZ 073 * 77N::::::N77777777M NNZZZ$ 074 * $777:::::::77777777MN ZM8ZZZZZ 075 * 777M::::::Z7777777Z77 N++ZZZZNN 076 * 7777M:::::M7777777$777M $++IZZZZM 077 * M777$:::::N777777$M7777M +++++ZZZDN 078 * NN$::::::7777$$M777777N N+++ZZZZNZ 079 * N::::::N:7$O:77777777 N++++ZZZZN 080 * M::::::::::::N77777777+ +?+++++ZZZM 081 * 8::::::::::::D77777777M O+++++ZZ 082 * ::::::::::::M777777777N O+?D 083 * M:::::::::::M77777777778 77= 084 * D=::::::::::N7777777777N 777 085 * INN===::::::=77777777777N I777N 086 * ?777N========N7777777777787M N7777 087 * 77777$D======N77777777777N777N? N777777 088 * I77777$$$N7===M$$77777777$77777777$MMZ77777777N 089 * $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON 090 * M$$$$$$$$M M$$$$$$$$N=N$$$$7777777$$$ND 091 * O77Z$$$$$$$ M$$$$$$$$MNI==$DNNNNM=~N 092 * 7 :N MNN$$$$M$ $$$777$8 8D8I 093 * NMM.:7O 777777778 094 * 7777777MN 095 * M NO .7: 096 * M : M 097 * 8 098 */ 099 100 // Constant matcher factory methods 101 102 /** 103 * Matches any character. 
104 * 105 * @since 19.0 (since 1.0 as constant {@code ANY}) 106 */ 107 public static CharMatcher any() { 108 return Any.INSTANCE; 109 } 110 111 /** 112 * Matches no characters. 113 * 114 * @since 19.0 (since 1.0 as constant {@code NONE}) 115 */ 116 public static CharMatcher none() { 117 return None.INSTANCE; 118 } 119 120 /** 121 * Determines whether a character is whitespace according to the latest Unicode standard, as 122 * illustrated 123 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 124 * This is not the same definition used by other Java APIs. (See a 125 * <a href="https://goo.gl/Y6SLWx">comparison of several definitions of 126 * "whitespace"</a>.) 127 * 128 * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to 129 * date. 130 * 131 * @since 19.0 (since 1.0 as constant {@code WHITESPACE}) 132 */ 133 public static CharMatcher whitespace() { 134 return Whitespace.INSTANCE; 135 } 136 137 /** 138 * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 139 * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a 140 * discussion of that term. 141 * 142 * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE}) 143 */ 144 public static CharMatcher breakingWhitespace() { 145 return BreakingWhitespace.INSTANCE; 146 } 147 148 /** 149 * Determines whether a character is ASCII, meaning that its code point is less than 128. 150 * 151 * @since 19.0 (since 1.0 as constant {@code ASCII}) 152 */ 153 public static CharMatcher ascii() { 154 return Ascii.INSTANCE; 155 } 156 157 /** 158 * Determines whether a character is a digit according to 159 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If 160 * you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 
161 * 162 * @since 19.0 (since 1.0 as constant {@code DIGIT}) 163 */ 164 public static CharMatcher digit() { 165 return Digit.INSTANCE; 166 } 167 168 /** 169 * Determines whether a character is a digit according to {@linkplain Character#isDigit(char) 170 * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0', 171 * '9')}. 172 * 173 * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT}) 174 */ 175 public static CharMatcher javaDigit() { 176 return JavaDigit.INSTANCE; 177 } 178 179 /** 180 * Determines whether a character is a letter according to {@linkplain Character#isLetter(char) 181 * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code 182 * inRange('a', 'z').or(inRange('A', 'Z'))}. 183 * 184 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER}) 185 */ 186 public static CharMatcher javaLetter() { 187 return JavaLetter.INSTANCE; 188 } 189 190 /** 191 * Determines whether a character is a letter or digit according to 192 * {@linkplain Character#isLetterOrDigit(char) Java's definition}. 193 * 194 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}). 195 */ 196 public static CharMatcher javaLetterOrDigit() { 197 return JavaLetterOrDigit.INSTANCE; 198 } 199 200 /** 201 * Determines whether a character is upper case according to 202 * {@linkplain Character#isUpperCase(char) Java's definition}. 203 * 204 * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE}) 205 */ 206 public static CharMatcher javaUpperCase() { 207 return JavaUpperCase.INSTANCE; 208 } 209 210 /** 211 * Determines whether a character is lower case according to 212 * {@linkplain Character#isLowerCase(char) Java's definition}. 
213 * 214 * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE}) 215 */ 216 public static CharMatcher javaLowerCase() { 217 return JavaLowerCase.INSTANCE; 218 } 219 220 /** 221 * Determines whether a character is an ISO control character as specified by 222 * {@link Character#isISOControl(char)}. 223 * 224 * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL}) 225 */ 226 public static CharMatcher javaIsoControl() { 227 return JavaIsoControl.INSTANCE; 228 } 229 230 /** 231 * Determines whether a character is invisible; that is, if its Unicode category is any of 232 * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 233 * PRIVATE_USE according to ICU4J. 234 * 235 * @since 19.0 (since 1.0 as constant {@code INVISIBLE}) 236 */ 237 public static CharMatcher invisible() { 238 return Invisible.INSTANCE; 239 } 240 241 /** 242 * Determines whether a character is single-width (not double-width). When in doubt, this matcher 243 * errs on the side of returning {@code false} (that is, it tends to assume a character is 244 * double-width). 245 * 246 * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to 247 * date. 248 * 249 * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH}) 250 */ 251 public static CharMatcher singleWidth() { 252 return SingleWidth.INSTANCE; 253 } 254 255 // Legacy constants 256 257 /** 258 * Determines whether a character is whitespace according to the latest Unicode 259 * standard, as illustrated 260 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 261 * This is not the same definition used by other Java APIs. (See a 262 * <a href="https://goo.gl/Y6SLWx">comparison of several definitions of 263 * "whitespace"</a>.) 264 * 265 * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant 266 * to keep it up to date. 267 * 268 * @deprecated Use {@link #whitespace()} instead. 
This constant is scheduled to be 269 * removed in June 2018. 270 */ 271 @Deprecated 272 public static final CharMatcher WHITESPACE = whitespace(); 273 274 /** 275 * Determines whether a character is a breaking whitespace (that is, a whitespace 276 * which can be interpreted as a break between words for formatting purposes). See 277 * {@link #whitespace} for a discussion of that term. 278 * 279 * @since 2.0 280 * @deprecated Use {@link #breakingWhitespace()} instead. This constant is scheduled 281 * to be removed in June 2018. 282 */ 283 @Deprecated 284 public static final CharMatcher BREAKING_WHITESPACE = breakingWhitespace(); 285 286 /** 287 * Determines whether a character is ASCII, meaning that its code point is less than 288 * 128. 289 * 290 * @deprecated Use {@link #ascii()} instead. This constant is scheduled to be 291 * removed in June 2018. 292 */ 293 @Deprecated 294 public static final CharMatcher ASCII = ascii(); 295 296 /** 297 * Determines whether a character is a digit according to 298 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D"> 299 * Unicode</a>. If you only care to match ASCII digits, you can use 300 * {@code inRange('0', '9')}. 301 * 302 * @deprecated Use {@link #digit()} instead. This constant is scheduled to be 303 * removed in June 2018. 304 */ 305 @Deprecated 306 public static final CharMatcher DIGIT = digit(); 307 308 /** 309 * Determines whether a character is a digit according to 310 * {@linkplain Character#isDigit(char) Java's definition}. If you only care to match 311 * ASCII digits, you can use {@code inRange('0', '9')}. 312 * 313 * @deprecated Use {@link #javaDigit()} instead. This constant is scheduled to be 314 * removed in June 2018. 315 */ 316 @Deprecated 317 public static final CharMatcher JAVA_DIGIT = javaDigit(); 318 319 /** 320 * Determines whether a character is a letter according to 321 * {@linkplain Character#isLetter(char) Java's definition}. 
If you only care to 322 * match letters of the Latin alphabet, you can use 323 * {@code inRange('a', 'z').or(inRange('A', 'Z'))}. 324 * 325 * @deprecated Use {@link #javaLetter()} instead. This constant is scheduled to be 326 * removed in June 2018. 327 */ 328 @Deprecated 329 public static final CharMatcher JAVA_LETTER = javaLetter(); 330 331 /** 332 * Determines whether a character is a letter or digit according to 333 * {@linkplain Character#isLetterOrDigit(char) Java's definition}. 334 * 335 * @deprecated Use {@link #javaLetterOrDigit()} instead. This constant is scheduled 336 * to be removed in June 2018. 337 */ 338 @Deprecated 339 public static final CharMatcher JAVA_LETTER_OR_DIGIT = javaLetterOrDigit(); 340 341 /** 342 * Determines whether a character is upper case according to 343 * {@linkplain Character#isUpperCase(char) Java's definition}. 344 * 345 * @deprecated Use {@link #javaUpperCase()} instead. This constant is scheduled to 346 * be removed in June 2018. 347 */ 348 @Deprecated 349 public static final CharMatcher JAVA_UPPER_CASE = javaUpperCase(); 350 351 /** 352 * Determines whether a character is lower case according to 353 * {@linkplain Character#isLowerCase(char) Java's definition}. 354 * 355 * @deprecated Use {@link #javaLowerCase()} instead. This constant is scheduled to 356 * be removed in June 2018. 357 */ 358 @Deprecated 359 public static final CharMatcher JAVA_LOWER_CASE = javaLowerCase(); 360 361 /** 362 * Determines whether a character is an ISO control character as specified by 363 * {@link Character#isISOControl(char)}. 364 * 365 * @deprecated Use {@link #javaIsoControl()} instead. This constant is scheduled to 366 * be removed in June 2018. 
367 */ 368 @Deprecated 369 public static final CharMatcher JAVA_ISO_CONTROL = javaIsoControl(); 370 371 /** 372 * Determines whether a character is invisible; that is, if its Unicode category is 373 * any of SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, 374 * SURROGATE, and PRIVATE_USE according to ICU4J. 375 * 376 * @deprecated Use {@link #invisible()} instead. This constant is scheduled to be 377 * removed in June 2018. 378 */ 379 @Deprecated 380 public static final CharMatcher INVISIBLE = invisible(); 381 382 /** 383 * Determines whether a character is single-width (not double-width). When in doubt, 384 * this matcher errs on the side of returning {@code false} (that is, it tends to 385 * assume a character is double-width). 386 * 387 * <p><b>Note:</b> as the reference file evolves, we will modify this constant to 388 * keep it up to date. 389 * 390 * @deprecated Use {@link #singleWidth()} instead. This constant is scheduled to be 391 * removed in June 2018. 392 */ 393 @Deprecated 394 public static final CharMatcher SINGLE_WIDTH = singleWidth(); 395 396 /** 397 * Matches any character. 398 * 399 * @deprecated Use {@link #any()} instead. This constant is scheduled to be 400 * removed in June 2018. 401 */ 402 @Deprecated 403 public static final CharMatcher ANY = any(); 404 405 /** 406 * Matches no characters. 407 * 408 * @deprecated Use {@link #none()} instead. This constant is scheduled to be 409 * removed in June 2018. 410 */ 411 @Deprecated 412 public static final CharMatcher NONE = none(); 413 414 // Static factories 415 416 /** 417 * Returns a {@code char} matcher that matches only one specified character. 418 */ 419 public static CharMatcher is(final char match) { 420 return new Is(match); 421 } 422 423 /** 424 * Returns a {@code char} matcher that matches any character except the one specified. 425 * 426 * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 
427 */ 428 public static CharMatcher isNot(final char match) { 429 return new IsNot(match); 430 } 431 432 /** 433 * Returns a {@code char} matcher that matches any character present in the given character 434 * sequence. 435 */ 436 public static CharMatcher anyOf(final CharSequence sequence) { 437 switch (sequence.length()) { 438 case 0: 439 return none(); 440 case 1: 441 return is(sequence.charAt(0)); 442 case 2: 443 return isEither(sequence.charAt(0), sequence.charAt(1)); 444 default: 445 // TODO(lowasser): is it potentially worth just going ahead and building a precomputed 446 // matcher? 447 return new AnyOf(sequence); 448 } 449 } 450 451 /** 452 * Returns a {@code char} matcher that matches any character not present in the given character 453 * sequence. 454 */ 455 public static CharMatcher noneOf(CharSequence sequence) { 456 return anyOf(sequence).negate(); 457 } 458 459 /** 460 * Returns a {@code char} matcher that matches any character in a given range (both endpoints are 461 * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code 462 * CharMatcher.inRange('a', 'z')}. 463 * 464 * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 465 */ 466 public static CharMatcher inRange(final char startInclusive, final char endInclusive) { 467 return new InRange(startInclusive, endInclusive); 468 } 469 470 /** 471 * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but 472 * which operates on primitive {@code char} instances instead. 473 */ 474 public static CharMatcher forPredicate(final Predicate<? super Character> predicate) { 475 return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate); 476 } 477 478 // Constructors 479 480 /** 481 * Constructor for use by subclasses. When subclassing, you may want to override 482 * {@code toString()} to provide a useful description. 
483 */ 484 protected CharMatcher() {} 485 486 // Abstract methods 487 488 /** Determines a true or false value for the given character. */ 489 public abstract boolean matches(char c); 490 491 // Non-static factories 492 493 /** 494 * Returns a matcher that matches any character not matched by this matcher. 495 */ 496 public CharMatcher negate() { 497 return new Negated(this); 498 } 499 500 /** 501 * Returns a matcher that matches any character matched by both this matcher and {@code other}. 502 */ 503 public CharMatcher and(CharMatcher other) { 504 return new And(this, other); 505 } 506 507 /** 508 * Returns a matcher that matches any character matched by either this matcher or {@code other}. 509 */ 510 public CharMatcher or(CharMatcher other) { 511 return new Or(this, other); 512 } 513 514 /** 515 * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to 516 * query than the original; your mileage may vary. Precomputation takes time and is likely to be 517 * worthwhile only if the precomputed matcher is queried many thousands of times. 518 * 519 * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a 520 * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a 521 * worthwhile tradeoff in a browser. 522 */ 523 public CharMatcher precomputed() { 524 return Platform.precomputeCharMatcher(this); 525 } 526 527 private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; 528 529 /** 530 * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method 531 * on {@link Platform} so that we can have different behavior in GWT. 532 * 533 * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the 534 * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables 535 * for matchers that only match a few characters, and so on. 
In the worst-case scenario, it 536 * constructs an eight-kilobyte bit array and queries that. In many situations this produces a 537 * matcher which is faster to query than the original. 538 */ 539 @GwtIncompatible // SmallCharMatcher 540 CharMatcher precomputedInternal() { 541 final BitSet table = new BitSet(); 542 setBits(table); 543 int totalCharacters = table.cardinality(); 544 if (totalCharacters * 2 <= DISTINCT_CHARS) { 545 return precomputedPositive(totalCharacters, table, toString()); 546 } else { 547 // TODO(lowasser): is it worth it to worry about the last character of large matchers? 548 table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 549 int negatedCharacters = DISTINCT_CHARS - totalCharacters; 550 String suffix = ".negate()"; 551 final String description = toString(); 552 String negatedDescription = 553 description.endsWith(suffix) 554 ? description.substring(0, description.length() - suffix.length()) 555 : description + suffix; 556 return new NegatedFastMatcher( 557 precomputedPositive(negatedCharacters, table, negatedDescription)) { 558 @Override 559 public String toString() { 560 return description; 561 } 562 }; 563 } 564 } 565 566 /** 567 * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper. 568 */ 569 @GwtIncompatible // SmallCharMatcher 570 private static CharMatcher precomputedPositive( 571 int totalCharacters, BitSet table, String description) { 572 switch (totalCharacters) { 573 case 0: 574 return none(); 575 case 1: 576 return is((char) table.nextSetBit(0)); 577 case 2: 578 char c1 = (char) table.nextSetBit(0); 579 char c2 = (char) table.nextSetBit(c1 + 1); 580 return isEither(c1, c2); 581 default: 582 return isSmall(totalCharacters, table.length()) 583 ? 
SmallCharMatcher.from(table, description) 584 : new BitSetMatcher(table, description); 585 } 586 } 587 588 @GwtIncompatible // SmallCharMatcher 589 private static boolean isSmall(int totalCharacters, int tableLength) { 590 return totalCharacters <= SmallCharMatcher.MAX_SIZE 591 && tableLength > (totalCharacters * 4 * Character.SIZE); 592 // err on the side of BitSetMatcher 593 } 594 595 /** 596 * Sets bits in {@code table} matched by this matcher. 597 */ 598 @GwtIncompatible // used only from other GwtIncompatible code 599 void setBits(BitSet table) { 600 for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) { 601 if (matches((char) c)) { 602 table.set(c); 603 } 604 } 605 } 606 607 // Text processing routines 608 609 /** 610 * Returns {@code true} if a character sequence contains at least one matching character. 611 * Equivalent to {@code !matchesNoneOf(sequence)}. 612 * 613 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 614 * character, until this returns {@code true} or the end is reached. 615 * 616 * @param sequence the character sequence to examine, possibly empty 617 * @return {@code true} if this matcher matches at least one character in the sequence 618 * @since 8.0 619 */ 620 public boolean matchesAnyOf(CharSequence sequence) { 621 return !matchesNoneOf(sequence); 622 } 623 624 /** 625 * Returns {@code true} if a character sequence contains only matching characters. 626 * 627 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 628 * character, until this returns {@code false} or the end is reached. 
629 * 630 * @param sequence the character sequence to examine, possibly empty 631 * @return {@code true} if this matcher matches every character in the sequence, including when 632 * the sequence is empty 633 */ 634 public boolean matchesAllOf(CharSequence sequence) { 635 for (int i = sequence.length() - 1; i >= 0; i--) { 636 if (!matches(sequence.charAt(i))) { 637 return false; 638 } 639 } 640 return true; 641 } 642 643 /** 644 * Returns {@code true} if a character sequence contains no matching characters. Equivalent to 645 * {@code !matchesAnyOf(sequence)}. 646 * 647 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 648 * character, until this returns {@code true} or the end is reached. 649 * 650 * @param sequence the character sequence to examine, possibly empty 651 * @return {@code true} if this matcher matches no characters in the sequence, including when 652 * the sequence is empty 653 */ 654 public boolean matchesNoneOf(CharSequence sequence) { 655 return indexIn(sequence) == -1; 656 } 657 658 /** 659 * Returns the index of the first matching character in a character sequence, or {@code -1} if no 660 * matching character is present. 661 * 662 * <p>The default implementation iterates over the sequence in forward order calling 663 * {@link #matches} for each character. 664 * 665 * @param sequence the character sequence to examine from the beginning 666 * @return an index, or {@code -1} if no character matches 667 */ 668 public int indexIn(CharSequence sequence) { 669 return indexIn(sequence, 0); 670 } 671 672 /** 673 * Returns the index of the first matching character in a character sequence, starting from a 674 * given position, or {@code -1} if no character matches after that position. 675 * 676 * <p>The default implementation iterates over the sequence in forward order, beginning at {@code 677 * start}, calling {@link #matches} for each character. 
678 * 679 * @param sequence the character sequence to examine 680 * @param start the first index to examine; must be nonnegative and no greater than {@code 681 * sequence.length()} 682 * @return the index of the first matching character, guaranteed to be no less than {@code start}, 683 * or {@code -1} if no character matches 684 * @throws IndexOutOfBoundsException if start is negative or greater than {@code 685 * sequence.length()} 686 */ 687 public int indexIn(CharSequence sequence, int start) { 688 int length = sequence.length(); 689 checkPositionIndex(start, length); 690 for (int i = start; i < length; i++) { 691 if (matches(sequence.charAt(i))) { 692 return i; 693 } 694 } 695 return -1; 696 } 697 698 /** 699 * Returns the index of the last matching character in a character sequence, or {@code -1} if no 700 * matching character is present. 701 * 702 * <p>The default implementation iterates over the sequence in reverse order calling 703 * {@link #matches} for each character. 704 * 705 * @param sequence the character sequence to examine from the end 706 * @return an index, or {@code -1} if no character matches 707 */ 708 public int lastIndexIn(CharSequence sequence) { 709 for (int i = sequence.length() - 1; i >= 0; i--) { 710 if (matches(sequence.charAt(i))) { 711 return i; 712 } 713 } 714 return -1; 715 } 716 717 /** 718 * Returns the number of matching characters found in a character sequence. 719 */ 720 public int countIn(CharSequence sequence) { 721 int count = 0; 722 for (int i = 0; i < sequence.length(); i++) { 723 if (matches(sequence.charAt(i))) { 724 count++; 725 } 726 } 727 return count; 728 } 729 730 /** 731 * Returns a string containing all non-matching characters of a character sequence, in order. For 732 * example: <pre> {@code 733 * 734 * CharMatcher.is('a').removeFrom("bazaar")}</pre> 735 * 736 * ... returns {@code "bzr"}. 
737 */ 738 public String removeFrom(CharSequence sequence) { 739 String string = sequence.toString(); 740 int pos = indexIn(string); 741 if (pos == -1) { 742 return string; 743 } 744 745 char[] chars = string.toCharArray(); 746 int spread = 1; 747 748 // This unusual loop comes from extensive benchmarking 749 OUT: 750 while (true) { 751 pos++; 752 while (true) { 753 if (pos == chars.length) { 754 break OUT; 755 } 756 if (matches(chars[pos])) { 757 break; 758 } 759 chars[pos - spread] = chars[pos]; 760 pos++; 761 } 762 spread++; 763 } 764 return new String(chars, 0, pos - spread); 765 } 766 767 /** 768 * Returns a string containing all matching characters of a character sequence, in order. For 769 * example: <pre> {@code 770 * 771 * CharMatcher.is('a').retainFrom("bazaar")}</pre> 772 * 773 * ... returns {@code "aaa"}. 774 */ 775 public String retainFrom(CharSequence sequence) { 776 return negate().removeFrom(sequence); 777 } 778 779 /** 780 * Returns a string copy of the input character sequence, with each character that matches this 781 * matcher replaced by a given replacement character. For example: <pre> {@code 782 * 783 * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre> 784 * 785 * ... returns {@code "rodor"}. 786 * 787 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 788 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 789 * character. 
790 * 791 * @param sequence the character sequence to replace matching characters in 792 * @param replacement the character to append to the result string in place of each matching 793 * character in {@code sequence} 794 * @return the new string 795 */ 796 public String replaceFrom(CharSequence sequence, char replacement) { 797 String string = sequence.toString(); 798 int pos = indexIn(string); 799 if (pos == -1) { 800 return string; 801 } 802 char[] chars = string.toCharArray(); 803 chars[pos] = replacement; 804 for (int i = pos + 1; i < chars.length; i++) { 805 if (matches(chars[i])) { 806 chars[i] = replacement; 807 } 808 } 809 return new String(chars); 810 } 811 812 /** 813 * Returns a string copy of the input character sequence, with each character that matches this 814 * matcher replaced by a given replacement sequence. For example: <pre> {@code 815 * 816 * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre> 817 * 818 * ... returns {@code "yoohoo"}. 819 * 820 * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better 821 * off calling {@link #replaceFrom(CharSequence, char)} directly. 
822 * 823 * @param sequence the character sequence to replace matching characters in 824 * @param replacement the characters to append to the result string in place of each matching 825 * character in {@code sequence} 826 * @return the new string 827 */ 828 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 829 int replacementLen = replacement.length(); 830 if (replacementLen == 0) { 831 return removeFrom(sequence); 832 } 833 if (replacementLen == 1) { 834 return replaceFrom(sequence, replacement.charAt(0)); 835 } 836 837 String string = sequence.toString(); 838 int pos = indexIn(string); 839 if (pos == -1) { 840 return string; 841 } 842 843 int len = string.length(); 844 StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); 845 846 int oldpos = 0; 847 do { 848 buf.append(string, oldpos, pos); 849 buf.append(replacement); 850 oldpos = pos + 1; 851 pos = indexIn(string, oldpos); 852 } while (pos != -1); 853 854 buf.append(string, oldpos, len); 855 return buf.toString(); 856 } 857 858 /** 859 * Returns a substring of the input character sequence that omits all characters this matcher 860 * matches from the beginning and from the end of the string. For example: <pre> {@code 861 * 862 * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre> 863 * 864 * ... returns {@code "cat"}. 865 * 866 * <p>Note that: <pre> {@code 867 * 868 * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre> 869 * 870 * ... is equivalent to {@link String#trim()}. 
871 */ 872 public String trimFrom(CharSequence sequence) { 873 int len = sequence.length(); 874 int first; 875 int last; 876 877 for (first = 0; first < len; first++) { 878 if (!matches(sequence.charAt(first))) { 879 break; 880 } 881 } 882 for (last = len - 1; last > first; last--) { 883 if (!matches(sequence.charAt(last))) { 884 break; 885 } 886 } 887 888 return sequence.subSequence(first, last + 1).toString(); 889 } 890 891 /** 892 * Returns a substring of the input character sequence that omits all characters this matcher 893 * matches from the beginning of the string. For example: <pre> {@code 894 * 895 * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre> 896 * 897 * ... returns {@code "catbab"}. 898 */ 899 public String trimLeadingFrom(CharSequence sequence) { 900 int len = sequence.length(); 901 for (int first = 0; first < len; first++) { 902 if (!matches(sequence.charAt(first))) { 903 return sequence.subSequence(first, len).toString(); 904 } 905 } 906 return ""; 907 } 908 909 /** 910 * Returns a substring of the input character sequence that omits all characters this matcher 911 * matches from the end of the string. For example: <pre> {@code 912 * 913 * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre> 914 * 915 * ... returns {@code "abacat"}. 916 */ 917 public String trimTrailingFrom(CharSequence sequence) { 918 int len = sequence.length(); 919 for (int last = len - 1; last >= 0; last--) { 920 if (!matches(sequence.charAt(last))) { 921 return sequence.subSequence(0, last + 1).toString(); 922 } 923 } 924 return ""; 925 } 926 927 /** 928 * Returns a string copy of the input character sequence, with each group of consecutive 929 * characters that match this matcher replaced by a single replacement character. For example: 930 * <pre> {@code 931 * 932 * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre> 933 * 934 * ... returns {@code "b-p-r"}. 
935 * 936 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 937 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 938 * character. 939 * 940 * @param sequence the character sequence to replace matching groups of characters in 941 * @param replacement the character to append to the result string in place of each group of 942 * matching characters in {@code sequence} 943 * @return the new string 944 */ 945 public String collapseFrom(CharSequence sequence, char replacement) { 946 // This implementation avoids unnecessary allocation. 947 int len = sequence.length(); 948 for (int i = 0; i < len; i++) { 949 char c = sequence.charAt(i); 950 if (matches(c)) { 951 if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { 952 // a no-op replacement 953 i++; 954 } else { 955 StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement); 956 return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); 957 } 958 } 959 } 960 // no replacement needed 961 return sequence.toString(); 962 } 963 964 /** 965 * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that 966 * groups of matching characters at the start or end of the sequence are removed without 967 * replacement. 968 */ 969 public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 970 // This implementation avoids unnecessary allocation. 971 int len = sequence.length(); 972 int first = 0; 973 int last = len - 1; 974 975 while (first < len && matches(sequence.charAt(first))) { 976 first++; 977 } 978 979 while (last > first && matches(sequence.charAt(last))) { 980 last--; 981 } 982 983 return (first == 0 && last == len - 1) 984 ? 
collapseFrom(sequence, replacement) 985 : finishCollapseFrom( 986 sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false); 987 } 988 989 private String finishCollapseFrom( 990 CharSequence sequence, 991 int start, 992 int end, 993 char replacement, 994 StringBuilder builder, 995 boolean inMatchingGroup) { 996 for (int i = start; i < end; i++) { 997 char c = sequence.charAt(i); 998 if (matches(c)) { 999 if (!inMatchingGroup) { 1000 builder.append(replacement); 1001 inMatchingGroup = true; 1002 } 1003 } else { 1004 builder.append(c); 1005 inMatchingGroup = false; 1006 } 1007 } 1008 return builder.toString(); 1009 } 1010 1011 /** 1012 * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches} 1013 * instead. 1014 */ 1015 @Deprecated 1016 @Override 1017 public boolean apply(Character character) { 1018 return matches(character); 1019 } 1020 1021 /** 1022 * Returns a string representation of this {@code CharMatcher}, such as 1023 * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. 1024 */ 1025 @Override 1026 public String toString() { 1027 return super.toString(); 1028 } 1029 1030 /** 1031 * Returns the Java Unicode escape sequence for the given character, in the form "\u12AB" where 1032 * "12AB" is the four hexadecimal digits representing the 16 bits of the UTF-16 character. 1033 */ 1034 private static String showCharacter(char c) { 1035 String hex = "0123456789ABCDEF"; 1036 char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'}; 1037 for (int i = 0; i < 4; i++) { 1038 tmp[5 - i] = hex.charAt(c & 0xF); 1039 c = (char) (c >> 4); 1040 } 1041 return String.copyValueOf(tmp); 1042 } 1043 1044 // Fast matchers 1045 1046 /** A matcher for which precomputation will not yield any significant benefit. 
*/ 1047 abstract static class FastMatcher extends CharMatcher { 1048 1049 @Override 1050 public final CharMatcher precomputed() { 1051 return this; 1052 } 1053 1054 @Override 1055 public CharMatcher negate() { 1056 return new NegatedFastMatcher(this); 1057 } 1058 } 1059 1060 /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */ 1061 abstract static class NamedFastMatcher extends FastMatcher { 1062 1063 private final String description; 1064 1065 NamedFastMatcher(String description) { 1066 this.description = checkNotNull(description); 1067 } 1068 1069 @Override 1070 public final String toString() { 1071 return description; 1072 } 1073 } 1074 1075 /** Negation of a {@link FastMatcher}. */ 1076 static class NegatedFastMatcher extends Negated { 1077 1078 NegatedFastMatcher(CharMatcher original) { 1079 super(original); 1080 } 1081 1082 @Override 1083 public final CharMatcher precomputed() { 1084 return this; 1085 } 1086 } 1087 1088 /** Fast matcher using a {@link BitSet} table of matching characters. */ 1089 @GwtIncompatible // used only from other GwtIncompatible code 1090 private static final class BitSetMatcher extends NamedFastMatcher { 1091 1092 private final BitSet table; 1093 1094 private BitSetMatcher(BitSet table, String description) { 1095 super(description); 1096 if (table.length() + Long.SIZE < table.size()) { 1097 table = (BitSet) table.clone(); 1098 // If only we could actually call BitSet.trimToSize() ourselves... 1099 } 1100 this.table = table; 1101 } 1102 1103 @Override 1104 public boolean matches(char c) { 1105 return table.get(c); 1106 } 1107 1108 @Override 1109 void setBits(BitSet bitSet) { 1110 bitSet.or(table); 1111 } 1112 } 1113 1114 // Static constant implementation classes 1115 1116 /** Implementation of {@link #any()}. 
*/ 1117 private static final class Any extends NamedFastMatcher { 1118 1119 static final Any INSTANCE = new Any(); 1120 1121 private Any() { 1122 super("CharMatcher.any()"); 1123 } 1124 1125 @Override 1126 public boolean matches(char c) { 1127 return true; 1128 } 1129 1130 @Override 1131 public int indexIn(CharSequence sequence) { 1132 return (sequence.length() == 0) ? -1 : 0; 1133 } 1134 1135 @Override 1136 public int indexIn(CharSequence sequence, int start) { 1137 int length = sequence.length(); 1138 checkPositionIndex(start, length); 1139 return (start == length) ? -1 : start; 1140 } 1141 1142 @Override 1143 public int lastIndexIn(CharSequence sequence) { 1144 return sequence.length() - 1; 1145 } 1146 1147 @Override 1148 public boolean matchesAllOf(CharSequence sequence) { 1149 checkNotNull(sequence); 1150 return true; 1151 } 1152 1153 @Override 1154 public boolean matchesNoneOf(CharSequence sequence) { 1155 return sequence.length() == 0; 1156 } 1157 1158 @Override 1159 public String removeFrom(CharSequence sequence) { 1160 checkNotNull(sequence); 1161 return ""; 1162 } 1163 1164 @Override 1165 public String replaceFrom(CharSequence sequence, char replacement) { 1166 char[] array = new char[sequence.length()]; 1167 Arrays.fill(array, replacement); 1168 return new String(array); 1169 } 1170 1171 @Override 1172 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1173 StringBuilder result = new StringBuilder(sequence.length() * replacement.length()); 1174 for (int i = 0; i < sequence.length(); i++) { 1175 result.append(replacement); 1176 } 1177 return result.toString(); 1178 } 1179 1180 @Override 1181 public String collapseFrom(CharSequence sequence, char replacement) { 1182 return (sequence.length() == 0) ? 
"" : String.valueOf(replacement); 1183 } 1184 1185 @Override 1186 public String trimFrom(CharSequence sequence) { 1187 checkNotNull(sequence); 1188 return ""; 1189 } 1190 1191 @Override 1192 public int countIn(CharSequence sequence) { 1193 return sequence.length(); 1194 } 1195 1196 @Override 1197 public CharMatcher and(CharMatcher other) { 1198 return checkNotNull(other); 1199 } 1200 1201 @Override 1202 public CharMatcher or(CharMatcher other) { 1203 checkNotNull(other); 1204 return this; 1205 } 1206 1207 @Override 1208 public CharMatcher negate() { 1209 return none(); 1210 } 1211 } 1212 1213 /** Implementation of {@link #none()}. */ 1214 private static final class None extends NamedFastMatcher { 1215 1216 static final None INSTANCE = new None(); 1217 1218 private None() { 1219 super("CharMatcher.none()"); 1220 } 1221 1222 @Override 1223 public boolean matches(char c) { 1224 return false; 1225 } 1226 1227 @Override 1228 public int indexIn(CharSequence sequence) { 1229 checkNotNull(sequence); 1230 return -1; 1231 } 1232 1233 @Override 1234 public int indexIn(CharSequence sequence, int start) { 1235 int length = sequence.length(); 1236 checkPositionIndex(start, length); 1237 return -1; 1238 } 1239 1240 @Override 1241 public int lastIndexIn(CharSequence sequence) { 1242 checkNotNull(sequence); 1243 return -1; 1244 } 1245 1246 @Override 1247 public boolean matchesAllOf(CharSequence sequence) { 1248 return sequence.length() == 0; 1249 } 1250 1251 @Override 1252 public boolean matchesNoneOf(CharSequence sequence) { 1253 checkNotNull(sequence); 1254 return true; 1255 } 1256 1257 @Override 1258 public String removeFrom(CharSequence sequence) { 1259 return sequence.toString(); 1260 } 1261 1262 @Override 1263 public String replaceFrom(CharSequence sequence, char replacement) { 1264 return sequence.toString(); 1265 } 1266 1267 @Override 1268 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1269 checkNotNull(replacement); 1270 return 
sequence.toString(); 1271 } 1272 1273 @Override 1274 public String collapseFrom(CharSequence sequence, char replacement) { 1275 return sequence.toString(); 1276 } 1277 1278 @Override 1279 public String trimFrom(CharSequence sequence) { 1280 return sequence.toString(); 1281 } 1282 1283 @Override 1284 public String trimLeadingFrom(CharSequence sequence) { 1285 return sequence.toString(); 1286 } 1287 1288 @Override 1289 public String trimTrailingFrom(CharSequence sequence) { 1290 return sequence.toString(); 1291 } 1292 1293 @Override 1294 public int countIn(CharSequence sequence) { 1295 checkNotNull(sequence); 1296 return 0; 1297 } 1298 1299 @Override 1300 public CharMatcher and(CharMatcher other) { 1301 checkNotNull(other); 1302 return this; 1303 } 1304 1305 @Override 1306 public CharMatcher or(CharMatcher other) { 1307 return checkNotNull(other); 1308 } 1309 1310 @Override 1311 public CharMatcher negate() { 1312 return any(); 1313 } 1314 } 1315 1316 /** Implementation of {@link #whitespace()}. 
*/
  @VisibleForTesting
  static final class Whitespace extends NamedFastMatcher {

    // Lookup table consulted by matches(): a character matches only when the slot selected by
    // its hash holds that very character. The table has 32 entries (four rows of eight).
    // NOTE(review): U+3000 occupies several slots, presumably as filler for hash positions that
    // no other whitespace character maps to - confirm against whatever generated this table.
    static final String TABLE =
        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
    // Multiplier of the hash; the int product in matches() is allowed to wrap around.
    static final int MULTIPLIER = 1682554634;
    // Unsigned right shift that keeps just enough high-order bits of the product to index TABLE:
    // with 32 entries this is numberOfLeadingZeros(31) = 27, leaving a 5-bit index.
    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);

    static final Whitespace INSTANCE = new Whitespace();

    Whitespace() {
      super("CharMatcher.whitespace()");
    }

    /**
     * Hashes {@code c} to a slot of {@link #TABLE} and reports whether that slot contains
     * {@code c} itself.
     */
    @Override
    public boolean matches(char c) {
      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
    }

    /** Sets the bit of every character that appears in {@link #TABLE}. */
    @GwtIncompatible // used only from other GwtIncompatible code
    @Override
    void setBits(BitSet table) {
      for (int i = 0; i < TABLE.length(); i++) {
        table.set(TABLE.charAt(i));
      }
    }
  }

  /** Implementation of {@link #breakingWhitespace()}. */
  private static final class BreakingWhitespace extends CharMatcher {

    static final CharMatcher INSTANCE = new BreakingWhitespace();

    /**
     * Returns {@code true} for the individually listed characters and for the range U+2000 to
     * U+200A, with the single exception of U+2007.
     */
    @Override
    public boolean matches(char c) {
      switch (c) {
        case '\t':
        case '\n':
        case '\013':
        case '\f':
        case '\r':
        case ' ':
        case '\u0085':
        case '\u1680':
        case '\u2028':
        case '\u2029':
        case '\u205f':
        case '\u3000':
          return true;
        // U+2007 lies inside the range accepted by the default branch, so it has to be rejected
        // explicitly before that branch is reached.
        case '\u2007':
          return false;
        default:
          return c >= '\u2000' && c <= '\u200a';
      }
    }

    @Override
    public String toString() {
      return "CharMatcher.breakingWhitespace()";
    }
  }

  /** Implementation of {@link #ascii()}.
*/ 1383 private static final class Ascii extends NamedFastMatcher { 1384 1385 static final Ascii INSTANCE = new Ascii(); 1386 1387 Ascii() { 1388 super("CharMatcher.ascii()"); 1389 } 1390 1391 @Override 1392 public boolean matches(char c) { 1393 return c <= '\u007f'; 1394 } 1395 } 1396 1397 /** Implementation that matches characters that fall within multiple ranges. */ 1398 private static class RangesMatcher extends CharMatcher { 1399 1400 private final String description; 1401 private final char[] rangeStarts; 1402 private final char[] rangeEnds; 1403 1404 RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { 1405 this.description = description; 1406 this.rangeStarts = rangeStarts; 1407 this.rangeEnds = rangeEnds; 1408 checkArgument(rangeStarts.length == rangeEnds.length); 1409 for (int i = 0; i < rangeStarts.length; i++) { 1410 checkArgument(rangeStarts[i] <= rangeEnds[i]); 1411 if (i + 1 < rangeStarts.length) { 1412 checkArgument(rangeEnds[i] < rangeStarts[i + 1]); 1413 } 1414 } 1415 } 1416 1417 @Override 1418 public boolean matches(char c) { 1419 int index = Arrays.binarySearch(rangeStarts, c); 1420 if (index >= 0) { 1421 return true; 1422 } else { 1423 index = ~index - 1; 1424 return index >= 0 && c <= rangeEnds[index]; 1425 } 1426 } 1427 1428 @Override 1429 public String toString() { 1430 return description; 1431 } 1432 } 1433 1434 /** Implementation of {@link #digit()}. */ 1435 private static final class Digit extends RangesMatcher { 1436 1437 // Must be in ascending order. 
1438 private static final String ZEROES = 1439 "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66" 1440 + "\u0be6\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810" 1441 + "\u1946\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; 1442 1443 private static char[] zeroes() { 1444 return ZEROES.toCharArray(); 1445 } 1446 1447 private static char[] nines() { 1448 char[] nines = new char[ZEROES.length()]; 1449 for (int i = 0; i < ZEROES.length(); i++) { 1450 nines[i] = (char) (ZEROES.charAt(i) + 9); 1451 } 1452 return nines; 1453 } 1454 1455 static final Digit INSTANCE = new Digit(); 1456 1457 private Digit() { 1458 super("CharMatcher.digit()", zeroes(), nines()); 1459 } 1460 } 1461 1462 /** Implementation of {@link #javaDigit()}. */ 1463 private static final class JavaDigit extends CharMatcher { 1464 1465 static final JavaDigit INSTANCE = new JavaDigit(); 1466 1467 @Override 1468 public boolean matches(char c) { 1469 return Character.isDigit(c); 1470 } 1471 1472 @Override 1473 public String toString() { 1474 return "CharMatcher.javaDigit()"; 1475 } 1476 } 1477 1478 /** Implementation of {@link #javaLetter()}. */ 1479 private static final class JavaLetter extends CharMatcher { 1480 1481 static final JavaLetter INSTANCE = new JavaLetter(); 1482 1483 @Override 1484 public boolean matches(char c) { 1485 return Character.isLetter(c); 1486 } 1487 1488 @Override 1489 public String toString() { 1490 return "CharMatcher.javaLetter()"; 1491 } 1492 } 1493 1494 /** Implementation of {@link #javaLetterOrDigit()}. */ 1495 private static final class JavaLetterOrDigit extends CharMatcher { 1496 1497 static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit(); 1498 1499 @Override 1500 public boolean matches(char c) { 1501 return Character.isLetterOrDigit(c); 1502 } 1503 1504 @Override 1505 public String toString() { 1506 return "CharMatcher.javaLetterOrDigit()"; 1507 } 1508 } 1509 1510 /** Implementation of {@link #javaUpperCase()}. 
*/ 1511 private static final class JavaUpperCase extends CharMatcher { 1512 1513 static final JavaUpperCase INSTANCE = new JavaUpperCase(); 1514 1515 @Override 1516 public boolean matches(char c) { 1517 return Character.isUpperCase(c); 1518 } 1519 1520 @Override 1521 public String toString() { 1522 return "CharMatcher.javaUpperCase()"; 1523 } 1524 } 1525 1526 /** Implementation of {@link #javaLowerCase()}. */ 1527 private static final class JavaLowerCase extends CharMatcher { 1528 1529 static final JavaLowerCase INSTANCE = new JavaLowerCase(); 1530 1531 @Override 1532 public boolean matches(char c) { 1533 return Character.isLowerCase(c); 1534 } 1535 1536 @Override 1537 public String toString() { 1538 return "CharMatcher.javaLowerCase()"; 1539 } 1540 } 1541 1542 /** Implementation of {@link #javaIsoControl()}. */ 1543 private static final class JavaIsoControl extends NamedFastMatcher { 1544 1545 static final JavaIsoControl INSTANCE = new JavaIsoControl(); 1546 1547 private JavaIsoControl() { 1548 super("CharMatcher.javaIsoControl()"); 1549 } 1550 1551 @Override 1552 public boolean matches(char c) { 1553 return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f'); 1554 } 1555 } 1556 1557 /** Implementation of {@link #invisible()}. */ 1558 private static final class Invisible extends RangesMatcher { 1559 1560 private static final String RANGE_STARTS = 1561 "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067" 1562 + "\u2068\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa"; 1563 private static final String RANGE_ENDS = 1564 "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067" 1565 + "\u2068\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb"; 1566 1567 static final Invisible INSTANCE = new Invisible(); 1568 1569 private Invisible() { 1570 super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray()); 1571 } 1572 } 1573 1574 /** Implementation of {@link #singleWidth()}. 
*/ 1575 private static final class SingleWidth extends RangesMatcher { 1576 1577 static final SingleWidth INSTANCE = new SingleWidth(); 1578 1579 private SingleWidth() { 1580 super( 1581 "CharMatcher.singleWidth()", 1582 "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), 1583 "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); 1584 } 1585 } 1586 1587 // Non-static factory implementation classes 1588 1589 /** Implementation of {@link #negate()}. */ 1590 private static class Negated extends CharMatcher { 1591 1592 final CharMatcher original; 1593 1594 Negated(CharMatcher original) { 1595 this.original = checkNotNull(original); 1596 } 1597 1598 @Override 1599 public boolean matches(char c) { 1600 return !original.matches(c); 1601 } 1602 1603 @Override 1604 public boolean matchesAllOf(CharSequence sequence) { 1605 return original.matchesNoneOf(sequence); 1606 } 1607 1608 @Override 1609 public boolean matchesNoneOf(CharSequence sequence) { 1610 return original.matchesAllOf(sequence); 1611 } 1612 1613 @Override 1614 public int countIn(CharSequence sequence) { 1615 return sequence.length() - original.countIn(sequence); 1616 } 1617 1618 @GwtIncompatible // used only from other GwtIncompatible code 1619 @Override 1620 void setBits(BitSet table) { 1621 BitSet tmp = new BitSet(); 1622 original.setBits(tmp); 1623 tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 1624 table.or(tmp); 1625 } 1626 1627 @Override 1628 public CharMatcher negate() { 1629 return original; 1630 } 1631 1632 @Override 1633 public String toString() { 1634 return original + ".negate()"; 1635 } 1636 } 1637 1638 /** Implementation of {@link #and(CharMatcher)}. 
*/ 1639 private static final class And extends CharMatcher { 1640 1641 final CharMatcher first; 1642 final CharMatcher second; 1643 1644 And(CharMatcher a, CharMatcher b) { 1645 first = checkNotNull(a); 1646 second = checkNotNull(b); 1647 } 1648 1649 @Override 1650 public boolean matches(char c) { 1651 return first.matches(c) && second.matches(c); 1652 } 1653 1654 @GwtIncompatible // used only from other GwtIncompatible code 1655 @Override 1656 void setBits(BitSet table) { 1657 BitSet tmp1 = new BitSet(); 1658 first.setBits(tmp1); 1659 BitSet tmp2 = new BitSet(); 1660 second.setBits(tmp2); 1661 tmp1.and(tmp2); 1662 table.or(tmp1); 1663 } 1664 1665 @Override 1666 public String toString() { 1667 return "CharMatcher.and(" + first + ", " + second + ")"; 1668 } 1669 } 1670 1671 /** Implementation of {@link #or(CharMatcher)}. */ 1672 private static final class Or extends CharMatcher { 1673 1674 final CharMatcher first; 1675 final CharMatcher second; 1676 1677 Or(CharMatcher a, CharMatcher b) { 1678 first = checkNotNull(a); 1679 second = checkNotNull(b); 1680 } 1681 1682 @GwtIncompatible // used only from other GwtIncompatible code 1683 @Override 1684 void setBits(BitSet table) { 1685 first.setBits(table); 1686 second.setBits(table); 1687 } 1688 1689 @Override 1690 public boolean matches(char c) { 1691 return first.matches(c) || second.matches(c); 1692 } 1693 1694 @Override 1695 public String toString() { 1696 return "CharMatcher.or(" + first + ", " + second + ")"; 1697 } 1698 } 1699 1700 // Static factory implementations 1701 1702 /** Implementation of {@link #is(char)}. 
*/ 1703 private static final class Is extends FastMatcher { 1704 1705 private final char match; 1706 1707 Is(char match) { 1708 this.match = match; 1709 } 1710 1711 @Override 1712 public boolean matches(char c) { 1713 return c == match; 1714 } 1715 1716 @Override 1717 public String replaceFrom(CharSequence sequence, char replacement) { 1718 return sequence.toString().replace(match, replacement); 1719 } 1720 1721 @Override 1722 public CharMatcher and(CharMatcher other) { 1723 return other.matches(match) ? this : none(); 1724 } 1725 1726 @Override 1727 public CharMatcher or(CharMatcher other) { 1728 return other.matches(match) ? other : super.or(other); 1729 } 1730 1731 @Override 1732 public CharMatcher negate() { 1733 return isNot(match); 1734 } 1735 1736 @GwtIncompatible // used only from other GwtIncompatible code 1737 @Override 1738 void setBits(BitSet table) { 1739 table.set(match); 1740 } 1741 1742 @Override 1743 public String toString() { 1744 return "CharMatcher.is('" + showCharacter(match) + "')"; 1745 } 1746 } 1747 1748 /** Implementation of {@link #isNot(char)}. */ 1749 private static final class IsNot extends FastMatcher { 1750 1751 private final char match; 1752 1753 IsNot(char match) { 1754 this.match = match; 1755 } 1756 1757 @Override 1758 public boolean matches(char c) { 1759 return c != match; 1760 } 1761 1762 @Override 1763 public CharMatcher and(CharMatcher other) { 1764 return other.matches(match) ? super.and(other) : other; 1765 } 1766 1767 @Override 1768 public CharMatcher or(CharMatcher other) { 1769 return other.matches(match) ? 
any() : this; 1770 } 1771 1772 @GwtIncompatible // used only from other GwtIncompatible code 1773 @Override 1774 void setBits(BitSet table) { 1775 table.set(0, match); 1776 table.set(match + 1, Character.MAX_VALUE + 1); 1777 } 1778 1779 @Override 1780 public CharMatcher negate() { 1781 return is(match); 1782 } 1783 1784 @Override 1785 public String toString() { 1786 return "CharMatcher.isNot('" + showCharacter(match) + "')"; 1787 } 1788 } 1789 1790 private static CharMatcher.IsEither isEither(char c1, char c2) { 1791 return new CharMatcher.IsEither(c1, c2); 1792 } 1793 1794 /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */ 1795 private static final class IsEither extends FastMatcher { 1796 1797 private final char match1; 1798 private final char match2; 1799 1800 IsEither(char match1, char match2) { 1801 this.match1 = match1; 1802 this.match2 = match2; 1803 } 1804 1805 @Override 1806 public boolean matches(char c) { 1807 return c == match1 || c == match2; 1808 } 1809 1810 @GwtIncompatible // used only from other GwtIncompatible code 1811 @Override 1812 void setBits(BitSet table) { 1813 table.set(match1); 1814 table.set(match2); 1815 } 1816 1817 @Override 1818 public String toString() { 1819 return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")"; 1820 } 1821 } 1822 1823 /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. 
*/
  private static final class AnyOf extends CharMatcher {

    private final char[] chars;

    public AnyOf(CharSequence chars) {
      // Kept sorted so that matches() can use binary search.
      char[] sorted = chars.toString().toCharArray();
      Arrays.sort(sorted);
      this.chars = sorted;
    }

    @Override
    public boolean matches(char c) {
      int position = Arrays.binarySearch(chars, c);
      return position >= 0;
    }

    @Override
    @GwtIncompatible // used only from other GwtIncompatible code
    void setBits(BitSet table) {
      for (int i = 0; i < chars.length; i++) {
        table.set(chars[i]);
      }
    }

    /** Lists the characters, in sorted order, as Unicode escapes. */
    @Override
    public String toString() {
      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
      for (int i = 0; i < chars.length; i++) {
        description.append(showCharacter(chars[i]));
      }
      return description.append("\")").toString();
    }
  }

  /** Implementation of {@link #inRange(char, char)}. */
  private static final class InRange extends FastMatcher {

    private final char startInclusive;
    private final char endInclusive;

    InRange(char startInclusive, char endInclusive) {
      // An inverted range is rejected; a single-character range is allowed.
      checkArgument(endInclusive >= startInclusive);
      this.startInclusive = startInclusive;
      this.endInclusive = endInclusive;
    }

    @Override
    public boolean matches(char c) {
      if (c < startInclusive) {
        return false;
      }
      return c <= endInclusive;
    }

    @GwtIncompatible // used only from other GwtIncompatible code
    @Override
    void setBits(BitSet table) {
      // BitSet.set takes an exclusive upper bound, hence the + 1.
      table.set(startInclusive, endInclusive + 1);
    }

    @Override
    public String toString() {
      String from = showCharacter(startInclusive);
      String to = showCharacter(endInclusive);
      return "CharMatcher.inRange('" + from + "', '" + to + "')";
    }
  }

  /** Implementation of {@link #forPredicate(Predicate)}. */
  private static final class ForPredicate extends CharMatcher {

    private final Predicate<? super Character> predicate;

    ForPredicate(Predicate<? super Character> predicate) {
      this.predicate = checkNotNull(predicate);
    }

    /** Boxes {@code c} and asks the wrapped predicate. */
    @Override
    public boolean matches(char c) {
      Character boxed = c;
      return predicate.apply(boxed);
    }

    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
    @Override
    public boolean apply(Character character) {
      // The boxed value goes straight to the predicate; null is rejected first.
      Character checked = checkNotNull(character);
      return predicate.apply(checked);
    }

    @Override
    public String toString() {
      return "CharMatcher.forPredicate(" + predicate + ")";
    }
  }
}