001/* 002 * Copyright (C) 2008 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.base; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndex; 020 021import com.google.common.annotations.GwtCompatible; 022import com.google.common.annotations.GwtIncompatible; 023import com.google.common.annotations.VisibleForTesting; 024import java.util.Arrays; 025import java.util.BitSet; 026 027/** 028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does 029 * for any {@link Object}. Also offers basic text processing methods based on this function. 030 * Implementations are strongly encouraged to be side-effect-free and immutable. 031 * 032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean 033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}". 034 * 035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a 036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. 
It does not understand 037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code 038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of 039 * assigned characters, including important CJK characters and emoji. 040 * 041 * <p>Supplementary characters are <a 042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded 043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as 044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s. 045 * 046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for 047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For 048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner. 049 * 050 * <p>Example usages: 051 * 052 * <pre> 053 * String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput); 054 * if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> 055 * 056 * <p>See the Guava User Guide article on <a 057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher} 058 * </a>. 
059 * 060 * @author Kevin Bourrillion 061 * @since 1.0 062 */ 063@GwtCompatible(emulated = true) 064public abstract class CharMatcher implements Predicate<Character> { 065 /* 066 * N777777777NO 067 * N7777777777777N 068 * M777777777777777N 069 * $N877777777D77777M 070 * N M77777777ONND777M 071 * MN777777777NN D777 072 * N7ZN777777777NN ~M7778 073 * N777777777777MMNN88777N 074 * N777777777777MNZZZ7777O 075 * DZN7777O77777777777777 076 * N7OONND7777777D77777N 077 * 8$M++++?N???$77777$ 078 * M7++++N+M77777777N 079 * N77O777777777777$ M 080 * DNNM$$$$777777N D 081 * N$N:=N$777N7777M NZ 082 * 77Z::::N777777777 ODZZZ 083 * 77N::::::N77777777M NNZZZ$ 084 * $777:::::::77777777MN ZM8ZZZZZ 085 * 777M::::::Z7777777Z77 N++ZZZZNN 086 * 7777M:::::M7777777$777M $++IZZZZM 087 * M777$:::::N777777$M7777M +++++ZZZDN 088 * NN$::::::7777$$M777777N N+++ZZZZNZ 089 * N::::::N:7$O:77777777 N++++ZZZZN 090 * M::::::::::::N77777777+ +?+++++ZZZM 091 * 8::::::::::::D77777777M O+++++ZZ 092 * ::::::::::::M777777777N O+?D 093 * M:::::::::::M77777777778 77= 094 * D=::::::::::N7777777777N 777 095 * INN===::::::=77777777777N I777N 096 * ?777N========N7777777777787M N7777 097 * 77777$D======N77777777777N777N? N777777 098 * I77777$$$N7===M$$77777777$77777777$MMZ77777777N 099 * $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON 100 * M$$$$$$$$M M$$$$$$$$N=N$$$$7777777$$$ND 101 * O77Z$$$$$$$ M$$$$$$$$MNI==$DNNNNM=~N 102 * 7 :N MNN$$$$M$ $$$777$8 8D8I 103 * NMM.:7O 777777778 104 * 7777777MN 105 * M NO .7: 106 * M : M 107 * 8 108 */ 109 110 // Constant matcher factory methods 111 112 /** 113 * Matches any character. 114 * 115 * @since 19.0 (since 1.0 as constant {@code ANY}) 116 */ 117 public static CharMatcher any() { 118 return Any.INSTANCE; 119 } 120 121 /** 122 * Matches no characters. 
123 * 124 * @since 19.0 (since 1.0 as constant {@code NONE}) 125 */ 126 public static CharMatcher none() { 127 return None.INSTANCE; 128 } 129 130 /** 131 * Determines whether a character is whitespace according to the latest Unicode standard, as 132 * illustrated <a 133 * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 134 * This is not the same definition used by other Java APIs. (See a <a 135 * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.) 136 * 137 * <p>All Unicode White_Space characters are on the BMP and thus supported by this API. 138 * 139 * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to 140 * date. 141 * 142 * @since 19.0 (since 1.0 as constant {@code WHITESPACE}) 143 */ 144 public static CharMatcher whitespace() { 145 return Whitespace.INSTANCE; 146 } 147 148 /** 149 * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 150 * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a 151 * discussion of that term. 152 * 153 * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE}) 154 */ 155 public static CharMatcher breakingWhitespace() { 156 return BreakingWhitespace.INSTANCE; 157 } 158 159 /** 160 * Determines whether a character is ASCII, meaning that its code point is less than 128. 161 * 162 * @since 19.0 (since 1.0 as constant {@code ASCII}) 163 */ 164 public static CharMatcher ascii() { 165 return Ascii.INSTANCE; 166 } 167 168 /** 169 * Determines whether a character is a BMP digit according to <a 170 * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If 171 * you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 172 * 173 * @deprecated Many digits are supplementary characters; see the class documentation. 
174 * @since 19.0 (since 1.0 as constant {@code DIGIT}) 175 */ 176 @Deprecated 177 public static CharMatcher digit() { 178 return Digit.INSTANCE; 179 } 180 181 /** 182 * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char) 183 * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0', 184 * '9')}. 185 * 186 * @deprecated Many digits are supplementary characters; see the class documentation. 187 * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT}) 188 */ 189 @Deprecated 190 public static CharMatcher javaDigit() { 191 return JavaDigit.INSTANCE; 192 } 193 194 /** 195 * Determines whether a character is a BMP letter according to {@linkplain 196 * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin 197 * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}. 198 * 199 * @deprecated Most letters are supplementary characters; see the class documentation. 200 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER}) 201 */ 202 @Deprecated 203 public static CharMatcher javaLetter() { 204 return JavaLetter.INSTANCE; 205 } 206 207 /** 208 * Determines whether a character is a BMP letter or digit according to {@linkplain 209 * Character#isLetterOrDigit(char) Java's definition}. 210 * 211 * @deprecated Most letters and digits are supplementary characters; see the class documentation. 212 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}). 213 */ 214 @Deprecated 215 public static CharMatcher javaLetterOrDigit() { 216 return JavaLetterOrDigit.INSTANCE; 217 } 218 219 /** 220 * Determines whether a BMP character is upper case according to {@linkplain 221 * Character#isUpperCase(char) Java's definition}. 222 * 223 * @deprecated Some uppercase characters are supplementary characters; see the class 224 * documentation. 
225 * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE}) 226 */ 227 @Deprecated 228 public static CharMatcher javaUpperCase() { 229 return JavaUpperCase.INSTANCE; 230 } 231 232 /** 233 * Determines whether a BMP character is lower case according to {@linkplain 234 * Character#isLowerCase(char) Java's definition}. 235 * 236 * @deprecated Some lowercase characters are supplementary characters; see the class 237 * documentation. 238 * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE}) 239 */ 240 @Deprecated 241 public static CharMatcher javaLowerCase() { 242 return JavaLowerCase.INSTANCE; 243 } 244 245 /** 246 * Determines whether a character is an ISO control character as specified by {@link 247 * Character#isISOControl(char)}. 248 * 249 * <p>All ISO control codes are on the BMP and thus supported by this API. 250 * 251 * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL}) 252 */ 253 public static CharMatcher javaIsoControl() { 254 return JavaIsoControl.INSTANCE; 255 } 256 257 /** 258 * Determines whether a character is invisible; that is, if its Unicode category is any of 259 * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 260 * PRIVATE_USE according to ICU4J. 261 * 262 * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU). 263 * 264 * @deprecated Most invisible characters are supplementary characters; see the class 265 * documentation. 266 * @since 19.0 (since 1.0 as constant {@code INVISIBLE}) 267 */ 268 @Deprecated 269 public static CharMatcher invisible() { 270 return Invisible.INSTANCE; 271 } 272 273 /** 274 * Determines whether a character is single-width (not double-width). When in doubt, this matcher 275 * errs on the side of returning {@code false} (that is, it tends to assume a character is 276 * double-width). 277 * 278 * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to 279 * date. 
280 * 281 * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>. 282 * 283 * @deprecated Many such characters are supplementary characters; see the class documentation. 284 * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH}) 285 */ 286 @Deprecated 287 public static CharMatcher singleWidth() { 288 return SingleWidth.INSTANCE; 289 } 290 291 // Static factories 292 293 /** Returns a {@code char} matcher that matches only one specified BMP character. */ 294 public static CharMatcher is(final char match) { 295 return new Is(match); 296 } 297 298 /** 299 * Returns a {@code char} matcher that matches any character except the BMP character specified. 300 * 301 * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 302 */ 303 public static CharMatcher isNot(final char match) { 304 return new IsNot(match); 305 } 306 307 /** 308 * Returns a {@code char} matcher that matches any BMP character present in the given character 309 * sequence. Returns a bogus matcher if the sequence contains supplementary characters. 310 */ 311 public static CharMatcher anyOf(final CharSequence sequence) { 312 switch (sequence.length()) { 313 case 0: 314 return none(); 315 case 1: 316 return is(sequence.charAt(0)); 317 case 2: 318 return isEither(sequence.charAt(0), sequence.charAt(1)); 319 default: 320 // TODO(lowasser): is it potentially worth just going ahead and building a precomputed 321 // matcher? 322 return new AnyOf(sequence); 323 } 324 } 325 326 /** 327 * Returns a {@code char} matcher that matches any BMP character not present in the given 328 * character sequence. Returns a bogus matcher if the sequence contains supplementary characters. 329 */ 330 public static CharMatcher noneOf(CharSequence sequence) { 331 return anyOf(sequence).negate(); 332 } 333 334 /** 335 * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints 336 * are inclusive). 
For example, to match any lowercase letter of the English alphabet, use {@code 337 * CharMatcher.inRange('a', 'z')}. 338 * 339 * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 340 */ 341 public static CharMatcher inRange(final char startInclusive, final char endInclusive) { 342 return new InRange(startInclusive, endInclusive); 343 } 344 345 /** 346 * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but 347 * which operates on primitive {@code char} instances instead. 348 */ 349 public static CharMatcher forPredicate(final Predicate<? super Character> predicate) { 350 return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate); 351 } 352 353 // Constructors 354 355 /** 356 * Constructor for use by subclasses. When subclassing, you may want to override {@code 357 * toString()} to provide a useful description. 358 */ 359 protected CharMatcher() {} 360 361 // Abstract methods 362 363 /** Determines a true or false value for the given character. */ 364 public abstract boolean matches(char c); 365 366 // Non-static factories 367 368 /** Returns a matcher that matches any character not matched by this matcher. */ 369 // @Override under Java 8 but not under Java 7 370 @Override 371 public CharMatcher negate() { 372 return new Negated(this); 373 } 374 375 /** 376 * Returns a matcher that matches any character matched by both this matcher and {@code other}. 377 */ 378 public CharMatcher and(CharMatcher other) { 379 return new And(this, other); 380 } 381 382 /** 383 * Returns a matcher that matches any character matched by either this matcher or {@code other}. 384 */ 385 public CharMatcher or(CharMatcher other) { 386 return new Or(this, other); 387 } 388 389 /** 390 * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to 391 * query than the original; your mileage may vary. 
Precomputation takes time and is likely to be 392 * worthwhile only if the precomputed matcher is queried many thousands of times. 393 * 394 * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a 395 * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a 396 * worthwhile tradeoff in a browser. 397 */ 398 public CharMatcher precomputed() { 399 return Platform.precomputeCharMatcher(this); 400 } 401 402 private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; 403 404 /** 405 * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method 406 * on {@link Platform} so that we can have different behavior in GWT. 407 * 408 * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the 409 * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables 410 * for matchers that only match a few characters, and so on. In the worst-case scenario, it 411 * constructs an eight-kilobyte bit array and queries that. In many situations this produces a 412 * matcher which is faster to query than the original. 413 */ 414 @GwtIncompatible // SmallCharMatcher 415 CharMatcher precomputedInternal() { 416 final BitSet table = new BitSet(); 417 setBits(table); 418 int totalCharacters = table.cardinality(); 419 if (totalCharacters * 2 <= DISTINCT_CHARS) { 420 return precomputedPositive(totalCharacters, table, toString()); 421 } else { 422 // TODO(lowasser): is it worth it to worry about the last character of large matchers? 423 table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 424 int negatedCharacters = DISTINCT_CHARS - totalCharacters; 425 String suffix = ".negate()"; 426 final String description = toString(); 427 String negatedDescription = 428 description.endsWith(suffix) 429 ? 
description.substring(0, description.length() - suffix.length()) 430 : description + suffix; 431 return new NegatedFastMatcher( 432 precomputedPositive(negatedCharacters, table, negatedDescription)) { 433 @Override 434 public String toString() { 435 return description; 436 } 437 }; 438 } 439 } 440 441 /** 442 * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper. 443 */ 444 @GwtIncompatible // SmallCharMatcher 445 private static CharMatcher precomputedPositive( 446 int totalCharacters, BitSet table, String description) { 447 switch (totalCharacters) { 448 case 0: 449 return none(); 450 case 1: 451 return is((char) table.nextSetBit(0)); 452 case 2: 453 char c1 = (char) table.nextSetBit(0); 454 char c2 = (char) table.nextSetBit(c1 + 1); 455 return isEither(c1, c2); 456 default: 457 return isSmall(totalCharacters, table.length()) 458 ? SmallCharMatcher.from(table, description) 459 : new BitSetMatcher(table, description); 460 } 461 } 462 463 @GwtIncompatible // SmallCharMatcher 464 private static boolean isSmall(int totalCharacters, int tableLength) { 465 return totalCharacters <= SmallCharMatcher.MAX_SIZE 466 && tableLength > (totalCharacters * 4 * Character.SIZE); 467 // err on the side of BitSetMatcher 468 } 469 470 /** Sets bits in {@code table} matched by this matcher. */ 471 @GwtIncompatible // used only from other GwtIncompatible code 472 void setBits(BitSet table) { 473 for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) { 474 if (matches((char) c)) { 475 table.set(c); 476 } 477 } 478 } 479 480 // Text processing routines 481 482 /** 483 * Returns {@code true} if a character sequence contains at least one matching BMP character. 484 * Equivalent to {@code !matchesNoneOf(sequence)}. 485 * 486 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 487 * character, until this returns {@code true} or the end is reached. 
488 * 489 * @param sequence the character sequence to examine, possibly empty 490 * @return {@code true} if this matcher matches at least one character in the sequence 491 * @since 8.0 492 */ 493 public boolean matchesAnyOf(CharSequence sequence) { 494 return !matchesNoneOf(sequence); 495 } 496 497 /** 498 * Returns {@code true} if a character sequence contains only matching BMP characters. 499 * 500 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 501 * character, until this returns {@code false} or the end is reached. 502 * 503 * @param sequence the character sequence to examine, possibly empty 504 * @return {@code true} if this matcher matches every character in the sequence, including when 505 * the sequence is empty 506 */ 507 public boolean matchesAllOf(CharSequence sequence) { 508 for (int i = sequence.length() - 1; i >= 0; i--) { 509 if (!matches(sequence.charAt(i))) { 510 return false; 511 } 512 } 513 return true; 514 } 515 516 /** 517 * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to 518 * {@code !matchesAnyOf(sequence)}. 519 * 520 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 521 * character, until this returns {@code true} or the end is reached. 522 * 523 * @param sequence the character sequence to examine, possibly empty 524 * @return {@code true} if this matcher matches no characters in the sequence, including when the 525 * sequence is empty 526 */ 527 public boolean matchesNoneOf(CharSequence sequence) { 528 return indexIn(sequence) == -1; 529 } 530 531 /** 532 * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if 533 * no matching character is present. 534 * 535 * <p>The default implementation iterates over the sequence in forward order calling {@link 536 * #matches} for each character. 
537 * 538 * @param sequence the character sequence to examine from the beginning 539 * @return an index, or {@code -1} if no character matches 540 */ 541 public int indexIn(CharSequence sequence) { 542 return indexIn(sequence, 0); 543 } 544 545 /** 546 * Returns the index of the first matching BMP character in a character sequence, starting from a 547 * given position, or {@code -1} if no character matches after that position. 548 * 549 * <p>The default implementation iterates over the sequence in forward order, beginning at {@code 550 * start}, calling {@link #matches} for each character. 551 * 552 * @param sequence the character sequence to examine 553 * @param start the first index to examine; must be nonnegative and no greater than {@code 554 * sequence.length()} 555 * @return the index of the first matching character, guaranteed to be no less than {@code start}, 556 * or {@code -1} if no character matches 557 * @throws IndexOutOfBoundsException if start is negative or greater than {@code 558 * sequence.length()} 559 */ 560 public int indexIn(CharSequence sequence, int start) { 561 int length = sequence.length(); 562 checkPositionIndex(start, length); 563 for (int i = start; i < length; i++) { 564 if (matches(sequence.charAt(i))) { 565 return i; 566 } 567 } 568 return -1; 569 } 570 571 /** 572 * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if 573 * no matching character is present. 574 * 575 * <p>The default implementation iterates over the sequence in reverse order calling {@link 576 * #matches} for each character. 
577 * 578 * @param sequence the character sequence to examine from the end 579 * @return an index, or {@code -1} if no character matches 580 */ 581 public int lastIndexIn(CharSequence sequence) { 582 for (int i = sequence.length() - 1; i >= 0; i--) { 583 if (matches(sequence.charAt(i))) { 584 return i; 585 } 586 } 587 return -1; 588 } 589 590 /** 591 * Returns the number of matching {@code char}s found in a character sequence. 592 * 593 * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}(). 594 */ 595 public int countIn(CharSequence sequence) { 596 int count = 0; 597 for (int i = 0; i < sequence.length(); i++) { 598 if (matches(sequence.charAt(i))) { 599 count++; 600 } 601 } 602 return count; 603 } 604 605 /** 606 * Returns a string containing all non-matching characters of a character sequence, in order. For 607 * example: 608 * 609 * <pre>{@code 610 * CharMatcher.is('a').removeFrom("bazaar") 611 * }</pre> 612 * 613 * ... returns {@code "bzr"}. 614 */ 615 public String removeFrom(CharSequence sequence) { 616 String string = sequence.toString(); 617 int pos = indexIn(string); 618 if (pos == -1) { 619 return string; 620 } 621 622 char[] chars = string.toCharArray(); 623 int spread = 1; 624 625 // This unusual loop comes from extensive benchmarking 626 OUT: 627 while (true) { 628 pos++; 629 while (true) { 630 if (pos == chars.length) { 631 break OUT; 632 } 633 if (matches(chars[pos])) { 634 break; 635 } 636 chars[pos - spread] = chars[pos]; 637 pos++; 638 } 639 spread++; 640 } 641 return new String(chars, 0, pos - spread); 642 } 643 644 /** 645 * Returns a string containing all matching BMP characters of a character sequence, in order. For 646 * example: 647 * 648 * <pre>{@code 649 * CharMatcher.is('a').retainFrom("bazaar") 650 * }</pre> 651 * 652 * ... returns {@code "aaa"}. 
653 */ 654 public String retainFrom(CharSequence sequence) { 655 return negate().removeFrom(sequence); 656 } 657 658 /** 659 * Returns a string copy of the input character sequence, with each matching BMP character 660 * replaced by a given replacement character. For example: 661 * 662 * <pre>{@code 663 * CharMatcher.is('a').replaceFrom("radar", 'o') 664 * }</pre> 665 * 666 * ... returns {@code "rodor"}. 667 * 668 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 669 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 670 * character. 671 * 672 * @param sequence the character sequence to replace matching characters in 673 * @param replacement the character to append to the result string in place of each matching 674 * character in {@code sequence} 675 * @return the new string 676 */ 677 public String replaceFrom(CharSequence sequence, char replacement) { 678 String string = sequence.toString(); 679 int pos = indexIn(string); 680 if (pos == -1) { 681 return string; 682 } 683 char[] chars = string.toCharArray(); 684 chars[pos] = replacement; 685 for (int i = pos + 1; i < chars.length; i++) { 686 if (matches(chars[i])) { 687 chars[i] = replacement; 688 } 689 } 690 return new String(chars); 691 } 692 693 /** 694 * Returns a string copy of the input character sequence, with each matching BMP character 695 * replaced by a given replacement sequence. For example: 696 * 697 * <pre>{@code 698 * CharMatcher.is('a').replaceFrom("yaha", "oo") 699 * }</pre> 700 * 701 * ... returns {@code "yoohoo"}. 702 * 703 * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better 704 * off calling {@link #replaceFrom(CharSequence, char)} directly. 
705 * 706 * @param sequence the character sequence to replace matching characters in 707 * @param replacement the characters to append to the result string in place of each matching 708 * character in {@code sequence} 709 * @return the new string 710 */ 711 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 712 int replacementLen = replacement.length(); 713 if (replacementLen == 0) { 714 return removeFrom(sequence); 715 } 716 if (replacementLen == 1) { 717 return replaceFrom(sequence, replacement.charAt(0)); 718 } 719 720 String string = sequence.toString(); 721 int pos = indexIn(string); 722 if (pos == -1) { 723 return string; 724 } 725 726 int len = string.length(); 727 StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); 728 729 int oldpos = 0; 730 do { 731 buf.append(string, oldpos, pos); 732 buf.append(replacement); 733 oldpos = pos + 1; 734 pos = indexIn(string, oldpos); 735 } while (pos != -1); 736 737 buf.append(string, oldpos, len); 738 return buf.toString(); 739 } 740 741 /** 742 * Returns a substring of the input character sequence that omits all matching BMP characters from 743 * the beginning and from the end of the string. For example: 744 * 745 * <pre>{@code 746 * CharMatcher.anyOf("ab").trimFrom("abacatbab") 747 * }</pre> 748 * 749 * ... returns {@code "cat"}. 750 * 751 * <p>Note that: 752 * 753 * <pre>{@code 754 * CharMatcher.inRange('\0', ' ').trimFrom(str) 755 * }</pre> 756 * 757 * ... is equivalent to {@link String#trim()}. 
758 */ 759 public String trimFrom(CharSequence sequence) { 760 int len = sequence.length(); 761 int first; 762 int last; 763 764 for (first = 0; first < len; first++) { 765 if (!matches(sequence.charAt(first))) { 766 break; 767 } 768 } 769 for (last = len - 1; last > first; last--) { 770 if (!matches(sequence.charAt(last))) { 771 break; 772 } 773 } 774 775 return sequence.subSequence(first, last + 1).toString(); 776 } 777 778 /** 779 * Returns a substring of the input character sequence that omits all matching BMP characters from 780 * the beginning of the string. For example: 781 * 782 * <pre>{@code 783 * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab") 784 * }</pre> 785 * 786 * ... returns {@code "catbab"}. 787 */ 788 public String trimLeadingFrom(CharSequence sequence) { 789 int len = sequence.length(); 790 for (int first = 0; first < len; first++) { 791 if (!matches(sequence.charAt(first))) { 792 return sequence.subSequence(first, len).toString(); 793 } 794 } 795 return ""; 796 } 797 798 /** 799 * Returns a substring of the input character sequence that omits all matching BMP characters from 800 * the end of the string. For example: 801 * 802 * <pre>{@code 803 * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab") 804 * }</pre> 805 * 806 * ... returns {@code "abacat"}. 807 */ 808 public String trimTrailingFrom(CharSequence sequence) { 809 int len = sequence.length(); 810 for (int last = len - 1; last >= 0; last--) { 811 if (!matches(sequence.charAt(last))) { 812 return sequence.subSequence(0, last + 1).toString(); 813 } 814 } 815 return ""; 816 } 817 818 /** 819 * Returns a string copy of the input character sequence, with each group of consecutive matching 820 * BMP characters replaced by a single replacement character. For example: 821 * 822 * <pre>{@code 823 * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-') 824 * }</pre> 825 * 826 * ... returns {@code "b-p-r"}. 
827 * 828 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 829 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 830 * character. 831 * 832 * @param sequence the character sequence to replace matching groups of characters in 833 * @param replacement the character to append to the result string in place of each group of 834 * matching characters in {@code sequence} 835 * @return the new string 836 */ 837 public String collapseFrom(CharSequence sequence, char replacement) { 838 // This implementation avoids unnecessary allocation. 839 int len = sequence.length(); 840 for (int i = 0; i < len; i++) { 841 char c = sequence.charAt(i); 842 if (matches(c)) { 843 if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { 844 // a no-op replacement 845 i++; 846 } else { 847 StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement); 848 return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); 849 } 850 } 851 } 852 // no replacement needed 853 return sequence.toString(); 854 } 855 856 /** 857 * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that 858 * groups of matching BMP characters at the start or end of the sequence are removed without 859 * replacement. 860 */ 861 public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 862 // This implementation avoids unnecessary allocation. 863 int len = sequence.length(); 864 int first = 0; 865 int last = len - 1; 866 867 while (first < len && matches(sequence.charAt(first))) { 868 first++; 869 } 870 871 while (last > first && matches(sequence.charAt(last))) { 872 last--; 873 } 874 875 return (first == 0 && last == len - 1) 876 ? 
collapseFrom(sequence, replacement) 877 : finishCollapseFrom( 878 sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false); 879 } 880 881 private String finishCollapseFrom( 882 CharSequence sequence, 883 int start, 884 int end, 885 char replacement, 886 StringBuilder builder, 887 boolean inMatchingGroup) { 888 for (int i = start; i < end; i++) { 889 char c = sequence.charAt(i); 890 if (matches(c)) { 891 if (!inMatchingGroup) { 892 builder.append(replacement); 893 inMatchingGroup = true; 894 } 895 } else { 896 builder.append(c); 897 inMatchingGroup = false; 898 } 899 } 900 return builder.toString(); 901 } 902 903 /** 904 * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches} 905 * instead. 906 */ 907 @Deprecated 908 @Override 909 public boolean apply(Character character) { 910 return matches(character); 911 } 912 913 /** 914 * Returns a string representation of this {@code CharMatcher}, such as {@code 915 * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. 916 */ 917 @Override 918 public String toString() { 919 return super.toString(); 920 } 921 922 /** 923 * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where 924 * "12AB" is the four hexadecimal digits representing the 16-bit code unit. 925 */ 926 private static String showCharacter(char c) { 927 String hex = "0123456789ABCDEF"; 928 char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'}; 929 for (int i = 0; i < 4; i++) { 930 tmp[5 - i] = hex.charAt(c & 0xF); 931 c = (char) (c >> 4); 932 } 933 return String.copyValueOf(tmp); 934 } 935 936 // Fast matchers 937 938 /** A matcher for which precomputation will not yield any significant benefit. 
*/ 939 abstract static class FastMatcher extends CharMatcher { 940 941 @Override 942 public final CharMatcher precomputed() { 943 return this; 944 } 945 946 @Override 947 public CharMatcher negate() { 948 return new NegatedFastMatcher(this); 949 } 950 } 951 952 /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */ 953 abstract static class NamedFastMatcher extends FastMatcher { 954 955 private final String description; 956 957 NamedFastMatcher(String description) { 958 this.description = checkNotNull(description); 959 } 960 961 @Override 962 public final String toString() { 963 return description; 964 } 965 } 966 967 /** Negation of a {@link FastMatcher}. */ 968 static class NegatedFastMatcher extends Negated { 969 970 NegatedFastMatcher(CharMatcher original) { 971 super(original); 972 } 973 974 @Override 975 public final CharMatcher precomputed() { 976 return this; 977 } 978 } 979 980 /** Fast matcher using a {@link BitSet} table of matching characters. */ 981 @GwtIncompatible // used only from other GwtIncompatible code 982 private static final class BitSetMatcher extends NamedFastMatcher { 983 984 private final BitSet table; 985 986 private BitSetMatcher(BitSet table, String description) { 987 super(description); 988 if (table.length() + Long.SIZE < table.size()) { 989 table = (BitSet) table.clone(); 990 // If only we could actually call BitSet.trimToSize() ourselves... 991 } 992 this.table = table; 993 } 994 995 @Override 996 public boolean matches(char c) { 997 return table.get(c); 998 } 999 1000 @Override 1001 void setBits(BitSet bitSet) { 1002 bitSet.or(table); 1003 } 1004 } 1005 1006 // Static constant implementation classes 1007 1008 /** Implementation of {@link #any()}. 
*/ 1009 private static final class Any extends NamedFastMatcher { 1010 1011 static final Any INSTANCE = new Any(); 1012 1013 private Any() { 1014 super("CharMatcher.any()"); 1015 } 1016 1017 @Override 1018 public boolean matches(char c) { 1019 return true; 1020 } 1021 1022 @Override 1023 public int indexIn(CharSequence sequence) { 1024 return (sequence.length() == 0) ? -1 : 0; 1025 } 1026 1027 @Override 1028 public int indexIn(CharSequence sequence, int start) { 1029 int length = sequence.length(); 1030 checkPositionIndex(start, length); 1031 return (start == length) ? -1 : start; 1032 } 1033 1034 @Override 1035 public int lastIndexIn(CharSequence sequence) { 1036 return sequence.length() - 1; 1037 } 1038 1039 @Override 1040 public boolean matchesAllOf(CharSequence sequence) { 1041 checkNotNull(sequence); 1042 return true; 1043 } 1044 1045 @Override 1046 public boolean matchesNoneOf(CharSequence sequence) { 1047 return sequence.length() == 0; 1048 } 1049 1050 @Override 1051 public String removeFrom(CharSequence sequence) { 1052 checkNotNull(sequence); 1053 return ""; 1054 } 1055 1056 @Override 1057 public String replaceFrom(CharSequence sequence, char replacement) { 1058 char[] array = new char[sequence.length()]; 1059 Arrays.fill(array, replacement); 1060 return new String(array); 1061 } 1062 1063 @Override 1064 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1065 StringBuilder result = new StringBuilder(sequence.length() * replacement.length()); 1066 for (int i = 0; i < sequence.length(); i++) { 1067 result.append(replacement); 1068 } 1069 return result.toString(); 1070 } 1071 1072 @Override 1073 public String collapseFrom(CharSequence sequence, char replacement) { 1074 return (sequence.length() == 0) ? 
"" : String.valueOf(replacement); 1075 } 1076 1077 @Override 1078 public String trimFrom(CharSequence sequence) { 1079 checkNotNull(sequence); 1080 return ""; 1081 } 1082 1083 @Override 1084 public int countIn(CharSequence sequence) { 1085 return sequence.length(); 1086 } 1087 1088 @Override 1089 public CharMatcher and(CharMatcher other) { 1090 return checkNotNull(other); 1091 } 1092 1093 @Override 1094 public CharMatcher or(CharMatcher other) { 1095 checkNotNull(other); 1096 return this; 1097 } 1098 1099 @Override 1100 public CharMatcher negate() { 1101 return none(); 1102 } 1103 } 1104 1105 /** Implementation of {@link #none()}. */ 1106 private static final class None extends NamedFastMatcher { 1107 1108 static final None INSTANCE = new None(); 1109 1110 private None() { 1111 super("CharMatcher.none()"); 1112 } 1113 1114 @Override 1115 public boolean matches(char c) { 1116 return false; 1117 } 1118 1119 @Override 1120 public int indexIn(CharSequence sequence) { 1121 checkNotNull(sequence); 1122 return -1; 1123 } 1124 1125 @Override 1126 public int indexIn(CharSequence sequence, int start) { 1127 int length = sequence.length(); 1128 checkPositionIndex(start, length); 1129 return -1; 1130 } 1131 1132 @Override 1133 public int lastIndexIn(CharSequence sequence) { 1134 checkNotNull(sequence); 1135 return -1; 1136 } 1137 1138 @Override 1139 public boolean matchesAllOf(CharSequence sequence) { 1140 return sequence.length() == 0; 1141 } 1142 1143 @Override 1144 public boolean matchesNoneOf(CharSequence sequence) { 1145 checkNotNull(sequence); 1146 return true; 1147 } 1148 1149 @Override 1150 public String removeFrom(CharSequence sequence) { 1151 return sequence.toString(); 1152 } 1153 1154 @Override 1155 public String replaceFrom(CharSequence sequence, char replacement) { 1156 return sequence.toString(); 1157 } 1158 1159 @Override 1160 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1161 checkNotNull(replacement); 1162 return 
sequence.toString(); 1163 } 1164 1165 @Override 1166 public String collapseFrom(CharSequence sequence, char replacement) { 1167 return sequence.toString(); 1168 } 1169 1170 @Override 1171 public String trimFrom(CharSequence sequence) { 1172 return sequence.toString(); 1173 } 1174 1175 @Override 1176 public String trimLeadingFrom(CharSequence sequence) { 1177 return sequence.toString(); 1178 } 1179 1180 @Override 1181 public String trimTrailingFrom(CharSequence sequence) { 1182 return sequence.toString(); 1183 } 1184 1185 @Override 1186 public int countIn(CharSequence sequence) { 1187 checkNotNull(sequence); 1188 return 0; 1189 } 1190 1191 @Override 1192 public CharMatcher and(CharMatcher other) { 1193 checkNotNull(other); 1194 return this; 1195 } 1196 1197 @Override 1198 public CharMatcher or(CharMatcher other) { 1199 return checkNotNull(other); 1200 } 1201 1202 @Override 1203 public CharMatcher negate() { 1204 return any(); 1205 } 1206 } 1207 1208 /** Implementation of {@link #whitespace()}. 
*/ 1209 @VisibleForTesting 1210 static final class Whitespace extends NamedFastMatcher { 1211 1212 static final String TABLE = 1213 "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000" 1214 + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680" 1215 + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009" 1216 + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000"; 1217 static final int MULTIPLIER = 1682554634; 1218 static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1); 1219 1220 static final Whitespace INSTANCE = new Whitespace(); 1221 1222 Whitespace() { 1223 super("CharMatcher.whitespace()"); 1224 } 1225 1226 @Override 1227 public boolean matches(char c) { 1228 return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c; 1229 } 1230 1231 @GwtIncompatible // used only from other GwtIncompatible code 1232 @Override 1233 void setBits(BitSet table) { 1234 for (int i = 0; i < TABLE.length(); i++) { 1235 table.set(TABLE.charAt(i)); 1236 } 1237 } 1238 } 1239 1240 /** Implementation of {@link #breakingWhitespace()}. */ 1241 private static final class BreakingWhitespace extends CharMatcher { 1242 1243 static final CharMatcher INSTANCE = new BreakingWhitespace(); 1244 1245 @Override 1246 public boolean matches(char c) { 1247 switch (c) { 1248 case '\t': 1249 case '\n': 1250 case '\013': 1251 case '\f': 1252 case '\r': 1253 case ' ': 1254 case '\u0085': 1255 case '\u1680': 1256 case '\u2028': 1257 case '\u2029': 1258 case '\u205f': 1259 case '\u3000': 1260 return true; 1261 case '\u2007': 1262 return false; 1263 default: 1264 return c >= '\u2000' && c <= '\u200a'; 1265 } 1266 } 1267 1268 @Override 1269 public String toString() { 1270 return "CharMatcher.breakingWhitespace()"; 1271 } 1272 } 1273 1274 /** Implementation of {@link #ascii()}. 
*/ 1275 private static final class Ascii extends NamedFastMatcher { 1276 1277 static final Ascii INSTANCE = new Ascii(); 1278 1279 Ascii() { 1280 super("CharMatcher.ascii()"); 1281 } 1282 1283 @Override 1284 public boolean matches(char c) { 1285 return c <= '\u007f'; 1286 } 1287 } 1288 1289 /** Implementation that matches characters that fall within multiple ranges. */ 1290 private static class RangesMatcher extends CharMatcher { 1291 1292 private final String description; 1293 private final char[] rangeStarts; 1294 private final char[] rangeEnds; 1295 1296 RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { 1297 this.description = description; 1298 this.rangeStarts = rangeStarts; 1299 this.rangeEnds = rangeEnds; 1300 checkArgument(rangeStarts.length == rangeEnds.length); 1301 for (int i = 0; i < rangeStarts.length; i++) { 1302 checkArgument(rangeStarts[i] <= rangeEnds[i]); 1303 if (i + 1 < rangeStarts.length) { 1304 checkArgument(rangeEnds[i] < rangeStarts[i + 1]); 1305 } 1306 } 1307 } 1308 1309 @Override 1310 public boolean matches(char c) { 1311 int index = Arrays.binarySearch(rangeStarts, c); 1312 if (index >= 0) { 1313 return true; 1314 } else { 1315 index = ~index - 1; 1316 return index >= 0 && c <= rangeEnds[index]; 1317 } 1318 } 1319 1320 @Override 1321 public String toString() { 1322 return description; 1323 } 1324 } 1325 1326 /** Implementation of {@link #digit()}. */ 1327 private static final class Digit extends RangesMatcher { 1328 // Plug the following UnicodeSet pattern into 1329 // https://unicode.org/cldr/utility/list-unicodeset.jsp 1330 // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]] 1331 // and get the zeroes from there. 1332 1333 // Must be in ascending order. 
1334 private static final String ZEROES = 1335 "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6" 1336 + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0" 1337 + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10"; 1338 1339 private static char[] zeroes() { 1340 return ZEROES.toCharArray(); 1341 } 1342 1343 private static char[] nines() { 1344 char[] nines = new char[ZEROES.length()]; 1345 for (int i = 0; i < ZEROES.length(); i++) { 1346 nines[i] = (char) (ZEROES.charAt(i) + 9); 1347 } 1348 return nines; 1349 } 1350 1351 static final Digit INSTANCE = new Digit(); 1352 1353 private Digit() { 1354 super("CharMatcher.digit()", zeroes(), nines()); 1355 } 1356 } 1357 1358 /** Implementation of {@link #javaDigit()}. */ 1359 private static final class JavaDigit extends CharMatcher { 1360 1361 static final JavaDigit INSTANCE = new JavaDigit(); 1362 1363 @Override 1364 public boolean matches(char c) { 1365 return Character.isDigit(c); 1366 } 1367 1368 @Override 1369 public String toString() { 1370 return "CharMatcher.javaDigit()"; 1371 } 1372 } 1373 1374 /** Implementation of {@link #javaLetter()}. */ 1375 private static final class JavaLetter extends CharMatcher { 1376 1377 static final JavaLetter INSTANCE = new JavaLetter(); 1378 1379 @Override 1380 public boolean matches(char c) { 1381 return Character.isLetter(c); 1382 } 1383 1384 @Override 1385 public String toString() { 1386 return "CharMatcher.javaLetter()"; 1387 } 1388 } 1389 1390 /** Implementation of {@link #javaLetterOrDigit()}. 
*/ 1391 private static final class JavaLetterOrDigit extends CharMatcher { 1392 1393 static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit(); 1394 1395 @Override 1396 public boolean matches(char c) { 1397 return Character.isLetterOrDigit(c); 1398 } 1399 1400 @Override 1401 public String toString() { 1402 return "CharMatcher.javaLetterOrDigit()"; 1403 } 1404 } 1405 1406 /** Implementation of {@link #javaUpperCase()}. */ 1407 private static final class JavaUpperCase extends CharMatcher { 1408 1409 static final JavaUpperCase INSTANCE = new JavaUpperCase(); 1410 1411 @Override 1412 public boolean matches(char c) { 1413 return Character.isUpperCase(c); 1414 } 1415 1416 @Override 1417 public String toString() { 1418 return "CharMatcher.javaUpperCase()"; 1419 } 1420 } 1421 1422 /** Implementation of {@link #javaLowerCase()}. */ 1423 private static final class JavaLowerCase extends CharMatcher { 1424 1425 static final JavaLowerCase INSTANCE = new JavaLowerCase(); 1426 1427 @Override 1428 public boolean matches(char c) { 1429 return Character.isLowerCase(c); 1430 } 1431 1432 @Override 1433 public String toString() { 1434 return "CharMatcher.javaLowerCase()"; 1435 } 1436 } 1437 1438 /** Implementation of {@link #javaIsoControl()}. */ 1439 private static final class JavaIsoControl extends NamedFastMatcher { 1440 1441 static final JavaIsoControl INSTANCE = new JavaIsoControl(); 1442 1443 private JavaIsoControl() { 1444 super("CharMatcher.javaIsoControl()"); 1445 } 1446 1447 @Override 1448 public boolean matches(char c) { 1449 return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f'); 1450 } 1451 } 1452 1453 /** Implementation of {@link #invisible()}. */ 1454 private static final class Invisible extends RangesMatcher { 1455 // Plug the following UnicodeSet pattern into 1456 // https://unicode.org/cldr/utility/list-unicodeset.jsp 1457 // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]] 1458 // with the "Abbreviate" option, and get the ranges from there. 
1459 private static final String RANGE_STARTS = 1460 "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000\u2028\u205f\u2066" 1461 + "\u3000\ud800\ufeff\ufff9"; 1462 private static final String RANGE_ENDS = // inclusive ends 1463 "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u200f\u202f\u2064\u206f" 1464 + "\u3000\uf8ff\ufeff\ufffb"; 1465 1466 static final Invisible INSTANCE = new Invisible(); 1467 1468 private Invisible() { 1469 super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray()); 1470 } 1471 } 1472 1473 /** Implementation of {@link #singleWidth()}. */ 1474 private static final class SingleWidth extends RangesMatcher { 1475 1476 static final SingleWidth INSTANCE = new SingleWidth(); 1477 1478 private SingleWidth() { 1479 super( 1480 "CharMatcher.singleWidth()", 1481 "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), 1482 "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); 1483 } 1484 } 1485 1486 // Non-static factory implementation classes 1487 1488 /** Implementation of {@link #negate()}. 
*/ 1489 private static class Negated extends CharMatcher { 1490 1491 final CharMatcher original; 1492 1493 Negated(CharMatcher original) { 1494 this.original = checkNotNull(original); 1495 } 1496 1497 @Override 1498 public boolean matches(char c) { 1499 return !original.matches(c); 1500 } 1501 1502 @Override 1503 public boolean matchesAllOf(CharSequence sequence) { 1504 return original.matchesNoneOf(sequence); 1505 } 1506 1507 @Override 1508 public boolean matchesNoneOf(CharSequence sequence) { 1509 return original.matchesAllOf(sequence); 1510 } 1511 1512 @Override 1513 public int countIn(CharSequence sequence) { 1514 return sequence.length() - original.countIn(sequence); 1515 } 1516 1517 @GwtIncompatible // used only from other GwtIncompatible code 1518 @Override 1519 void setBits(BitSet table) { 1520 BitSet tmp = new BitSet(); 1521 original.setBits(tmp); 1522 tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 1523 table.or(tmp); 1524 } 1525 1526 @Override 1527 public CharMatcher negate() { 1528 return original; 1529 } 1530 1531 @Override 1532 public String toString() { 1533 return original + ".negate()"; 1534 } 1535 } 1536 1537 /** Implementation of {@link #and(CharMatcher)}. 
*/ 1538 private static final class And extends CharMatcher { 1539 1540 final CharMatcher first; 1541 final CharMatcher second; 1542 1543 And(CharMatcher a, CharMatcher b) { 1544 first = checkNotNull(a); 1545 second = checkNotNull(b); 1546 } 1547 1548 @Override 1549 public boolean matches(char c) { 1550 return first.matches(c) && second.matches(c); 1551 } 1552 1553 @GwtIncompatible // used only from other GwtIncompatible code 1554 @Override 1555 void setBits(BitSet table) { 1556 BitSet tmp1 = new BitSet(); 1557 first.setBits(tmp1); 1558 BitSet tmp2 = new BitSet(); 1559 second.setBits(tmp2); 1560 tmp1.and(tmp2); 1561 table.or(tmp1); 1562 } 1563 1564 @Override 1565 public String toString() { 1566 return "CharMatcher.and(" + first + ", " + second + ")"; 1567 } 1568 } 1569 1570 /** Implementation of {@link #or(CharMatcher)}. */ 1571 private static final class Or extends CharMatcher { 1572 1573 final CharMatcher first; 1574 final CharMatcher second; 1575 1576 Or(CharMatcher a, CharMatcher b) { 1577 first = checkNotNull(a); 1578 second = checkNotNull(b); 1579 } 1580 1581 @GwtIncompatible // used only from other GwtIncompatible code 1582 @Override 1583 void setBits(BitSet table) { 1584 first.setBits(table); 1585 second.setBits(table); 1586 } 1587 1588 @Override 1589 public boolean matches(char c) { 1590 return first.matches(c) || second.matches(c); 1591 } 1592 1593 @Override 1594 public String toString() { 1595 return "CharMatcher.or(" + first + ", " + second + ")"; 1596 } 1597 } 1598 1599 // Static factory implementations 1600 1601 /** Implementation of {@link #is(char)}. 
*/ 1602 private static final class Is extends FastMatcher { 1603 1604 private final char match; 1605 1606 Is(char match) { 1607 this.match = match; 1608 } 1609 1610 @Override 1611 public boolean matches(char c) { 1612 return c == match; 1613 } 1614 1615 @Override 1616 public String replaceFrom(CharSequence sequence, char replacement) { 1617 return sequence.toString().replace(match, replacement); 1618 } 1619 1620 @Override 1621 public CharMatcher and(CharMatcher other) { 1622 return other.matches(match) ? this : none(); 1623 } 1624 1625 @Override 1626 public CharMatcher or(CharMatcher other) { 1627 return other.matches(match) ? other : super.or(other); 1628 } 1629 1630 @Override 1631 public CharMatcher negate() { 1632 return isNot(match); 1633 } 1634 1635 @GwtIncompatible // used only from other GwtIncompatible code 1636 @Override 1637 void setBits(BitSet table) { 1638 table.set(match); 1639 } 1640 1641 @Override 1642 public String toString() { 1643 return "CharMatcher.is('" + showCharacter(match) + "')"; 1644 } 1645 } 1646 1647 /** Implementation of {@link #isNot(char)}. */ 1648 private static final class IsNot extends FastMatcher { 1649 1650 private final char match; 1651 1652 IsNot(char match) { 1653 this.match = match; 1654 } 1655 1656 @Override 1657 public boolean matches(char c) { 1658 return c != match; 1659 } 1660 1661 @Override 1662 public CharMatcher and(CharMatcher other) { 1663 return other.matches(match) ? super.and(other) : other; 1664 } 1665 1666 @Override 1667 public CharMatcher or(CharMatcher other) { 1668 return other.matches(match) ? 
any() : this; 1669 } 1670 1671 @GwtIncompatible // used only from other GwtIncompatible code 1672 @Override 1673 void setBits(BitSet table) { 1674 table.set(0, match); 1675 table.set(match + 1, Character.MAX_VALUE + 1); 1676 } 1677 1678 @Override 1679 public CharMatcher negate() { 1680 return is(match); 1681 } 1682 1683 @Override 1684 public String toString() { 1685 return "CharMatcher.isNot('" + showCharacter(match) + "')"; 1686 } 1687 } 1688 1689 private static CharMatcher.IsEither isEither(char c1, char c2) { 1690 return new CharMatcher.IsEither(c1, c2); 1691 } 1692 1693 /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */ 1694 private static final class IsEither extends FastMatcher { 1695 1696 private final char match1; 1697 private final char match2; 1698 1699 IsEither(char match1, char match2) { 1700 this.match1 = match1; 1701 this.match2 = match2; 1702 } 1703 1704 @Override 1705 public boolean matches(char c) { 1706 return c == match1 || c == match2; 1707 } 1708 1709 @GwtIncompatible // used only from other GwtIncompatible code 1710 @Override 1711 void setBits(BitSet table) { 1712 table.set(match1); 1713 table.set(match2); 1714 } 1715 1716 @Override 1717 public String toString() { 1718 return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")"; 1719 } 1720 } 1721 1722 /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. 
*/
  private static final class AnyOf extends CharMatcher {

    private final char[] chars;

    public AnyOf(CharSequence chars) {
      // Keep a sorted private copy so that matches(char) can binary-search it.
      char[] sorted = chars.toString().toCharArray();
      Arrays.sort(sorted);
      this.chars = sorted;
    }

    @Override
    public boolean matches(char c) {
      int position = Arrays.binarySearch(chars, c);
      return position >= 0;
    }

    @Override
    @GwtIncompatible // used only from other GwtIncompatible code
    void setBits(BitSet table) {
      for (int i = 0; i < chars.length; i++) {
        table.set(chars[i]);
      }
    }

    @Override
    public String toString() {
      // The characters are listed in sorted order, each rendered by showCharacter.
      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
      for (int i = 0; i < chars.length; i++) {
        description.append(showCharacter(chars[i]));
      }
      return description.append("\")").toString();
    }
  }

  /** The matcher behind {@link #inRange(char, char)}; both bounds are inclusive. */
  private static final class InRange extends FastMatcher {

    private final char startInclusive;
    private final char endInclusive;

    InRange(char startInclusive, char endInclusive) {
      // An inverted range is rejected.
      checkArgument(endInclusive >= startInclusive);
      this.startInclusive = startInclusive;
      this.endInclusive = endInclusive;
    }

    @Override
    public boolean matches(char c) {
      if (c < startInclusive) {
        return false;
      }
      return c <= endInclusive;
    }

    @GwtIncompatible // used only from other GwtIncompatible code
    @Override
    void setBits(BitSet table) {
      // BitSet.set(from, to) excludes the upper index, hence the + 1.
      table.set(startInclusive, endInclusive + 1);
    }

    @Override
    public String toString() {
      String shownStart = showCharacter(startInclusive);
      String shownEnd = showCharacter(endInclusive);
      return "CharMatcher.inRange('" + shownStart + "', '" + shownEnd + "')";
    }
  }

  /** The matcher behind {@link #forPredicate(Predicate)}; wraps a {@code Character} predicate. */
  private static final class ForPredicate extends CharMatcher {

    private final Predicate<? super Character> predicate;

    ForPredicate(Predicate<? super Character> predicate) {
      this.predicate = checkNotNull(predicate);
    }

    @Override
    public boolean matches(char c) {
      // The char is boxed before being handed to the predicate.
      Character boxed = c;
      return predicate.apply(boxed);
    }

    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
    @Override
    public boolean apply(Character character) {
      Character nonNull = checkNotNull(character);
      return predicate.apply(nonNull);
    }

    @Override
    public String toString() {
      return "CharMatcher.forPredicate(" + predicate + ")";
    }
  }
}