001/* 002 * Copyright (C) 2008 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.base; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021import static com.google.common.base.Preconditions.checkPositionIndex; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtCompatible; 025import com.google.common.annotations.GwtIncompatible; 026import com.google.common.annotations.VisibleForTesting; 027 028import java.util.Arrays; 029import java.util.BitSet; 030 031import javax.annotation.CheckReturnValue; 032 033/** 034 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does 035 * for any {@link Object}. Also offers basic text processing methods based on this function. 036 * Implementations are strongly encouraged to be side-effect-free and immutable. 037 * 038 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean 039 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}". 040 * 041 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand 042 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical 043 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher} 044 * treats these just as two separate characters. 045 * 046 * <p>Example usages: <pre> 047 * String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput); 048 * if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> 049 * 050 * <p>See the Guava User Guide article on <a href= 051 * "https://github.com/google/guava/wiki/StringsExplained#charmatcher"> 052 * {@code CharMatcher}</a>. 053 * 054 * @author Kevin Bourrillion 055 * @since 1.0 056 */ 057@Beta // Possibly change from chars to code points; decide constants vs. methods 058@GwtCompatible(emulated = true) 059public abstract class CharMatcher implements Predicate<Character> { 060 061 // Constant matcher factory methods 062 063 /** 064 * Matches any character. 065 * 066 * @since 19.0 (since 1.0 as constant {@code ANY}) 067 */ 068 public static CharMatcher any() { 069 return Any.INSTANCE; 070 } 071 072 /** 073 * Matches no characters. 074 * 075 * @since 19.0 (since 1.0 as constant {@code NONE}) 076 */ 077 public static CharMatcher none() { 078 return None.INSTANCE; 079 } 080 081 /** 082 * Determines whether a character is whitespace according to the latest Unicode standard, as 083 * illustrated <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 084 * This is not the same definition used by other Java APIs. (See a <a 085 * href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several 086 * definitions of "whitespace"</a>.) 087 * 088 * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up 089 * to date. 090 * 091 * @since 19.0 (since 1.0 as constant {@code WHITESPACE}) 092 */ 093 public static CharMatcher whitespace() { 094 return Whitespace.INSTANCE; 095 } 096 097 /** 098 * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 099 * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a 100 * discussion of that term. 101 * 102 * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE}) 103 */ 104 public static CharMatcher breakingWhitespace() { 105 return BreakingWhitespace.INSTANCE; 106 } 107 108 /** 109 * Determines whether a character is ASCII, meaning that its code point is less than 128. 110 * 111 * @since 19.0 (since 1.0 as constant {@code ASCII}) 112 */ 113 public static CharMatcher ascii() { 114 return Ascii.INSTANCE; 115 } 116 117 /** 118 * Determines whether a character is a digit according to 119 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. 120 * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 121 * 122 * @since 19.0 (since 1.0 as constant {@code DIGIT}) 123 */ 124 public static CharMatcher digit() { 125 return Digit.INSTANCE; 126 } 127 128 /** 129 * Determines whether a character is a digit according to {@linkplain Character#isDigit(char) 130 * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0', 131 * '9')}. 132 * 133 * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT}) 134 */ 135 public static CharMatcher javaDigit() { 136 return JavaDigit.INSTANCE; 137 } 138 139 /** 140 * Determines whether a character is a letter according to {@linkplain Character#isLetter(char) 141 * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code 142 * inRange('a', 'z').or(inRange('A', 'Z'))}. 143 * 144 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER}) 145 */ 146 public static CharMatcher javaLetter() { 147 return JavaLetter.INSTANCE; 148 } 149 150 /** 151 * Determines whether a character is a letter or digit according to {@linkplain 152 * Character#isLetterOrDigit(char) Java's definition}. 153 * 154 * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}). 155 */ 156 public static CharMatcher javaLetterOrDigit() { 157 return JavaLetterOrDigit.INSTANCE; 158 } 159 160 /** 161 * Determines whether a character is upper case according to {@linkplain 162 * Character#isUpperCase(char) Java's definition}. 163 * 164 * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE}) 165 */ 166 public static CharMatcher javaUpperCase() { 167 return JavaUpperCase.INSTANCE; 168 } 169 170 /** 171 * Determines whether a character is lower case according to {@linkplain 172 * Character#isLowerCase(char) Java's definition}. 173 * 174 * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE}) 175 */ 176 public static CharMatcher javaLowerCase() { 177 return JavaLowerCase.INSTANCE; 178 } 179 180 /** 181 * Determines whether a character is an ISO control character as specified by {@link 182 * Character#isISOControl(char)}. 183 * 184 * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL}) 185 */ 186 public static CharMatcher javaIsoControl() { 187 return JavaIsoControl.INSTANCE; 188 } 189 190 /** 191 * Determines whether a character is invisible; that is, if its Unicode category is any of 192 * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 193 * PRIVATE_USE according to ICU4J. 194 * 195 * @since 19.0 (since 1.0 as constant {@code INVISIBLE}) 196 */ 197 public static CharMatcher invisible() { 198 return Invisible.INSTANCE; 199 } 200 201 /** 202 * Determines whether a character is single-width (not double-width). When in doubt, this matcher 203 * errs on the side of returning {@code false} (that is, it tends to assume a character is 204 * double-width). 205 * 206 * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to 207 * date. 208 * 209 * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH}) 210 */ 211 public static CharMatcher singleWidth() { 212 return SingleWidth.INSTANCE; 213 } 214 215 // Legacy constants 216 // TODO(cgdecker): Deprecate these so they can be removed eventually 217 218 /** 219 * Determines whether a character is whitespace according to the latest Unicode standard, as 220 * illustrated <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 221 * This is not the same definition used by other Java APIs. (See a <a 222 * href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several 223 * definitions of "whitespace"</a>.) 224 * 225 * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up 226 * to date. 227 */ 228 public static final CharMatcher WHITESPACE = whitespace(); 229 230 /** 231 * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 232 * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a 233 * discussion of that term. 234 * 235 * @since 2.0 236 */ 237 public static final CharMatcher BREAKING_WHITESPACE = breakingWhitespace(); 238 239 /** 240 * Determines whether a character is ASCII, meaning that its code point is less than 128. 241 */ 242 public static final CharMatcher ASCII = ascii(); 243 244 /** 245 * Determines whether a character is a digit according to 246 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. 247 * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 248 */ 249 public static final CharMatcher DIGIT = digit(); 250 251 /** 252 * Determines whether a character is a digit according to {@linkplain Character#isDigit(char) 253 * Java's definition}. If you only care to match ASCII digits, you can use {@code 254 * inRange('0', '9')}. 255 */ 256 public static final CharMatcher JAVA_DIGIT = javaDigit(); 257 258 /** 259 * Determines whether a character is a letter according to {@linkplain Character#isLetter(char) 260 * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code 261 * inRange('a', 'z').or(inRange('A', 'Z'))}. 262 */ 263 public static final CharMatcher JAVA_LETTER = javaLetter(); 264 265 /** 266 * Determines whether a character is a letter or digit according to {@linkplain 267 * Character#isLetterOrDigit(char) Java's definition}. 268 */ 269 public static final CharMatcher JAVA_LETTER_OR_DIGIT = javaLetterOrDigit(); 270 271 /** 272 * Determines whether a character is upper case according to {@linkplain 273 * Character#isUpperCase(char) Java's definition}. 274 */ 275 public static final CharMatcher JAVA_UPPER_CASE = javaUpperCase(); 276 277 /** 278 * Determines whether a character is lower case according to {@linkplain 279 * Character#isLowerCase(char) Java's definition}. 280 */ 281 public static final CharMatcher JAVA_LOWER_CASE = javaLowerCase(); 282 283 /** 284 * Determines whether a character is an ISO control character as specified by {@link 285 * Character#isISOControl(char)}. 286 */ 287 public static final CharMatcher JAVA_ISO_CONTROL = javaIsoControl(); 288 289 /** 290 * Determines whether a character is invisible; that is, if its Unicode category is any of 291 * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 292 * PRIVATE_USE according to ICU4J. 293 */ 294 public static final CharMatcher INVISIBLE = invisible(); 295 296 /** 297 * Determines whether a character is single-width (not double-width). When in doubt, this matcher 298 * errs on the side of returning {@code false} (that is, it tends to assume a character is 299 * double-width). 300 * 301 * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to 302 * date. 303 */ 304 public static final CharMatcher SINGLE_WIDTH = singleWidth(); 305 306 /** Matches any character. */ 307 public static final CharMatcher ANY = any(); 308 309 /** Matches no characters. */ 310 public static final CharMatcher NONE = none(); 311 312 // Static factories 313 314 /** 315 * Returns a {@code char} matcher that matches only one specified character. 316 */ 317 public static CharMatcher is(final char match) { 318 return new Is(match); 319 } 320 321 /** 322 * Returns a {@code char} matcher that matches any character except the one specified. 323 * 324 * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 325 */ 326 public static CharMatcher isNot(final char match) { 327 return new IsNot(match); 328 } 329 330 /** 331 * Returns a {@code char} matcher that matches any character present in the given character 332 * sequence. 333 */ 334 public static CharMatcher anyOf(final CharSequence sequence) { 335 switch (sequence.length()) { 336 case 0: 337 return none(); 338 case 1: 339 return is(sequence.charAt(0)); 340 case 2: 341 return isEither(sequence.charAt(0), sequence.charAt(1)); 342 default: 343 // TODO(lowasser): is it potentially worth just going ahead and building a precomputed 344 // matcher? 345 return new AnyOf(sequence); 346 } 347 } 348 349 /** 350 * Returns a {@code char} matcher that matches any character not present in the given character 351 * sequence. 352 */ 353 public static CharMatcher noneOf(CharSequence sequence) { 354 return anyOf(sequence).negate(); 355 } 356 357 /** 358 * Returns a {@code char} matcher that matches any character in a given range (both endpoints are 359 * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code 360 * CharMatcher.inRange('a', 'z')}. 361 * 362 * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 363 */ 364 public static CharMatcher inRange(final char startInclusive, final char endInclusive) { 365 return new InRange(startInclusive, endInclusive); 366 } 367 368 /** 369 * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but 370 * which operates on primitive {@code char} instances instead. 371 */ 372 public static CharMatcher forPredicate(final Predicate<? super Character> predicate) { 373 return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate); 374 } 375 376 // Constructors 377 378 /** 379 * Constructor for use by subclasses. When subclassing, you may want to override 380 * {@code toString()} to provide a useful description. 381 */ 382 protected CharMatcher() {} 383 384 // Abstract methods 385 386 /** Determines a true or false value for the given character. */ 387 public abstract boolean matches(char c); 388 389 // Non-static factories 390 391 /** 392 * Returns a matcher that matches any character not matched by this matcher. 393 */ 394 public CharMatcher negate() { 395 return new Negated(this); 396 } 397 398 /** 399 * Returns a matcher that matches any character matched by both this matcher and {@code other}. 400 */ 401 public CharMatcher and(CharMatcher other) { 402 return new And(this, other); 403 } 404 405 /** 406 * Returns a matcher that matches any character matched by either this matcher or {@code other}. 407 */ 408 public CharMatcher or(CharMatcher other) { 409 return new Or(this, other); 410 } 411 412 /** 413 * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to 414 * query than the original; your mileage may vary. Precomputation takes time and is likely to be 415 * worthwhile only if the precomputed matcher is queried many thousands of times. 416 * 417 * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a 418 * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a 419 * worthwhile tradeoff in a browser. 420 */ 421 public CharMatcher precomputed() { 422 return Platform.precomputeCharMatcher(this); 423 } 424 425 private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; 426 427 /** 428 * This is the actual implementation of {@link #precomputed}, but we bounce calls through a 429 * method on {@link Platform} so that we can have different behavior in GWT. 430 * 431 * <p>This implementation tries to be smart in a number of ways. It recognizes cases where 432 * the negation is cheaper to precompute than the matcher itself; it tries to build small 433 * hash tables for matchers that only match a few characters, and so on. In the worst-case 434 * scenario, it constructs an eight-kilobyte bit array and queries that. 435 * In many situations this produces a matcher which is faster to query than the original. 436 */ 437 @GwtIncompatible("java.util.BitSet") 438 CharMatcher precomputedInternal() { 439 final BitSet table = new BitSet(); 440 setBits(table); 441 int totalCharacters = table.cardinality(); 442 if (totalCharacters * 2 <= DISTINCT_CHARS) { 443 return precomputedPositive(totalCharacters, table, toString()); 444 } else { 445 // TODO(lowasser): is it worth it to worry about the last character of large matchers? 446 table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 447 int negatedCharacters = DISTINCT_CHARS - totalCharacters; 448 String suffix = ".negate()"; 449 final String description = toString(); 450 String negatedDescription = 451 description.endsWith(suffix) 452 ? description.substring(0, description.length() - suffix.length()) 453 : description + suffix; 454 return new NegatedFastMatcher( 455 precomputedPositive(negatedCharacters, table, negatedDescription)) { 456 @Override 457 public String toString() { 458 return description; 459 } 460 }; 461 } 462 } 463 464 /** 465 * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper. 466 */ 467 @GwtIncompatible("java.util.BitSet") 468 private static CharMatcher precomputedPositive( 469 int totalCharacters, BitSet table, String description) { 470 switch (totalCharacters) { 471 case 0: 472 return none(); 473 case 1: 474 return is((char) table.nextSetBit(0)); 475 case 2: 476 char c1 = (char) table.nextSetBit(0); 477 char c2 = (char) table.nextSetBit(c1 + 1); 478 return isEither(c1, c2); 479 default: 480 return isSmall(totalCharacters, table.length()) 481 ? SmallCharMatcher.from(table, description) 482 : new BitSetMatcher(table, description); 483 } 484 } 485 486 @GwtIncompatible("SmallCharMatcher") 487 private static boolean isSmall(int totalCharacters, int tableLength) { 488 return totalCharacters <= SmallCharMatcher.MAX_SIZE 489 && tableLength > (totalCharacters * 4 * Character.SIZE); 490 // err on the side of BitSetMatcher 491 } 492 493 /** 494 * Sets bits in {@code table} matched by this matcher. 495 */ 496 @GwtIncompatible("java.util.BitSet") 497 void setBits(BitSet table) { 498 for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) { 499 if (matches((char) c)) { 500 table.set(c); 501 } 502 } 503 } 504 505 // Text processing routines 506 507 /** 508 * Returns {@code true} if a character sequence contains at least one matching character. 509 * Equivalent to {@code !matchesNoneOf(sequence)}. 510 * 511 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 512 * character, until this returns {@code true} or the end is reached. 513 * 514 * @param sequence the character sequence to examine, possibly empty 515 * @return {@code true} if this matcher matches at least one character in the sequence 516 * @since 8.0 517 */ 518 public boolean matchesAnyOf(CharSequence sequence) { 519 return !matchesNoneOf(sequence); 520 } 521 522 /** 523 * Returns {@code true} if a character sequence contains only matching characters. 524 * 525 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 526 * character, until this returns {@code false} or the end is reached. 527 * 528 * @param sequence the character sequence to examine, possibly empty 529 * @return {@code true} if this matcher matches every character in the sequence, including when 530 * the sequence is empty 531 */ 532 public boolean matchesAllOf(CharSequence sequence) { 533 for (int i = sequence.length() - 1; i >= 0; i--) { 534 if (!matches(sequence.charAt(i))) { 535 return false; 536 } 537 } 538 return true; 539 } 540 541 /** 542 * Returns {@code true} if a character sequence contains no matching characters. Equivalent to 543 * {@code !matchesAnyOf(sequence)}. 544 * 545 * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 546 * character, until this returns {@code false} or the end is reached. 547 * 548 * @param sequence the character sequence to examine, possibly empty 549 * @return {@code true} if this matcher matches every character in the sequence, including when 550 * the sequence is empty 551 */ 552 public boolean matchesNoneOf(CharSequence sequence) { 553 return indexIn(sequence) == -1; 554 } 555 556 /** 557 * Returns the index of the first matching character in a character sequence, or {@code -1} if no 558 * matching character is present. 559 * 560 * <p>The default implementation iterates over the sequence in forward order calling {@link 561 * #matches} for each character. 562 * 563 * @param sequence the character sequence to examine from the beginning 564 * @return an index, or {@code -1} if no character matches 565 */ 566 public int indexIn(CharSequence sequence) { 567 return indexIn(sequence, 0); 568 } 569 570 /** 571 * Returns the index of the first matching character in a character sequence, starting from a 572 * given position, or {@code -1} if no character matches after that position. 573 * 574 * <p>The default implementation iterates over the sequence in forward order, beginning at {@code 575 * start}, calling {@link #matches} for each character. 576 * 577 * @param sequence the character sequence to examine 578 * @param start the first index to examine; must be nonnegative and no greater than {@code 579 * sequence.length()} 580 * @return the index of the first matching character, guaranteed to be no less than {@code start}, 581 * or {@code -1} if no character matches 582 * @throws IndexOutOfBoundsException if start is negative or greater than {@code 583 * sequence.length()} 584 */ 585 public int indexIn(CharSequence sequence, int start) { 586 int length = sequence.length(); 587 checkPositionIndex(start, length); 588 for (int i = start; i < length; i++) { 589 if (matches(sequence.charAt(i))) { 590 return i; 591 } 592 } 593 return -1; 594 } 595 596 /** 597 * Returns the index of the last matching character in a character sequence, or {@code -1} if no 598 * matching character is present. 599 * 600 * <p>The default implementation iterates over the sequence in reverse order calling {@link 601 * #matches} for each character. 602 * 603 * @param sequence the character sequence to examine from the end 604 * @return an index, or {@code -1} if no character matches 605 */ 606 public int lastIndexIn(CharSequence sequence) { 607 for (int i = sequence.length() - 1; i >= 0; i--) { 608 if (matches(sequence.charAt(i))) { 609 return i; 610 } 611 } 612 return -1; 613 } 614 615 /** 616 * Returns the number of matching characters found in a character sequence. 617 */ 618 public int countIn(CharSequence sequence) { 619 int count = 0; 620 for (int i = 0; i < sequence.length(); i++) { 621 if (matches(sequence.charAt(i))) { 622 count++; 623 } 624 } 625 return count; 626 } 627 628 /** 629 * Returns a string containing all non-matching characters of a character sequence, in order. For 630 * example: <pre> {@code 631 * 632 * CharMatcher.is('a').removeFrom("bazaar")}</pre> 633 * 634 * ... returns {@code "bzr"}. 635 */ 636 @CheckReturnValue 637 public String removeFrom(CharSequence sequence) { 638 String string = sequence.toString(); 639 int pos = indexIn(string); 640 if (pos == -1) { 641 return string; 642 } 643 644 char[] chars = string.toCharArray(); 645 int spread = 1; 646 647 // This unusual loop comes from extensive benchmarking 648 OUT: 649 while (true) { 650 pos++; 651 while (true) { 652 if (pos == chars.length) { 653 break OUT; 654 } 655 if (matches(chars[pos])) { 656 break; 657 } 658 chars[pos - spread] = chars[pos]; 659 pos++; 660 } 661 spread++; 662 } 663 return new String(chars, 0, pos - spread); 664 } 665 666 /** 667 * Returns a string containing all matching characters of a character sequence, in order. For 668 * example: <pre> {@code 669 * 670 * CharMatcher.is('a').retainFrom("bazaar")}</pre> 671 * 672 * ... returns {@code "aaa"}. 673 */ 674 @CheckReturnValue 675 public String retainFrom(CharSequence sequence) { 676 return negate().removeFrom(sequence); 677 } 678 679 /** 680 * Returns a string copy of the input character sequence, with each character that matches this 681 * matcher replaced by a given replacement character. For example: <pre> {@code 682 * 683 * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre> 684 * 685 * ... returns {@code "rodor"}. 686 * 687 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 688 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 689 * character. 690 * 691 * @param sequence the character sequence to replace matching characters in 692 * @param replacement the character to append to the result string in place of each matching 693 * character in {@code sequence} 694 * @return the new string 695 */ 696 @CheckReturnValue 697 public String replaceFrom(CharSequence sequence, char replacement) { 698 String string = sequence.toString(); 699 int pos = indexIn(string); 700 if (pos == -1) { 701 return string; 702 } 703 char[] chars = string.toCharArray(); 704 chars[pos] = replacement; 705 for (int i = pos + 1; i < chars.length; i++) { 706 if (matches(chars[i])) { 707 chars[i] = replacement; 708 } 709 } 710 return new String(chars); 711 } 712 713 /** 714 * Returns a string copy of the input character sequence, with each character that matches this 715 * matcher replaced by a given replacement sequence. For example: <pre> {@code 716 * 717 * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre> 718 * 719 * ... returns {@code "yoohoo"}. 720 * 721 * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better 722 * off calling {@link #replaceFrom(CharSequence, char)} directly. 723 * 724 * @param sequence the character sequence to replace matching characters in 725 * @param replacement the characters to append to the result string in place of each matching 726 * character in {@code sequence} 727 * @return the new string 728 */ 729 @CheckReturnValue 730 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 731 int replacementLen = replacement.length(); 732 if (replacementLen == 0) { 733 return removeFrom(sequence); 734 } 735 if (replacementLen == 1) { 736 return replaceFrom(sequence, replacement.charAt(0)); 737 } 738 739 String string = sequence.toString(); 740 int pos = indexIn(string); 741 if (pos == -1) { 742 return string; 743 } 744 745 int len = string.length(); 746 StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); 747 748 int oldpos = 0; 749 do { 750 buf.append(string, oldpos, pos); 751 buf.append(replacement); 752 oldpos = pos + 1; 753 pos = indexIn(string, oldpos); 754 } while (pos != -1); 755 756 buf.append(string, oldpos, len); 757 return buf.toString(); 758 } 759 760 /** 761 * Returns a substring of the input character sequence that omits all characters this matcher 762 * matches from the beginning and from the end of the string. For example: <pre> {@code 763 * 764 * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre> 765 * 766 * ... returns {@code "cat"}. 767 * 768 * <p>Note that: <pre> {@code 769 * 770 * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre> 771 * 772 * ... is equivalent to {@link String#trim()}. 773 */ 774 @CheckReturnValue 775 public String trimFrom(CharSequence sequence) { 776 int len = sequence.length(); 777 int first; 778 int last; 779 780 for (first = 0; first < len; first++) { 781 if (!matches(sequence.charAt(first))) { 782 break; 783 } 784 } 785 for (last = len - 1; last > first; last--) { 786 if (!matches(sequence.charAt(last))) { 787 break; 788 } 789 } 790 791 return sequence.subSequence(first, last + 1).toString(); 792 } 793 794 /** 795 * Returns a substring of the input character sequence that omits all characters this matcher 796 * matches from the beginning of the string. For example: <pre> {@code 797 * 798 * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre> 799 * 800 * ... returns {@code "catbab"}. 801 */ 802 @CheckReturnValue 803 public String trimLeadingFrom(CharSequence sequence) { 804 int len = sequence.length(); 805 for (int first = 0; first < len; first++) { 806 if (!matches(sequence.charAt(first))) { 807 return sequence.subSequence(first, len).toString(); 808 } 809 } 810 return ""; 811 } 812 813 /** 814 * Returns a substring of the input character sequence that omits all characters this matcher 815 * matches from the end of the string. For example: <pre> {@code 816 * 817 * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre> 818 * 819 * ... returns {@code "abacat"}. 820 */ 821 @CheckReturnValue 822 public String trimTrailingFrom(CharSequence sequence) { 823 int len = sequence.length(); 824 for (int last = len - 1; last >= 0; last--) { 825 if (!matches(sequence.charAt(last))) { 826 return sequence.subSequence(0, last + 1).toString(); 827 } 828 } 829 return ""; 830 } 831 832 /** 833 * Returns a string copy of the input character sequence, with each group of consecutive 834 * characters that match this matcher replaced by a single replacement character. For example: 835 * <pre> {@code 836 * 837 * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre> 838 * 839 * ... returns {@code "b-p-r"}. 840 * 841 * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 842 * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 843 * character. 844 * 845 * @param sequence the character sequence to replace matching groups of characters in 846 * @param replacement the character to append to the result string in place of each group of 847 * matching characters in {@code sequence} 848 * @return the new string 849 */ 850 @CheckReturnValue 851 public String collapseFrom(CharSequence sequence, char replacement) { 852 // This implementation avoids unnecessary allocation. 853 int len = sequence.length(); 854 for (int i = 0; i < len; i++) { 855 char c = sequence.charAt(i); 856 if (matches(c)) { 857 if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { 858 // a no-op replacement 859 i++; 860 } else { 861 StringBuilder builder = 862 new StringBuilder(len).append(sequence.subSequence(0, i)).append(replacement); 863 return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); 864 } 865 } 866 } 867 // no replacement needed 868 return sequence.toString(); 869 } 870 871 /** 872 * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that 873 * groups of matching characters at the start or end of the sequence are removed without 874 * replacement. 875 */ 876 @CheckReturnValue 877 public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 878 // This implementation avoids unnecessary allocation. 879 int len = sequence.length(); 880 int first = 0; 881 int last = len - 1; 882 883 while (first < len && matches(sequence.charAt(first))) { 884 first++; 885 } 886 887 while (last > first && matches(sequence.charAt(last))) { 888 last--; 889 } 890 891 return (first == 0 && last == len - 1) 892 ? collapseFrom(sequence, replacement) 893 : finishCollapseFrom( 894 sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false); 895 } 896 897 private String finishCollapseFrom( 898 CharSequence sequence, 899 int start, 900 int end, 901 char replacement, 902 StringBuilder builder, 903 boolean inMatchingGroup) { 904 for (int i = start; i < end; i++) { 905 char c = sequence.charAt(i); 906 if (matches(c)) { 907 if (!inMatchingGroup) { 908 builder.append(replacement); 909 inMatchingGroup = true; 910 } 911 } else { 912 builder.append(c); 913 inMatchingGroup = false; 914 } 915 } 916 return builder.toString(); 917 } 918 919 /** 920 * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches} 921 * instead. 922 */ 923 @Deprecated 924 @Override 925 public boolean apply(Character character) { 926 return matches(character); 927 } 928 929 /** 930 * Returns a string representation of this {@code CharMatcher}, such as 931 * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. 932 */ 933 @Override 934 public String toString() { 935 return super.toString(); 936 } 937 938 /** 939 * Returns the Java Unicode escape sequence for the given character, in the form "\u12AB" where 940 * "12AB" is the four hexadecimal digits representing the 16 bits of the UTF-16 character. 941 */ 942 private static String showCharacter(char c) { 943 String hex = "0123456789ABCDEF"; 944 char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'}; 945 for (int i = 0; i < 4; i++) { 946 tmp[5 - i] = hex.charAt(c & 0xF); 947 c = (char) (c >> 4); 948 } 949 return String.copyValueOf(tmp); 950 } 951 952 // Fast matchers 953 954 /** A matcher for which precomputation will not yield any significant benefit. */ 955 abstract static class FastMatcher extends CharMatcher { 956 957 @Override 958 public final CharMatcher precomputed() { 959 return this; 960 } 961 962 @Override 963 public CharMatcher negate() { 964 return new NegatedFastMatcher(this); 965 } 966 } 967 968 /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */ 969 abstract static class NamedFastMatcher extends FastMatcher { 970 971 private final String description; 972 973 NamedFastMatcher(String description) { 974 this.description = checkNotNull(description); 975 } 976 977 @Override 978 public final String toString() { 979 return description; 980 } 981 } 982 983 /** Negation of a {@link FastMatcher}. */ 984 static class NegatedFastMatcher extends Negated { 985 986 NegatedFastMatcher(CharMatcher original) { 987 super(original); 988 } 989 990 @Override 991 public final CharMatcher precomputed() { 992 return this; 993 } 994 } 995 996 /** Fast matcher using a {@link BitSet} table of matching characters. */ 997 @GwtIncompatible("java.util.BitSet") 998 private static final class BitSetMatcher extends NamedFastMatcher { 999 1000 private final BitSet table; 1001 1002 private BitSetMatcher(BitSet table, String description) { 1003 super(description); 1004 if (table.length() + Long.SIZE < table.size()) { 1005 table = (BitSet) table.clone(); 1006 // If only we could actually call BitSet.trimToSize() ourselves... 1007 } 1008 this.table = table; 1009 } 1010 1011 @Override 1012 public boolean matches(char c) { 1013 return table.get(c); 1014 } 1015 1016 @Override 1017 void setBits(BitSet bitSet) { 1018 bitSet.or(table); 1019 } 1020 } 1021 1022 // Static constant implementation classes 1023 1024 /** Implementation of {@link #any()}. */ 1025 private static final class Any extends NamedFastMatcher { 1026 1027 static final Any INSTANCE = new Any(); 1028 1029 private Any() { 1030 super("CharMatcher.any()"); 1031 } 1032 1033 @Override 1034 public boolean matches(char c) { 1035 return true; 1036 } 1037 1038 @Override 1039 public int indexIn(CharSequence sequence) { 1040 return (sequence.length() == 0) ? -1 : 0; 1041 } 1042 1043 @Override 1044 public int indexIn(CharSequence sequence, int start) { 1045 int length = sequence.length(); 1046 checkPositionIndex(start, length); 1047 return (start == length) ? -1 : start; 1048 } 1049 1050 @Override 1051 public int lastIndexIn(CharSequence sequence) { 1052 return sequence.length() - 1; 1053 } 1054 1055 @Override 1056 public boolean matchesAllOf(CharSequence sequence) { 1057 checkNotNull(sequence); 1058 return true; 1059 } 1060 1061 @Override 1062 public boolean matchesNoneOf(CharSequence sequence) { 1063 return sequence.length() == 0; 1064 } 1065 1066 @Override 1067 public String removeFrom(CharSequence sequence) { 1068 checkNotNull(sequence); 1069 return ""; 1070 } 1071 1072 @Override 1073 public String replaceFrom(CharSequence sequence, char replacement) { 1074 char[] array = new char[sequence.length()]; 1075 Arrays.fill(array, replacement); 1076 return new String(array); 1077 } 1078 1079 @Override 1080 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1081 StringBuilder result = new StringBuilder(sequence.length() * replacement.length()); 1082 for (int i = 0; i < sequence.length(); i++) { 1083 result.append(replacement); 1084 } 1085 return result.toString(); 1086 } 1087 1088 @Override 1089 public String collapseFrom(CharSequence sequence, char replacement) { 1090 return (sequence.length() == 0) ? "" : String.valueOf(replacement); 1091 } 1092 1093 @Override 1094 public String trimFrom(CharSequence sequence) { 1095 checkNotNull(sequence); 1096 return ""; 1097 } 1098 1099 @Override 1100 public int countIn(CharSequence sequence) { 1101 return sequence.length(); 1102 } 1103 1104 @Override 1105 public CharMatcher and(CharMatcher other) { 1106 return checkNotNull(other); 1107 } 1108 1109 @Override 1110 public CharMatcher or(CharMatcher other) { 1111 checkNotNull(other); 1112 return this; 1113 } 1114 1115 @Override 1116 public CharMatcher negate() { 1117 return none(); 1118 } 1119 } 1120 1121 /** Implementation of {@link #none()}. */ 1122 private static final class None extends NamedFastMatcher { 1123 1124 static final None INSTANCE = new None(); 1125 1126 private None() { 1127 super("CharMatcher.none()"); 1128 } 1129 1130 @Override 1131 public boolean matches(char c) { 1132 return false; 1133 } 1134 1135 @Override 1136 public int indexIn(CharSequence sequence) { 1137 checkNotNull(sequence); 1138 return -1; 1139 } 1140 1141 @Override 1142 public int indexIn(CharSequence sequence, int start) { 1143 int length = sequence.length(); 1144 checkPositionIndex(start, length); 1145 return -1; 1146 } 1147 1148 @Override 1149 public int lastIndexIn(CharSequence sequence) { 1150 checkNotNull(sequence); 1151 return -1; 1152 } 1153 1154 @Override 1155 public boolean matchesAllOf(CharSequence sequence) { 1156 return sequence.length() == 0; 1157 } 1158 1159 @Override 1160 public boolean matchesNoneOf(CharSequence sequence) { 1161 checkNotNull(sequence); 1162 return true; 1163 } 1164 1165 @Override 1166 public String removeFrom(CharSequence sequence) { 1167 return sequence.toString(); 1168 } 1169 1170 @Override 1171 public String replaceFrom(CharSequence sequence, char replacement) { 1172 return sequence.toString(); 1173 } 1174 1175 @Override 1176 public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1177 checkNotNull(replacement); 1178 return sequence.toString(); 1179 } 1180 1181 @Override 1182 public String collapseFrom(CharSequence sequence, char replacement) { 1183 return sequence.toString(); 1184 } 1185 1186 @Override 1187 public String trimFrom(CharSequence sequence) { 1188 return sequence.toString(); 1189 } 1190 1191 @Override 1192 public String trimLeadingFrom(CharSequence sequence) { 1193 return sequence.toString(); 1194 } 1195 1196 @Override 1197 public String trimTrailingFrom(CharSequence sequence) { 1198 return sequence.toString(); 1199 } 1200 1201 @Override 1202 public int countIn(CharSequence sequence) { 1203 checkNotNull(sequence); 1204 return 0; 1205 } 1206 1207 @Override 1208 public CharMatcher and(CharMatcher other) { 1209 checkNotNull(other); 1210 return this; 1211 } 1212 1213 @Override 1214 public CharMatcher or(CharMatcher other) { 1215 return checkNotNull(other); 1216 } 1217 1218 @Override 1219 public CharMatcher negate() { 1220 return any(); 1221 } 1222 } 1223 1224 /** Implementation of {@link #whitespace()}. */ 1225 @VisibleForTesting 1226 static final class Whitespace extends NamedFastMatcher { 1227 1228 static final String TABLE = 1229 "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000" 1230 + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680" 1231 + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009" 1232 + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000"; 1233 static final int MULTIPLIER = 1682554634; 1234 static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1); 1235 1236 static final Whitespace INSTANCE = new Whitespace(); 1237 1238 Whitespace() { 1239 super("CharMatcher.whitespace()"); 1240 } 1241 1242 @Override 1243 public boolean matches(char c) { 1244 return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c; 1245 } 1246 1247 @GwtIncompatible("java.util.BitSet") 1248 @Override 1249 void setBits(BitSet table) { 1250 for (int i = 0; i < TABLE.length(); i++) { 1251 table.set(TABLE.charAt(i)); 1252 } 1253 } 1254 } 1255 1256 /** Implementation of {@link #breakingWhitespace()}. */ 1257 private static final class BreakingWhitespace extends CharMatcher { 1258 1259 static final CharMatcher INSTANCE = new BreakingWhitespace(); 1260 1261 @Override 1262 public boolean matches(char c) { 1263 switch (c) { 1264 case '\t': 1265 case '\n': 1266 case '\013': 1267 case '\f': 1268 case '\r': 1269 case ' ': 1270 case '\u0085': 1271 case '\u1680': 1272 case '\u2028': 1273 case '\u2029': 1274 case '\u205f': 1275 case '\u3000': 1276 return true; 1277 case '\u2007': 1278 return false; 1279 default: 1280 return c >= '\u2000' && c <= '\u200a'; 1281 } 1282 } 1283 1284 @Override 1285 public String toString() { 1286 return "CharMatcher.breakingWhitespace()"; 1287 } 1288 } 1289 1290 /** Implementation of {@link #ascii()}. */ 1291 private static final class Ascii extends NamedFastMatcher { 1292 1293 static final Ascii INSTANCE = new Ascii(); 1294 1295 Ascii() { 1296 super("CharMatcher.ascii()"); 1297 } 1298 1299 @Override 1300 public boolean matches(char c) { 1301 return c <= '\u007f'; 1302 } 1303 } 1304 1305 /** Implementation that matches characters that fall within multiple ranges. */ 1306 private static class RangesMatcher extends CharMatcher { 1307 1308 private final String description; 1309 private final char[] rangeStarts; 1310 private final char[] rangeEnds; 1311 1312 RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { 1313 this.description = description; 1314 this.rangeStarts = rangeStarts; 1315 this.rangeEnds = rangeEnds; 1316 checkArgument(rangeStarts.length == rangeEnds.length); 1317 for (int i = 0; i < rangeStarts.length; i++) { 1318 checkArgument(rangeStarts[i] <= rangeEnds[i]); 1319 if (i + 1 < rangeStarts.length) { 1320 checkArgument(rangeEnds[i] < rangeStarts[i + 1]); 1321 } 1322 } 1323 } 1324 1325 @Override 1326 public boolean matches(char c) { 1327 int index = Arrays.binarySearch(rangeStarts, c); 1328 if (index >= 0) { 1329 return true; 1330 } else { 1331 index = ~index - 1; 1332 return index >= 0 && c <= rangeEnds[index]; 1333 } 1334 } 1335 1336 @Override 1337 public String toString() { 1338 return description; 1339 } 1340 } 1341 1342 /** Implementation of {@link #digit()}. */ 1343 private static final class Digit extends RangesMatcher { 1344 1345 // Must be in ascending order. 1346 private static final String ZEROES = 1347 "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66" 1348 + "\u0be6\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810" 1349 + "\u1946\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; 1350 1351 private static char[] zeroes() { 1352 return ZEROES.toCharArray(); 1353 } 1354 1355 private static char[] nines() { 1356 char[] nines = new char[ZEROES.length()]; 1357 for (int i = 0; i < ZEROES.length(); i++) { 1358 nines[i] = (char) (ZEROES.charAt(i) + 9); 1359 } 1360 return nines; 1361 } 1362 1363 static final Digit INSTANCE = new Digit(); 1364 1365 private Digit() { 1366 super("CharMatcher.digit()", zeroes(), nines()); 1367 } 1368 } 1369 1370 /** Implementation of {@link #javaDigit()}. */ 1371 private static final class JavaDigit extends CharMatcher { 1372 1373 static final JavaDigit INSTANCE = new JavaDigit(); 1374 1375 @Override 1376 public boolean matches(char c) { 1377 return Character.isDigit(c); 1378 } 1379 1380 @Override 1381 public String toString() { 1382 return "CharMatcher.javaDigit()"; 1383 } 1384 } 1385 1386 /** Implementation of {@link #javaLetter()}. */ 1387 private static final class JavaLetter extends CharMatcher { 1388 1389 static final JavaLetter INSTANCE = new JavaLetter(); 1390 1391 @Override 1392 public boolean matches(char c) { 1393 return Character.isLetter(c); 1394 } 1395 1396 @Override 1397 public String toString() { 1398 return "CharMatcher.javaLetter()"; 1399 } 1400 } 1401 1402 /** Implementation of {@link #javaLetterOrDigit()}. */ 1403 private static final class JavaLetterOrDigit extends CharMatcher { 1404 1405 static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit(); 1406 1407 @Override 1408 public boolean matches(char c) { 1409 return Character.isLetterOrDigit(c); 1410 } 1411 1412 @Override 1413 public String toString() { 1414 return "CharMatcher.javaLetterOrDigit()"; 1415 } 1416 } 1417 1418 /** Implementation of {@link #javaUpperCase()}. */ 1419 private static final class JavaUpperCase extends CharMatcher { 1420 1421 static final JavaUpperCase INSTANCE = new JavaUpperCase(); 1422 1423 @Override 1424 public boolean matches(char c) { 1425 return Character.isUpperCase(c); 1426 } 1427 1428 @Override 1429 public String toString() { 1430 return "CharMatcher.javaUpperCase()"; 1431 } 1432 } 1433 1434 /** Implementation of {@link #javaLowerCase()}. */ 1435 private static final class JavaLowerCase extends CharMatcher { 1436 1437 static final JavaLowerCase INSTANCE = new JavaLowerCase(); 1438 1439 @Override 1440 public boolean matches(char c) { 1441 return Character.isLowerCase(c); 1442 } 1443 1444 @Override 1445 public String toString() { 1446 return "CharMatcher.javaLowerCase()"; 1447 } 1448 } 1449 1450 /** Implementation of {@link #javaIsoControl()}. */ 1451 private static final class JavaIsoControl extends NamedFastMatcher { 1452 1453 static final JavaIsoControl INSTANCE = new JavaIsoControl(); 1454 1455 private JavaIsoControl() { 1456 super("CharMatcher.javaIsoControl()"); 1457 } 1458 1459 @Override 1460 public boolean matches(char c) { 1461 return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f'); 1462 } 1463 } 1464 1465 /** Implementation of {@link #invisible()}. */ 1466 private static final class Invisible extends RangesMatcher { 1467 1468 private static final String RANGE_STARTS = 1469 "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067" 1470 + "\u2068\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa"; 1471 private static final String RANGE_ENDS = 1472 "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067" 1473 + "\u2068\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb"; 1474 1475 static final Invisible INSTANCE = new Invisible(); 1476 1477 private Invisible() { 1478 super( 1479 "CharMatcher.invisible()", 1480 RANGE_STARTS.toCharArray(), 1481 RANGE_ENDS.toCharArray()); 1482 } 1483 } 1484 1485 /** Implementation of {@link #singleWidth()}. */ 1486 private static final class SingleWidth extends RangesMatcher { 1487 1488 static final SingleWidth INSTANCE = new SingleWidth(); 1489 1490 private SingleWidth() { 1491 super( 1492 "CharMatcher.singleWidth()", 1493 "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), 1494 "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); 1495 } 1496 } 1497 1498 // Non-static factory implementation classes 1499 1500 /** Implementation of {@link #negate()}. */ 1501 private static class Negated extends CharMatcher { 1502 1503 final CharMatcher original; 1504 1505 Negated(CharMatcher original) { 1506 this.original = checkNotNull(original); 1507 } 1508 1509 @Override 1510 public boolean matches(char c) { 1511 return !original.matches(c); 1512 } 1513 1514 @Override 1515 public boolean matchesAllOf(CharSequence sequence) { 1516 return original.matchesNoneOf(sequence); 1517 } 1518 1519 @Override 1520 public boolean matchesNoneOf(CharSequence sequence) { 1521 return original.matchesAllOf(sequence); 1522 } 1523 1524 @Override 1525 public int countIn(CharSequence sequence) { 1526 return sequence.length() - original.countIn(sequence); 1527 } 1528 1529 @GwtIncompatible("java.util.BitSet") 1530 @Override 1531 void setBits(BitSet table) { 1532 BitSet tmp = new BitSet(); 1533 original.setBits(tmp); 1534 tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 1535 table.or(tmp); 1536 } 1537 1538 @Override 1539 public CharMatcher negate() { 1540 return original; 1541 } 1542 1543 @Override 1544 public String toString() { 1545 return original + ".negate()"; 1546 } 1547 } 1548 1549 /** Implementation of {@link #and(CharMatcher)}. */ 1550 private static final class And extends CharMatcher { 1551 1552 final CharMatcher first; 1553 final CharMatcher second; 1554 1555 And(CharMatcher a, CharMatcher b) { 1556 first = checkNotNull(a); 1557 second = checkNotNull(b); 1558 } 1559 1560 @Override 1561 public boolean matches(char c) { 1562 return first.matches(c) && second.matches(c); 1563 } 1564 1565 @GwtIncompatible("java.util.BitSet") 1566 @Override 1567 void setBits(BitSet table) { 1568 BitSet tmp1 = new BitSet(); 1569 first.setBits(tmp1); 1570 BitSet tmp2 = new BitSet(); 1571 second.setBits(tmp2); 1572 tmp1.and(tmp2); 1573 table.or(tmp1); 1574 } 1575 1576 @Override 1577 public String toString() { 1578 return "CharMatcher.and(" + first + ", " + second + ")"; 1579 } 1580 } 1581 1582 /** Implementation of {@link #or(CharMatcher)}. */ 1583 private static final class Or extends CharMatcher { 1584 1585 final CharMatcher first; 1586 final CharMatcher second; 1587 1588 Or(CharMatcher a, CharMatcher b) { 1589 first = checkNotNull(a); 1590 second = checkNotNull(b); 1591 } 1592 1593 @GwtIncompatible("java.util.BitSet") 1594 @Override 1595 void setBits(BitSet table) { 1596 first.setBits(table); 1597 second.setBits(table); 1598 } 1599 1600 @Override 1601 public boolean matches(char c) { 1602 return first.matches(c) || second.matches(c); 1603 } 1604 1605 @Override 1606 public String toString() { 1607 return "CharMatcher.or(" + first + ", " + second + ")"; 1608 } 1609 } 1610 1611 // Static factory implementations 1612 1613 /** Implementation of {@link #is(char)}. */ 1614 private static final class Is extends FastMatcher { 1615 1616 private final char match; 1617 1618 Is(char match) { 1619 this.match = match; 1620 } 1621 1622 @Override 1623 public boolean matches(char c) { 1624 return c == match; 1625 } 1626 1627 @Override 1628 public String replaceFrom(CharSequence sequence, char replacement) { 1629 return sequence.toString().replace(match, replacement); 1630 } 1631 1632 @Override 1633 public CharMatcher and(CharMatcher other) { 1634 return other.matches(match) ? this : none(); 1635 } 1636 1637 @Override 1638 public CharMatcher or(CharMatcher other) { 1639 return other.matches(match) ? other : super.or(other); 1640 } 1641 1642 @Override 1643 public CharMatcher negate() { 1644 return isNot(match); 1645 } 1646 1647 @GwtIncompatible("java.util.BitSet") 1648 @Override 1649 void setBits(BitSet table) { 1650 table.set(match); 1651 } 1652 1653 @Override 1654 public String toString() { 1655 return "CharMatcher.is('" + showCharacter(match) + "')"; 1656 } 1657 } 1658 1659 /** Implementation of {@link #isNot(char)}. */ 1660 private static final class IsNot extends FastMatcher { 1661 1662 private final char match; 1663 1664 IsNot(char match) { 1665 this.match = match; 1666 } 1667 1668 @Override 1669 public boolean matches(char c) { 1670 return c != match; 1671 } 1672 1673 @Override 1674 public CharMatcher and(CharMatcher other) { 1675 return other.matches(match) ? super.and(other) : other; 1676 } 1677 1678 @Override 1679 public CharMatcher or(CharMatcher other) { 1680 return other.matches(match) ? any() : this; 1681 } 1682 1683 @GwtIncompatible("java.util.BitSet") 1684 @Override 1685 void setBits(BitSet table) { 1686 table.set(0, match); 1687 table.set(match + 1, Character.MAX_VALUE + 1); 1688 } 1689 1690 @Override 1691 public CharMatcher negate() { 1692 return is(match); 1693 } 1694 1695 @Override 1696 public String toString() { 1697 return "CharMatcher.isNot('" + showCharacter(match) + "')"; 1698 } 1699 } 1700 1701 private static CharMatcher.IsEither isEither(char c1, char c2) { 1702 return new CharMatcher.IsEither(c1, c2); 1703 } 1704 1705 /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */ 1706 private static final class IsEither extends FastMatcher { 1707 1708 private final char match1; 1709 private final char match2; 1710 1711 IsEither(char match1, char match2) { 1712 this.match1 = match1; 1713 this.match2 = match2; 1714 } 1715 1716 @Override 1717 public boolean matches(char c) { 1718 return c == match1 || c == match2; 1719 } 1720 1721 @GwtIncompatible("java.util.BitSet") 1722 @Override 1723 void setBits(BitSet table) { 1724 table.set(match1); 1725 table.set(match2); 1726 } 1727 1728 @Override 1729 public String toString() { 1730 return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")"; 1731 } 1732 } 1733 1734 /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */ 1735 private static final class AnyOf extends CharMatcher { 1736 1737 private final char[] chars; 1738 1739 public AnyOf(CharSequence chars) { 1740 this.chars = chars.toString().toCharArray(); 1741 Arrays.sort(this.chars); 1742 } 1743 1744 @Override 1745 public boolean matches(char c) { 1746 return Arrays.binarySearch(chars, c) >= 0; 1747 } 1748 1749 @Override 1750 @GwtIncompatible("java.util.BitSet") 1751 void setBits(BitSet table) { 1752 for (char c : chars) { 1753 table.set(c); 1754 } 1755 } 1756 1757 @Override 1758 public String toString() { 1759 StringBuilder description = new StringBuilder("CharMatcher.anyOf(\""); 1760 for (char c : chars) { 1761 description.append(showCharacter(c)); 1762 } 1763 description.append("\")"); 1764 return description.toString(); 1765 } 1766 } 1767 1768 /** Implementation of {@link #inRange(char, char)}. */ 1769 private static final class InRange extends FastMatcher { 1770 1771 private final char startInclusive; 1772 private final char endInclusive; 1773 1774 InRange(char startInclusive, char endInclusive) { 1775 checkArgument(endInclusive >= startInclusive); 1776 this.startInclusive = startInclusive; 1777 this.endInclusive = endInclusive; 1778 } 1779 1780 @Override 1781 public boolean matches(char c) { 1782 return startInclusive <= c && c <= endInclusive; 1783 } 1784 1785 @GwtIncompatible("java.util.BitSet") 1786 @Override 1787 void setBits(BitSet table) { 1788 table.set(startInclusive, endInclusive + 1); 1789 } 1790 1791 @Override 1792 public String toString() { 1793 return "CharMatcher.inRange('" 1794 + showCharacter(startInclusive) 1795 + "', '" 1796 + showCharacter(endInclusive) 1797 + "')"; 1798 } 1799 } 1800 1801 /** Implementation of {@link #forPredicate(Predicate)}. */ 1802 private static final class ForPredicate extends CharMatcher { 1803 1804 private final Predicate<? super Character> predicate; 1805 1806 ForPredicate(Predicate<? super Character> predicate) { 1807 this.predicate = checkNotNull(predicate); 1808 } 1809 1810 @Override 1811 public boolean matches(char c) { 1812 return predicate.apply(c); 1813 } 1814 1815 @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily 1816 @Override 1817 public boolean apply(Character character) { 1818 return predicate.apply(checkNotNull(character)); 1819 } 1820 1821 @Override 1822 public String toString() { 1823 return "CharMatcher.forPredicate(" + predicate + ")"; 1824 } 1825 } 1826}