001    /*
002     * Copyright (C) 2008 Google Inc.
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005     * in compliance with the License. You may obtain a copy of the License at
006     *
007     * http://www.apache.org/licenses/LICENSE-2.0
008     *
009     * Unless required by applicable law or agreed to in writing, software distributed under the
010     * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
011     * express or implied. See the License for the specific language governing permissions and
012     * limitations under the License.
013     */
014    
015    package com.google.common.base;
016    
017    import static com.google.common.base.Preconditions.checkArgument;
018    import static com.google.common.base.Preconditions.checkNotNull;
019    
020    import com.google.common.annotations.Beta;
021    import com.google.common.annotations.GwtCompatible;
022    
023    import java.util.ArrayList;
024    import java.util.Arrays;
025    import java.util.List;
026    
027    /**
028     * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029     * for any {@link Object}. Also offers basic text processing methods based on this function.
030     * Implementations are strongly encouraged to be side-effect-free and immutable.
031     *
032     * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033     * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
034     *
035     * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
036     * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
037     * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
038     * treats these just as two separate characters.
039     *
040     * @author Kevin Bourrillion
041     * @since 1
042     */
043    // TODO: release as "stable" after changing from chars to code points, and
044    // deciding whether constants should change to methods
045    @Beta
046    @GwtCompatible
047    public abstract class CharMatcher implements Predicate<Character> {
048      // Constants
049    
050      // Excludes 2000-2000a, which is handled as a range
051      private static final String BREAKING_WHITESPACE_CHARS =
052          "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
053    
054      // Excludes 2007, which is handled as a gap in a pair of ranges
055      private static final String NON_BREAKING_WHITESPACE_CHARS =
056          "\u00a0\u180e\u202f";
057    
058      /**
059       * Determines whether a character is whitespace according to the latest Unicode standard, as
060       * illustrated
061       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
062       * This is not the same definition used by other Java APIs. (See a
063       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
064       * definitions of "whitespace"</a>.)
065       *
066       * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
067       * to date.
068       */
069      public static final CharMatcher WHITESPACE =
070          anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
071              .or(inRange('\u2000', '\u200a'))
072              .precomputed();
073    
074      /**
075       * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
076       * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
077       * discussion of that term.
078       *
079       * @since 2
080       */
081      public static final CharMatcher BREAKING_WHITESPACE =
082          anyOf(BREAKING_WHITESPACE_CHARS)
083              .or(inRange('\u2000', '\u2006'))
084              .or(inRange('\u2008', '\u200a'))
085              .precomputed();
086    
087      /**
088       * Determines whether a character is ASCII, meaning that its code point is less than 128.
089       */
090      public static final CharMatcher ASCII = inRange('\0', '\u007f');
091    
/**
 * Matches digits according to
 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
 */
public static final CharMatcher DIGIT;

static {
  // Each character below is the first character of a ten-character run that is added as a
  // range; the ASCII run '0'..'9' seeds the matcher.
  final String zeroDigits =
      "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
          + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
          + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
  CharMatcher accumulated = inRange('0', '9');
  for (int i = 0; i < zeroDigits.length(); i++) {
    final char zero = zeroDigits.charAt(i);
    final char nine = (char) (zero + 9);
    accumulated = accumulated.or(inRange(zero, nine));
  }
  DIGIT = accumulated.precomputed();
}
109    
110      /**
111       * Determines whether a character is whitespace according to {@link Character#isWhitespace(char)
112       * Java's definition}; it is usually preferable to use {@link #WHITESPACE}. (See a
113       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
114       * definitions of "whitespace"</a>.)
115       */
116      public static final CharMatcher JAVA_WHITESPACE =
117          inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
118              .or(inRange('\u001c', '\u0020'))
119              .or(is('\u1680'))
120              .or(is('\u180e'))
121              .or(inRange('\u2000', '\u2006'))
122              .or(inRange('\u2008', '\u200b'))
123              .or(inRange('\u2028', '\u2029'))
124              .or(is('\u205f'))
125              .or(is('\u3000'))
126              .precomputed();
127    
128      /**
129       * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
130       * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
131       */
132      public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
133        @Override public boolean matches(char c) {
134          return Character.isDigit(c);
135        }
136      };
137    
138      /**
139       * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
140       * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
141       * inRange('a', 'z').or(inRange('A', 'Z'))}.
142       */
143      public static final CharMatcher JAVA_LETTER = new CharMatcher() {
144        @Override public boolean matches(char c) {
145          return Character.isLetter(c);
146        }
147      };
148    
149      /**
150       * Determines whether a character is a letter or digit according to {@link
151       * Character#isLetterOrDigit(char) Java's definition}.
152       */
153      public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
154        @Override public boolean matches(char c) {
155          return Character.isLetterOrDigit(c);
156        }
157      };
158    
159      /**
160       * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
161       * Java's definition}.
162       */
163      public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
164        @Override public boolean matches(char c) {
165          return Character.isUpperCase(c);
166        }
167      };
168    
169      /**
170       * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
171       * Java's definition}.
172       */
173      public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
174        @Override public boolean matches(char c) {
175          return Character.isLowerCase(c);
176        }
177      };
178    
179      /**
180       * Determines whether a character is an ISO control character as specified by {@link
181       * Character#isISOControl(char)}.
182       */
183      public static final CharMatcher JAVA_ISO_CONTROL =
184          inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
185    
186      /**
187       * Determines whether a character is invisible; that is, if its Unicode category is any of
188       * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
189       * PRIVATE_USE according to ICU4J.
190       */
191      public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
192          .or(inRange('\u007f', '\u00a0'))
193          .or(is('\u00ad'))
194          .or(inRange('\u0600', '\u0603'))
195          .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
196          .or(inRange('\u2000', '\u200f'))
197          .or(inRange('\u2028', '\u202f'))
198          .or(inRange('\u205f', '\u2064'))
199          .or(inRange('\u206a', '\u206f'))
200          .or(is('\u3000'))
201          .or(inRange('\ud800', '\uf8ff'))
202          .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
203          .precomputed();
204    
205      /**
206       * Determines whether a character is single-width (not double-width). When in doubt, this matcher
207       * errs on the side of returning {@code false} (that is, it tends to assume a character is
208       * double-width).
209       *
210       * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
211       * date.
212       */
213      public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
214          .or(is('\u05be'))
215          .or(inRange('\u05d0', '\u05ea'))
216          .or(is('\u05f3'))
217          .or(is('\u05f4'))
218          .or(inRange('\u0600', '\u06ff'))
219          .or(inRange('\u0750', '\u077f'))
220          .or(inRange('\u0e00', '\u0e7f'))
221          .or(inRange('\u1e00', '\u20af'))
222          .or(inRange('\u2100', '\u213a'))
223          .or(inRange('\ufb50', '\ufdff'))
224          .or(inRange('\ufe70', '\ufeff'))
225          .or(inRange('\uff61', '\uffdc'))
226          .precomputed();
227    
228      /** Matches any character. */
229      public static final CharMatcher ANY =
230          new CharMatcher() {
231            @Override public boolean matches(char c) {
232              return true;
233            }
234    
235            @Override public int indexIn(CharSequence sequence) {
236              return (sequence.length() == 0) ? -1 : 0;
237            }
238    
239            @Override public int indexIn(CharSequence sequence, int start) {
240              int length = sequence.length();
241              Preconditions.checkPositionIndex(start, length);
242              return (start == length) ? -1 : start;
243            }
244    
245            @Override public int lastIndexIn(CharSequence sequence) {
246              return sequence.length() - 1;
247            }
248    
249            @Override public boolean matchesAllOf(CharSequence sequence) {
250              checkNotNull(sequence);
251              return true;
252            }
253    
254            @Override public boolean matchesNoneOf(CharSequence sequence) {
255              return sequence.length() == 0;
256            }
257    
258            @Override public String removeFrom(CharSequence sequence) {
259              checkNotNull(sequence);
260              return "";
261            }
262    
263            @Override public String replaceFrom(CharSequence sequence, char replacement) {
264              char[] array = new char[sequence.length()];
265              Arrays.fill(array, replacement);
266              return new String(array);
267            }
268    
269            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
270              StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
271              for (int i = 0; i < sequence.length(); i++) {
272                retval.append(replacement);
273              }
274              return retval.toString();
275            }
276    
277            @Override public String collapseFrom(CharSequence sequence, char replacement) {
278              return (sequence.length() == 0) ? "" : String.valueOf(replacement);
279            }
280    
281            @Override public String trimFrom(CharSequence sequence) {
282              checkNotNull(sequence);
283              return "";
284            }
285    
286            @Override public int countIn(CharSequence sequence) {
287              return sequence.length();
288            }
289    
290            @Override public CharMatcher and(CharMatcher other) {
291              return checkNotNull(other);
292            }
293    
294            @Override public CharMatcher or(CharMatcher other) {
295              checkNotNull(other);
296              return this;
297            }
298    
299            @Override public CharMatcher negate() {
300              return NONE;
301            }
302    
303            @Override public CharMatcher precomputed() {
304              return this;
305            }
306          };
307    
308      /** Matches no characters. */
309      public static final CharMatcher NONE =
310          new CharMatcher() {
311            @Override public boolean matches(char c) {
312              return false;
313            }
314    
315            @Override public int indexIn(CharSequence sequence) {
316              checkNotNull(sequence);
317              return -1;
318            }
319    
320            @Override public int indexIn(CharSequence sequence, int start) {
321              int length = sequence.length();
322              Preconditions.checkPositionIndex(start, length);
323              return -1;
324            }
325    
326            @Override public int lastIndexIn(CharSequence sequence) {
327              checkNotNull(sequence);
328              return -1;
329            }
330    
331            @Override public boolean matchesAllOf(CharSequence sequence) {
332              return sequence.length() == 0;
333            }
334    
335            @Override public boolean matchesNoneOf(CharSequence sequence) {
336              checkNotNull(sequence);
337              return true;
338            }
339    
340            @Override public String removeFrom(CharSequence sequence) {
341              return sequence.toString();
342            }
343    
344            @Override public String replaceFrom(CharSequence sequence, char replacement) {
345              return sequence.toString();
346            }
347    
348            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
349              checkNotNull(replacement);
350              return sequence.toString();
351            }
352    
353            @Override public String collapseFrom(CharSequence sequence, char replacement) {
354              return sequence.toString();
355            }
356    
357            @Override public String trimFrom(CharSequence sequence) {
358              return sequence.toString();
359            }
360    
361            @Override public int countIn(CharSequence sequence) {
362              checkNotNull(sequence);
363              return 0;
364            }
365    
366            @Override public CharMatcher and(CharMatcher other) {
367              checkNotNull(other);
368              return this;
369            }
370    
371            @Override public CharMatcher or(CharMatcher other) {
372              return checkNotNull(other);
373            }
374    
375            @Override public CharMatcher negate() {
376              return ANY;
377            }
378    
379            @Override void setBits(LookupTable table) {}
380    
381            @Override public CharMatcher precomputed() {
382              return this;
383            }
384          };
385    
386      // Static factories
387    
388      /**
389       * Returns a {@code char} matcher that matches only one specified character.
390       */
391      public static CharMatcher is(final char match) {
392        return new CharMatcher() {
393          @Override public boolean matches(char c) {
394            return c == match;
395          }
396    
397          @Override public String replaceFrom(CharSequence sequence, char replacement) {
398            return sequence.toString().replace(match, replacement);
399          }
400    
401          @Override public CharMatcher and(CharMatcher other) {
402            return other.matches(match) ? this : NONE;
403          }
404    
405          @Override public CharMatcher or(CharMatcher other) {
406            return other.matches(match) ? other : super.or(other);
407          }
408    
409          @Override public CharMatcher negate() {
410            return isNot(match);
411          }
412    
413          @Override void setBits(LookupTable table) {
414            table.set(match);
415          }
416    
417          @Override public CharMatcher precomputed() {
418            return this;
419          }
420        };
421      }
422    
423      /**
424       * Returns a {@code char} matcher that matches any character except the one specified.
425       *
426       * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
427       */
428      public static CharMatcher isNot(final char match) {
429        return new CharMatcher() {
430          @Override public boolean matches(char c) {
431            return c != match;
432          }
433    
434          @Override public CharMatcher and(CharMatcher other) {
435            return other.matches(match) ? super.and(other) : other;
436          }
437    
438          @Override public CharMatcher or(CharMatcher other) {
439            return other.matches(match) ? ANY : this;
440          }
441    
442          @Override public CharMatcher negate() {
443            return is(match);
444          }
445        };
446      }
447    
448      /**
449       * Returns a {@code char} matcher that matches any character present in the given character
450       * sequence.
451       */
452      public static CharMatcher anyOf(final CharSequence sequence) {
453        switch (sequence.length()) {
454          case 0:
455            return NONE;
456          case 1:
457            return is(sequence.charAt(0));
458          case 2:
459            final char match1 = sequence.charAt(0);
460            final char match2 = sequence.charAt(1);
461            return new CharMatcher() {
462              @Override public boolean matches(char c) {
463                return c == match1 || c == match2;
464              }
465    
466              @Override void setBits(LookupTable table) {
467                table.set(match1);
468                table.set(match2);
469              }
470    
471              @Override public CharMatcher precomputed() {
472                return this;
473              }
474            };
475        }
476    
477        final char[] chars = sequence.toString().toCharArray();
478        Arrays.sort(chars); // not worth collapsing duplicates
479    
480        return new CharMatcher() {
481          @Override public boolean matches(char c) {
482            return Arrays.binarySearch(chars, c) >= 0;
483          }
484    
485          @Override void setBits(LookupTable table) {
486            for (char c : chars) {
487              table.set(c);
488            }
489          }
490        };
491      }
492    
493      /**
494       * Returns a {@code char} matcher that matches any character not present in the given character
495       * sequence.
496       */
497      public static CharMatcher noneOf(CharSequence sequence) {
498        return anyOf(sequence).negate();
499      }
500    
501      /**
502       * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
503       * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
504       * CharMatcher.inRange('a', 'z')}.
505       *
506       * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
507       */
508      public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
509        checkArgument(endInclusive >= startInclusive);
510        return new CharMatcher() {
511          @Override public boolean matches(char c) {
512            return startInclusive <= c && c <= endInclusive;
513          }
514    
515          @Override void setBits(LookupTable table) {
516            char c = startInclusive;
517            while (true) {
518              table.set(c);
519              if (c++ == endInclusive) {
520                break;
521              }
522            }
523          }
524    
525          @Override public CharMatcher precomputed() {
526            return this;
527          }
528        };
529      }
530    
531      /**
532       * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
533       * which operates on primitive {@code char} instances instead.
534       */
535      public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
536        checkNotNull(predicate);
537        if (predicate instanceof CharMatcher) {
538          return (CharMatcher) predicate;
539        }
540        return new CharMatcher() {
541          @Override public boolean matches(char c) {
542            return predicate.apply(c);
543          }
544    
545          @Override public boolean apply(Character character) {
546            return predicate.apply(checkNotNull(character));
547          }
548        };
549      }
550    
551      // Abstract methods
552    
553      /** Determines a true or false value for the given character. */
554      public abstract boolean matches(char c);
555    
556      // Non-static factories
557    
558      /**
559       * Returns a matcher that matches any character not matched by this matcher.
560       */
561      public CharMatcher negate() {
562        final CharMatcher original = this;
563        return new CharMatcher() {
564          @Override public boolean matches(char c) {
565            return !original.matches(c);
566          }
567    
568          @Override public boolean matchesAllOf(CharSequence sequence) {
569            return original.matchesNoneOf(sequence);
570          }
571    
572          @Override public boolean matchesNoneOf(CharSequence sequence) {
573            return original.matchesAllOf(sequence);
574          }
575    
576          @Override public int countIn(CharSequence sequence) {
577            return sequence.length() - original.countIn(sequence);
578          }
579    
580          @Override public CharMatcher negate() {
581            return original;
582          }
583        };
584      }
585    
586      /**
587       * Returns a matcher that matches any character matched by both this matcher and {@code other}.
588       */
589      public CharMatcher and(CharMatcher other) {
590        return new And(Arrays.asList(this, checkNotNull(other)));
591      }
592    
593      private static class And extends CharMatcher {
594        List<CharMatcher> components;
595    
596        And(List<CharMatcher> components) {
597          this.components = components; // Skip defensive copy (private)
598        }
599    
600        @Override public boolean matches(char c) {
601          for (CharMatcher matcher : components) {
602            if (!matcher.matches(c)) {
603              return false;
604            }
605          }
606          return true;
607        }
608    
609        @Override public CharMatcher and(CharMatcher other) {
610          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
611          newComponents.add(checkNotNull(other));
612          return new And(newComponents);
613        }
614      }
615    
616      /**
617       * Returns a matcher that matches any character matched by either this matcher or {@code other}.
618       */
619      public CharMatcher or(CharMatcher other) {
620        return new Or(Arrays.asList(this, checkNotNull(other)));
621      }
622    
623      private static class Or extends CharMatcher {
624        List<CharMatcher> components;
625    
626        Or(List<CharMatcher> components) {
627          this.components = components; // Skip defensive copy (private)
628        }
629    
630        @Override public boolean matches(char c) {
631          for (CharMatcher matcher : components) {
632            if (matcher.matches(c)) {
633              return true;
634            }
635          }
636          return false;
637        }
638    
639        @Override public CharMatcher or(CharMatcher other) {
640          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
641          newComponents.add(checkNotNull(other));
642          return new Or(newComponents);
643        }
644    
645        @Override void setBits(LookupTable table) {
646          for (CharMatcher matcher : components) {
647            matcher.setBits(table);
648          }
649        }
650      }
651    
652      /**
653       * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
654       * query than the original; your mileage may vary. Precomputation takes time and is likely to be
655       * worthwhile only if the precomputed matcher is queried many thousands of times.
656       *
657       * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
658       * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
659       * worthwhile tradeoff in a browser.
660       */
661      public CharMatcher precomputed() {
662        return Platform.precomputeCharMatcher(this);
663      }
664    
665      /**
666       * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
667       * on {@link Platform} so that we can have different behavior in GWT.
668       *
669       * <p>The default precomputation is to cache the configuration of the original matcher in an
670       * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
671       * than the original.
672       *
673       * <p>The default implementation creates a new bit array and passes it to {@link
674       * #setBits(LookupTable)}.
675       */
676      CharMatcher precomputedInternal() {
677        final LookupTable table = new LookupTable();
678        setBits(table);
679    
680        return new CharMatcher() {
681          @Override public boolean matches(char c) {
682            return table.get(c);
683          }
684    
685          // TODO: make methods like negate() smart
686    
687          @Override public CharMatcher precomputed() {
688            return this;
689          }
690        };
691      }
692    
693      /**
694       * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
695       * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
696       *
697       * <p>The default implementation loops over every possible character value, invoking {@link
698       * #matches} for each one.
699       */
700      void setBits(LookupTable table) {
701        char c = Character.MIN_VALUE;
702        while (true) {
703          if (matches(c)) {
704            table.set(c);
705          }
706          if (c++ == Character.MAX_VALUE) {
707            break;
708          }
709        }
710      }
711    
712      /**
713       * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
714       *
715       * <p>TODO: possibly share a common BitArray class with BloomFilter and others... a simpler
716       * java.util.BitSet.
717       */
718      private static final class LookupTable {
719        int[] data = new int[2048];
720    
721        void set(char index) {
722          data[index >> 5] |= (1 << index);
723        }
724    
725        boolean get(char index) {
726          return (data[index >> 5] & (1 << index)) != 0;
727        }
728      }
729    
730      // Text processing routines
731    
732      /**
733       * Returns {@code true} if a character sequence contains only matching characters.
734       *
735       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
736       * character, until this returns {@code false} or the end is reached.
737       *
738       * @param sequence the character sequence to examine, possibly empty
739       * @return {@code true} if this matcher matches every character in the sequence, including when
740       *         the sequence is empty
741       */
742      public boolean matchesAllOf(CharSequence sequence) {
743        for (int i = sequence.length() - 1; i >= 0; i--) {
744          if (!matches(sequence.charAt(i))) {
745            return false;
746          }
747        }
748        return true;
749      }
750    
751      /**
752       * Returns {@code true} if a character sequence contains no matching characters.
753       *
754       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
755       * character, until this returns {@code false} or the end is reached.
756       *
757       * @param sequence the character sequence to examine, possibly empty
758       * @return {@code true} if this matcher matches every character in the sequence, including when
759       *         the sequence is empty
760       */
761      public boolean matchesNoneOf(CharSequence sequence) {
762        return indexIn(sequence) == -1;
763      }
764    
765      // TODO: perhaps add matchesAnyOf()
766    
767      /**
768       * Returns the index of the first matching character in a character sequence, or {@code -1} if no
769       * matching character is present.
770       *
771       * <p>The default implementation iterates over the sequence in forward order calling {@link
772       * #matches} for each character.
773       *
774       * @param sequence the character sequence to examine from the beginning
775       * @return an index, or {@code -1} if no character matches
776       */
777      public int indexIn(CharSequence sequence) {
778        int length = sequence.length();
779        for (int i = 0; i < length; i++) {
780          if (matches(sequence.charAt(i))) {
781            return i;
782          }
783        }
784        return -1;
785      }
786    
787      /**
788       * Returns the index of the first matching character in a character sequence, starting from a
789       * given position, or {@code -1} if no character matches after that position.
790       *
791       * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
792       * start}, calling {@link #matches} for each character.
793       *
794       * @param sequence the character sequence to examine
795       * @param start the first index to examine; must be nonnegative and no greater than {@code
796       *        sequence.length()}
797       * @return the index of the first matching character, guaranteed to be no less than {@code start},
798       *         or {@code -1} if no character matches
799       * @throws IndexOutOfBoundsException if start is negative or greater than {@code
800       *         sequence.length()}
801       */
802      public int indexIn(CharSequence sequence, int start) {
803        int length = sequence.length();
804        Preconditions.checkPositionIndex(start, length);
805        for (int i = start; i < length; i++) {
806          if (matches(sequence.charAt(i))) {
807            return i;
808          }
809        }
810        return -1;
811      }
812    
813      /**
814       * Returns the index of the last matching character in a character sequence, or {@code -1} if no
815       * matching character is present.
816       *
817       * <p>The default implementation iterates over the sequence in reverse order calling {@link
818       * #matches} for each character.
819       *
820       * @param sequence the character sequence to examine from the end
821       * @return an index, or {@code -1} if no character matches
822       */
823      public int lastIndexIn(CharSequence sequence) {
824        for (int i = sequence.length() - 1; i >= 0; i--) {
825          if (matches(sequence.charAt(i))) {
826            return i;
827          }
828        }
829        return -1;
830      }
831    
832      /**
833       * Returns the number of matching characters found in a character sequence.
834       */
835      public int countIn(CharSequence sequence) {
836        int count = 0;
837        for (int i = 0; i < sequence.length(); i++) {
838          if (matches(sequence.charAt(i))) {
839            count++;
840          }
841        }
842        return count;
843      }
844    
  /**
   * Returns a string containing all non-matching characters of a character sequence, in order. For
   * example: <pre>   {@code
   *
   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
   *
   * ... returns {@code "bzr"}.
   *
   * <p>If no character matches, the result is simply {@code sequence.toString()}.
   *
   * @param sequence the character sequence to remove matching characters from
   * @return the characters of {@code sequence} that do not match, in their original order
   */
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      // Nothing to remove: hand back the string form as is.
      return string;
    }

    // Compact the characters in place inside a copy of the string's contents.
    char[] chars = string.toCharArray();
    // Number of matching characters seen so far; every surviving character that follows is
    // shifted left by this many slots. Starts at 1 for the match found at pos above.
    int spread = 1;

    // This unusual loop comes from extensive benchmarking
    OUT: while (true) {
      // Step past the matching character at pos.
      pos++;
      while (true) {
        if (pos == chars.length) {
          // Reached the end of the input; chars[0 .. pos - spread) holds the result.
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another character to drop: leave the copy loop so spread can grow.
          break;
        }
        // Non-matching character: slide it left over the gap left by the removed ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // pos == chars.length here, so the result length is the input length minus the match count.
    return new String(chars, 0, pos - spread);
  }
880    
881      /**
882       * Returns a string containing all matching characters of a character sequence, in order. For
883       * example: <pre>   {@code
884       *
885       *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
886       *
887       * ... returns {@code "aaa"}.
888       */
889      public String retainFrom(CharSequence sequence) {
890        return negate().removeFrom(sequence);
891      }
892    
893      /**
894       * Returns a string copy of the input character sequence, with each character that matches this
895       * matcher replaced by a given replacement character. For example: <pre>   {@code
896       *
897       *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
898       *
899       * ... returns {@code "rodor"}.
900       *
901       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
902       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
903       * character.
904       *
905       * @param sequence the character sequence to replace matching characters in
906       * @param replacement the character to append to the result string in place of each matching
907       *        character in {@code sequence}
908       * @return the new string
909       */
910      public String replaceFrom(CharSequence sequence, char replacement) {
911        String string = sequence.toString();
912        int pos = indexIn(string);
913        if (pos == -1) {
914          return string;
915        }
916        char[] chars = string.toCharArray();
917        chars[pos] = replacement;
918        for (int i = pos + 1; i < chars.length; i++) {
919          if (matches(chars[i])) {
920            chars[i] = replacement;
921          }
922        }
923        return new String(chars);
924      }
925    
926      /**
927       * Returns a string copy of the input character sequence, with each character that matches this
928       * matcher replaced by a given replacement sequence. For example: <pre>   {@code
929       *
930       *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
931       *
932       * ... returns {@code "yoohoo"}.
933       *
934       * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
935       * off calling {@link #replaceFrom(CharSequence, char)} directly.
936       *
937       * @param sequence the character sequence to replace matching characters in
938       * @param replacement the characters to append to the result string in place of each matching
939       *        character in {@code sequence}
940       * @return the new string
941       */
942      public String replaceFrom(CharSequence sequence, CharSequence replacement) {
943        int replacementLen = replacement.length();
944        if (replacementLen == 0) {
945          return removeFrom(sequence);
946        }
947        if (replacementLen == 1) {
948          return replaceFrom(sequence, replacement.charAt(0));
949        }
950    
951        String string = sequence.toString();
952        int pos = indexIn(string);
953        if (pos == -1) {
954          return string;
955        }
956    
957        int len = string.length();
958        StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
959    
960        int oldpos = 0;
961        do {
962          buf.append(string, oldpos, pos);
963          buf.append(replacement);
964          oldpos = pos + 1;
965          pos = indexIn(string, oldpos);
966        } while (pos != -1);
967    
968        buf.append(string, oldpos, len);
969        return buf.toString();
970      }
971    
972      /**
973       * Returns a substring of the input character sequence that omits all characters this matcher
974       * matches from the beginning and from the end of the string. For example: <pre>   {@code
975       *
976       *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
977       *
978       * ... returns {@code "cat"}.
979       *
980       * <p>Note that: <pre>   {@code
981       *
982       *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
983       *
984       * ... is equivalent to {@link String#trim()}.
985       */
986      public String trimFrom(CharSequence sequence) {
987        int len = sequence.length();
988        int first;
989        int last;
990    
991        for (first = 0; first < len; first++) {
992          if (!matches(sequence.charAt(first))) {
993            break;
994          }
995        }
996        for (last = len - 1; last > first; last--) {
997          if (!matches(sequence.charAt(last))) {
998            break;
999          }
1000        }
1001    
1002        return sequence.subSequence(first, last + 1).toString();
1003      }
1004    
1005      /**
1006       * Returns a substring of the input character sequence that omits all characters this matcher
1007       * matches from the beginning of the string. For example: <pre> {@code
1008       *
1009       *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1010       *
1011       * ... returns {@code "catbab"}.
1012       */
1013      public String trimLeadingFrom(CharSequence sequence) {
1014        int len = sequence.length();
1015        int first;
1016    
1017        for (first = 0; first < len; first++) {
1018          if (!matches(sequence.charAt(first))) {
1019            break;
1020          }
1021        }
1022    
1023        return sequence.subSequence(first, len).toString();
1024      }
1025    
1026      /**
1027       * Returns a substring of the input character sequence that omits all characters this matcher
1028       * matches from the end of the string. For example: <pre> {@code
1029       *
1030       *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1031       *
1032       * ... returns {@code "abacat"}.
1033       */
1034      public String trimTrailingFrom(CharSequence sequence) {
1035        int len = sequence.length();
1036        int last;
1037    
1038        for (last = len - 1; last >= 0; last--) {
1039          if (!matches(sequence.charAt(last))) {
1040            break;
1041          }
1042        }
1043    
1044        return sequence.subSequence(0, last + 1).toString();
1045      }
1046    
1047      /**
1048       * Returns a string copy of the input character sequence, with each group of consecutive
1049       * characters that match this matcher replaced by a single replacement character. For example:
1050       * <pre>   {@code
1051       *
1052       *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1053       *
1054       * ... returns {@code "b-p-r"}.
1055       *
1056       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1057       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1058       * character.
1059       *
1060       * @param sequence the character sequence to replace matching groups of characters in
1061       * @param replacement the character to append to the result string in place of each group of
1062       *        matching characters in {@code sequence}
1063       * @return the new string
1064       */
1065      public String collapseFrom(CharSequence sequence, char replacement) {
1066        int first = indexIn(sequence);
1067        if (first == -1) {
1068          return sequence.toString();
1069        }
1070    
1071        // TODO: this implementation can probably be made faster.
1072    
1073        StringBuilder builder = new StringBuilder(sequence.length())
1074            .append(sequence.subSequence(0, first))
1075            .append(replacement);
1076        boolean in = true;
1077        for (int i = first + 1; i < sequence.length(); i++) {
1078          char c = sequence.charAt(i);
1079          if (apply(c)) {
1080            if (!in) {
1081              builder.append(replacement);
1082              in = true;
1083            }
1084          } else {
1085            builder.append(c);
1086            in = false;
1087          }
1088        }
1089        return builder.toString();
1090      }
1091    
1092      /**
1093       * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1094       * groups of matching characters at the start or end of the sequence are removed without
1095       * replacement.
1096       */
1097      public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1098        int first = negate().indexIn(sequence);
1099        if (first == -1) {
1100          return ""; // everything matches. nothing's left.
1101        }
1102        StringBuilder builder = new StringBuilder(sequence.length());
1103        boolean inMatchingGroup = false;
1104        for (int i = first; i < sequence.length(); i++) {
1105          char c = sequence.charAt(i);
1106          if (apply(c)) {
1107            inMatchingGroup = true;
1108          } else {
1109            if (inMatchingGroup) {
1110              builder.append(replacement);
1111              inMatchingGroup = false;
1112            }
1113            builder.append(c);
1114          }
1115        }
1116        return builder.toString();
1117      }
1118    
1119      // Predicate interface
1120    
1121      /**
1122       * Returns {@code true} if this matcher matches the given character.
1123       *
1124       * @throws NullPointerException if {@code character} is null
1125       */
1126      @Override public boolean apply(Character character) {
1127        return matches(character);
1128      }
1129    }