001    /*
002     * Copyright (C) 2008 The Guava Authors
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     * http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package com.google.common.base;
018    
019    import static com.google.common.base.Preconditions.checkArgument;
020    import static com.google.common.base.Preconditions.checkNotNull;
021    
022    import com.google.common.annotations.Beta;
023    import com.google.common.annotations.GwtCompatible;
024    
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.List;
028    
029    import javax.annotation.CheckReturnValue;
030    
031    /**
032     * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
033     * for any {@link Object}. Also offers basic text processing methods based on this function.
034     * Implementations are strongly encouraged to be side-effect-free and immutable.
035     *
036     * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
037     * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
038     *
039     * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
040     * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
041     * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
042     * treats these just as two separate characters.
043     *
044     * @author Kevin Bourrillion
045     * @since 1.0
046     */
047    @Beta // Possibly change from chars to code points; decide constants vs. methods
048    @GwtCompatible
049    public abstract class CharMatcher implements Predicate<Character> {
050      // Constants
051    
052      // Excludes 2000-2000a, which is handled as a range
053      private static final String BREAKING_WHITESPACE_CHARS =
054          "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
055    
056      // Excludes 2007, which is handled as a gap in a pair of ranges
057      private static final String NON_BREAKING_WHITESPACE_CHARS =
058          "\u00a0\u180e\u202f";
059    
060      /**
061       * Determines whether a character is whitespace according to the latest Unicode standard, as
062       * illustrated
063       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
064       * This is not the same definition used by other Java APIs. (See a
065       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
066       * definitions of "whitespace"</a>.)
067       *
068       * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
069       * to date.
070       */
071      public static final CharMatcher WHITESPACE =
072          anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
073              .or(inRange('\u2000', '\u200a'))
074              .precomputed();
075    
076      /**
077       * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
078       * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
079       * discussion of that term.
080       *
081       * @since 2.0
082       */
083      public static final CharMatcher BREAKING_WHITESPACE =
084          anyOf(BREAKING_WHITESPACE_CHARS)
085              .or(inRange('\u2000', '\u2006'))
086              .or(inRange('\u2008', '\u200a'))
087              .precomputed();
088    
089      /**
090       * Determines whether a character is ASCII, meaning that its code point is less than 128.
091       */
092      public static final CharMatcher ASCII = inRange('\0', '\u007f');
093    
094      /**
095       * Determines whether a character is a digit according to
096       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
097       */
098      public static final CharMatcher DIGIT;
099    
100      static {
101        CharMatcher digit = inRange('0', '9');
102        String zeroes =
103            "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
104                + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
105                + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
106        for (char base : zeroes.toCharArray()) {
107          digit = digit.or(inRange(base, (char) (base + 9)));
108        }
109        DIGIT = digit.precomputed();
110      }
111    
112      /**
113       * Determines whether a character is whitespace according to {@link Character#isWhitespace(char)
114       * Java's definition}; it is usually preferable to use {@link #WHITESPACE}. (See a
115       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
116       * definitions of "whitespace"</a>.)
117       */
118      public static final CharMatcher JAVA_WHITESPACE =
119          inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
120              .or(inRange('\u001c', '\u0020'))
121              .or(is('\u1680'))
122              .or(is('\u180e'))
123              .or(inRange('\u2000', '\u2006'))
124              .or(inRange('\u2008', '\u200b'))
125              .or(inRange('\u2028', '\u2029'))
126              .or(is('\u205f'))
127              .or(is('\u3000'))
128              .precomputed();
129    
130      /**
131       * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
132       * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
133       */
134      public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
135        @Override public boolean matches(char c) {
136          return Character.isDigit(c);
137        }
138      };
139    
140      /**
141       * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
142       * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
143       * inRange('a', 'z').or(inRange('A', 'Z'))}.
144       */
145      public static final CharMatcher JAVA_LETTER = new CharMatcher() {
146        @Override public boolean matches(char c) {
147          return Character.isLetter(c);
148        }
149      };
150    
151      /**
152       * Determines whether a character is a letter or digit according to {@link
153       * Character#isLetterOrDigit(char) Java's definition}.
154       */
155      public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
156        @Override public boolean matches(char c) {
157          return Character.isLetterOrDigit(c);
158        }
159      };
160    
161      /**
162       * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
163       * Java's definition}.
164       */
165      public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
166        @Override public boolean matches(char c) {
167          return Character.isUpperCase(c);
168        }
169      };
170    
171      /**
172       * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
173       * Java's definition}.
174       */
175      public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
176        @Override public boolean matches(char c) {
177          return Character.isLowerCase(c);
178        }
179      };
180    
181      /**
182       * Determines whether a character is an ISO control character as specified by {@link
183       * Character#isISOControl(char)}.
184       */
185      public static final CharMatcher JAVA_ISO_CONTROL =
186          inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
187    
188      /**
189       * Determines whether a character is invisible; that is, if its Unicode category is any of
190       * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
191       * PRIVATE_USE according to ICU4J.
192       */
193      public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
194          .or(inRange('\u007f', '\u00a0'))
195          .or(is('\u00ad'))
196          .or(inRange('\u0600', '\u0603'))
197          .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
198          .or(inRange('\u2000', '\u200f'))
199          .or(inRange('\u2028', '\u202f'))
200          .or(inRange('\u205f', '\u2064'))
201          .or(inRange('\u206a', '\u206f'))
202          .or(is('\u3000'))
203          .or(inRange('\ud800', '\uf8ff'))
204          .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
205          .precomputed();
206    
207      /**
208       * Determines whether a character is single-width (not double-width). When in doubt, this matcher
209       * errs on the side of returning {@code false} (that is, it tends to assume a character is
210       * double-width).
211       *
212       * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
213       * date.
214       */
215      public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
216          .or(is('\u05be'))
217          .or(inRange('\u05d0', '\u05ea'))
218          .or(is('\u05f3'))
219          .or(is('\u05f4'))
220          .or(inRange('\u0600', '\u06ff'))
221          .or(inRange('\u0750', '\u077f'))
222          .or(inRange('\u0e00', '\u0e7f'))
223          .or(inRange('\u1e00', '\u20af'))
224          .or(inRange('\u2100', '\u213a'))
225          .or(inRange('\ufb50', '\ufdff'))
226          .or(inRange('\ufe70', '\ufeff'))
227          .or(inRange('\uff61', '\uffdc'))
228          .precomputed();
229    
230      /** Matches any character. */
231      public static final CharMatcher ANY =
232          new CharMatcher() {
233            @Override public boolean matches(char c) {
234              return true;
235            }
236    
237            @Override public int indexIn(CharSequence sequence) {
238              return (sequence.length() == 0) ? -1 : 0;
239            }
240    
241            @Override public int indexIn(CharSequence sequence, int start) {
242              int length = sequence.length();
243              Preconditions.checkPositionIndex(start, length);
244              return (start == length) ? -1 : start;
245            }
246    
247            @Override public int lastIndexIn(CharSequence sequence) {
248              return sequence.length() - 1;
249            }
250    
251            @Override public boolean matchesAllOf(CharSequence sequence) {
252              checkNotNull(sequence);
253              return true;
254            }
255    
256            @Override public boolean matchesNoneOf(CharSequence sequence) {
257              return sequence.length() == 0;
258            }
259    
260            @Override public String removeFrom(CharSequence sequence) {
261              checkNotNull(sequence);
262              return "";
263            }
264    
265            @Override public String replaceFrom(CharSequence sequence, char replacement) {
266              char[] array = new char[sequence.length()];
267              Arrays.fill(array, replacement);
268              return new String(array);
269            }
270    
271            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
272              StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
273              for (int i = 0; i < sequence.length(); i++) {
274                retval.append(replacement);
275              }
276              return retval.toString();
277            }
278    
279            @Override public String collapseFrom(CharSequence sequence, char replacement) {
280              return (sequence.length() == 0) ? "" : String.valueOf(replacement);
281            }
282    
283            @Override public String trimFrom(CharSequence sequence) {
284              checkNotNull(sequence);
285              return "";
286            }
287    
288            @Override public int countIn(CharSequence sequence) {
289              return sequence.length();
290            }
291    
292            @Override public CharMatcher and(CharMatcher other) {
293              return checkNotNull(other);
294            }
295    
296            @Override public CharMatcher or(CharMatcher other) {
297              checkNotNull(other);
298              return this;
299            }
300    
301            @Override public CharMatcher negate() {
302              return NONE;
303            }
304    
305            @Override public CharMatcher precomputed() {
306              return this;
307            }
308          };
309    
310      /** Matches no characters. */
311      public static final CharMatcher NONE =
312          new CharMatcher() {
313            @Override public boolean matches(char c) {
314              return false;
315            }
316    
317            @Override public int indexIn(CharSequence sequence) {
318              checkNotNull(sequence);
319              return -1;
320            }
321    
322            @Override public int indexIn(CharSequence sequence, int start) {
323              int length = sequence.length();
324              Preconditions.checkPositionIndex(start, length);
325              return -1;
326            }
327    
328            @Override public int lastIndexIn(CharSequence sequence) {
329              checkNotNull(sequence);
330              return -1;
331            }
332    
333            @Override public boolean matchesAllOf(CharSequence sequence) {
334              return sequence.length() == 0;
335            }
336    
337            @Override public boolean matchesNoneOf(CharSequence sequence) {
338              checkNotNull(sequence);
339              return true;
340            }
341    
342            @Override public String removeFrom(CharSequence sequence) {
343              return sequence.toString();
344            }
345    
346            @Override public String replaceFrom(CharSequence sequence, char replacement) {
347              return sequence.toString();
348            }
349    
350            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
351              checkNotNull(replacement);
352              return sequence.toString();
353            }
354    
355            @Override public String collapseFrom(CharSequence sequence, char replacement) {
356              return sequence.toString();
357            }
358    
359            @Override public String trimFrom(CharSequence sequence) {
360              return sequence.toString();
361            }
362    
363            @Override public int countIn(CharSequence sequence) {
364              checkNotNull(sequence);
365              return 0;
366            }
367    
368            @Override public CharMatcher and(CharMatcher other) {
369              checkNotNull(other);
370              return this;
371            }
372    
373            @Override public CharMatcher or(CharMatcher other) {
374              return checkNotNull(other);
375            }
376    
377            @Override public CharMatcher negate() {
378              return ANY;
379            }
380    
381            @Override void setBits(LookupTable table) {}
382    
383            @Override public CharMatcher precomputed() {
384              return this;
385            }
386          };
387    
388      // Static factories
389    
390      /**
391       * Returns a {@code char} matcher that matches only one specified character.
392       */
393      public static CharMatcher is(final char match) {
394        return new CharMatcher() {
395          @Override public boolean matches(char c) {
396            return c == match;
397          }
398    
399          @Override public String replaceFrom(CharSequence sequence, char replacement) {
400            return sequence.toString().replace(match, replacement);
401          }
402    
403          @Override public CharMatcher and(CharMatcher other) {
404            return other.matches(match) ? this : NONE;
405          }
406    
407          @Override public CharMatcher or(CharMatcher other) {
408            return other.matches(match) ? other : super.or(other);
409          }
410    
411          @Override public CharMatcher negate() {
412            return isNot(match);
413          }
414    
415          @Override void setBits(LookupTable table) {
416            table.set(match);
417          }
418    
419          @Override public CharMatcher precomputed() {
420            return this;
421          }
422        };
423      }
424    
425      /**
426       * Returns a {@code char} matcher that matches any character except the one specified.
427       *
428       * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
429       */
430      public static CharMatcher isNot(final char match) {
431        return new CharMatcher() {
432          @Override public boolean matches(char c) {
433            return c != match;
434          }
435    
436          @Override public CharMatcher and(CharMatcher other) {
437            return other.matches(match) ? super.and(other) : other;
438          }
439    
440          @Override public CharMatcher or(CharMatcher other) {
441            return other.matches(match) ? ANY : this;
442          }
443    
444          @Override public CharMatcher negate() {
445            return is(match);
446          }
447        };
448      }
449    
450      /**
451       * Returns a {@code char} matcher that matches any character present in the given character
452       * sequence.
453       */
454      public static CharMatcher anyOf(final CharSequence sequence) {
455        switch (sequence.length()) {
456          case 0:
457            return NONE;
458          case 1:
459            return is(sequence.charAt(0));
460          case 2:
461            final char match1 = sequence.charAt(0);
462            final char match2 = sequence.charAt(1);
463            return new CharMatcher() {
464              @Override public boolean matches(char c) {
465                return c == match1 || c == match2;
466              }
467    
468              @Override void setBits(LookupTable table) {
469                table.set(match1);
470                table.set(match2);
471              }
472    
473              @Override public CharMatcher precomputed() {
474                return this;
475              }
476            };
477        }
478    
479        final char[] chars = sequence.toString().toCharArray();
480        Arrays.sort(chars); // not worth collapsing duplicates
481    
482        return new CharMatcher() {
483          @Override public boolean matches(char c) {
484            return Arrays.binarySearch(chars, c) >= 0;
485          }
486    
487          @Override void setBits(LookupTable table) {
488            for (char c : chars) {
489              table.set(c);
490            }
491          }
492        };
493      }
494    
495      /**
496       * Returns a {@code char} matcher that matches any character not present in the given character
497       * sequence.
498       */
499      public static CharMatcher noneOf(CharSequence sequence) {
500        return anyOf(sequence).negate();
501      }
502    
503      /**
504       * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
505       * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
506       * CharMatcher.inRange('a', 'z')}.
507       *
508       * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
509       */
510      public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
511        checkArgument(endInclusive >= startInclusive);
512        return new CharMatcher() {
513          @Override public boolean matches(char c) {
514            return startInclusive <= c && c <= endInclusive;
515          }
516    
517          @Override void setBits(LookupTable table) {
518            char c = startInclusive;
519            while (true) {
520              table.set(c);
521              if (c++ == endInclusive) {
522                break;
523              }
524            }
525          }
526    
527          @Override public CharMatcher precomputed() {
528            return this;
529          }
530        };
531      }
532    
533      /**
534       * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
535       * which operates on primitive {@code char} instances instead.
536       */
537      public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
538        checkNotNull(predicate);
539        if (predicate instanceof CharMatcher) {
540          return (CharMatcher) predicate;
541        }
542        return new CharMatcher() {
543          @Override public boolean matches(char c) {
544            return predicate.apply(c);
545          }
546    
547          @Override public boolean apply(Character character) {
548            return predicate.apply(checkNotNull(character));
549          }
550        };
551      }
552    
553      // Abstract methods
554    
555      /** Determines a true or false value for the given character. */
556      public abstract boolean matches(char c);
557    
558      // Non-static factories
559    
560      /**
561       * Returns a matcher that matches any character not matched by this matcher.
562       */
563      public CharMatcher negate() {
564        final CharMatcher original = this;
565        return new CharMatcher() {
566          @Override public boolean matches(char c) {
567            return !original.matches(c);
568          }
569    
570          @Override public boolean matchesAllOf(CharSequence sequence) {
571            return original.matchesNoneOf(sequence);
572          }
573    
574          @Override public boolean matchesNoneOf(CharSequence sequence) {
575            return original.matchesAllOf(sequence);
576          }
577    
578          @Override public int countIn(CharSequence sequence) {
579            return sequence.length() - original.countIn(sequence);
580          }
581    
582          @Override public CharMatcher negate() {
583            return original;
584          }
585        };
586      }
587    
588      /**
589       * Returns a matcher that matches any character matched by both this matcher and {@code other}.
590       */
591      public CharMatcher and(CharMatcher other) {
592        return new And(Arrays.asList(this, checkNotNull(other)));
593      }
594    
595      private static class And extends CharMatcher {
596        List<CharMatcher> components;
597    
598        And(List<CharMatcher> components) {
599          this.components = components; // Skip defensive copy (private)
600        }
601    
602        @Override public boolean matches(char c) {
603          for (CharMatcher matcher : components) {
604            if (!matcher.matches(c)) {
605              return false;
606            }
607          }
608          return true;
609        }
610    
611        @Override public CharMatcher and(CharMatcher other) {
612          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
613          newComponents.add(checkNotNull(other));
614          return new And(newComponents);
615        }
616      }
617    
618      /**
619       * Returns a matcher that matches any character matched by either this matcher or {@code other}.
620       */
621      public CharMatcher or(CharMatcher other) {
622        return new Or(Arrays.asList(this, checkNotNull(other)));
623      }
624    
625      private static class Or extends CharMatcher {
626        List<CharMatcher> components;
627    
628        Or(List<CharMatcher> components) {
629          this.components = components; // Skip defensive copy (private)
630        }
631    
632        @Override public boolean matches(char c) {
633          for (CharMatcher matcher : components) {
634            if (matcher.matches(c)) {
635              return true;
636            }
637          }
638          return false;
639        }
640    
641        @Override public CharMatcher or(CharMatcher other) {
642          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
643          newComponents.add(checkNotNull(other));
644          return new Or(newComponents);
645        }
646    
647        @Override void setBits(LookupTable table) {
648          for (CharMatcher matcher : components) {
649            matcher.setBits(table);
650          }
651        }
652      }
653    
654      /**
655       * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
656       * query than the original; your mileage may vary. Precomputation takes time and is likely to be
657       * worthwhile only if the precomputed matcher is queried many thousands of times.
658       *
659       * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
660       * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
661       * worthwhile tradeoff in a browser.
662       */
663      public CharMatcher precomputed() {
664        return Platform.precomputeCharMatcher(this);
665      }
666    
667      /**
668       * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
669       * on {@link Platform} so that we can have different behavior in GWT.
670       *
671       * <p>The default precomputation is to cache the configuration of the original matcher in an
672       * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
673       * than the original.
674       *
675       * <p>The default implementation creates a new bit array and passes it to {@link
676       * #setBits(LookupTable)}.
677       */
678      CharMatcher precomputedInternal() {
679        final LookupTable table = new LookupTable();
680        setBits(table);
681    
682        return new CharMatcher() {
683          @Override public boolean matches(char c) {
684            return table.get(c);
685          }
686    
687          // TODO(kevinb): make methods like negate() smart?
688    
689          @Override public CharMatcher precomputed() {
690            return this;
691          }
692        };
693      }
694    
695      /**
696       * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
697       * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
698       *
699       * <p>The default implementation loops over every possible character value, invoking {@link
700       * #matches} for each one.
701       */
702      void setBits(LookupTable table) {
703        char c = Character.MIN_VALUE;
704        while (true) {
705          if (matches(c)) {
706            table.set(c);
707          }
708          if (c++ == Character.MAX_VALUE) {
709            break;
710          }
711        }
712      }
713    
714      /**
715       * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
716       *
717       * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
718       * simpler java.util.BitSet.
719       */
720      private static final class LookupTable {
721        int[] data = new int[2048];
722    
723        void set(char index) {
724          data[index >> 5] |= (1 << index);
725        }
726    
727        boolean get(char index) {
728          return (data[index >> 5] & (1 << index)) != 0;
729        }
730      }
731    
732      // Text processing routines
733    
734      /**
735       * Returns {@code true} if a character sequence contains at least one matching character.
736       * Equivalent to {@code !matchesNoneOf(sequence)}.
737       *
738       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
739       * character, until this returns {@code true} or the end is reached.
740       *
741       * @param sequence the character sequence to examine, possibly empty
742       * @return {@code true} if this matcher matches at least one character in the sequence
743       * @since 8.0
744       */
745      public boolean matchesAnyOf(CharSequence sequence) {
746        return !matchesNoneOf(sequence);
747      }
748    
749      /**
750       * Returns {@code true} if a character sequence contains only matching characters.
751       *
752       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
753       * character, until this returns {@code false} or the end is reached.
754       *
755       * @param sequence the character sequence to examine, possibly empty
756       * @return {@code true} if this matcher matches every character in the sequence, including when
757       *         the sequence is empty
758       */
759      public boolean matchesAllOf(CharSequence sequence) {
760        for (int i = sequence.length() - 1; i >= 0; i--) {
761          if (!matches(sequence.charAt(i))) {
762            return false;
763          }
764        }
765        return true;
766      }
767    
768      /**
769       * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
770       * {@code !matchesAnyOf(sequence)}.
771       *
772       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
773       * character, until this returns {@code false} or the end is reached.
774       *
775       * @param sequence the character sequence to examine, possibly empty
776       * @return {@code true} if this matcher matches every character in the sequence, including when
777       *         the sequence is empty
778       */
779      public boolean matchesNoneOf(CharSequence sequence) {
780        return indexIn(sequence) == -1;
781      }
782    
783      // TODO(kevinb): add matchesAnyOf()
784    
785      /**
786       * Returns the index of the first matching character in a character sequence, or {@code -1} if no
787       * matching character is present.
788       *
789       * <p>The default implementation iterates over the sequence in forward order calling {@link
790       * #matches} for each character.
791       *
792       * @param sequence the character sequence to examine from the beginning
793       * @return an index, or {@code -1} if no character matches
794       */
795      public int indexIn(CharSequence sequence) {
796        int length = sequence.length();
797        for (int i = 0; i < length; i++) {
798          if (matches(sequence.charAt(i))) {
799            return i;
800          }
801        }
802        return -1;
803      }
804    
805      /**
806       * Returns the index of the first matching character in a character sequence, starting from a
807       * given position, or {@code -1} if no character matches after that position.
808       *
809       * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
810       * start}, calling {@link #matches} for each character.
811       *
812       * @param sequence the character sequence to examine
813       * @param start the first index to examine; must be nonnegative and no greater than {@code
814       *        sequence.length()}
815       * @return the index of the first matching character, guaranteed to be no less than {@code start},
816       *         or {@code -1} if no character matches
817       * @throws IndexOutOfBoundsException if start is negative or greater than {@code
818       *         sequence.length()}
819       */
820      public int indexIn(CharSequence sequence, int start) {
821        int length = sequence.length();
822        Preconditions.checkPositionIndex(start, length);
823        for (int i = start; i < length; i++) {
824          if (matches(sequence.charAt(i))) {
825            return i;
826          }
827        }
828        return -1;
829      }
830    
831      /**
832       * Returns the index of the last matching character in a character sequence, or {@code -1} if no
833       * matching character is present.
834       *
835       * <p>The default implementation iterates over the sequence in reverse order calling {@link
836       * #matches} for each character.
837       *
838       * @param sequence the character sequence to examine from the end
839       * @return an index, or {@code -1} if no character matches
840       */
841      public int lastIndexIn(CharSequence sequence) {
842        for (int i = sequence.length() - 1; i >= 0; i--) {
843          if (matches(sequence.charAt(i))) {
844            return i;
845          }
846        }
847        return -1;
848      }
849    
850      /**
851       * Returns the number of matching characters found in a character sequence.
852       */
853      public int countIn(CharSequence sequence) {
854        int count = 0;
855        for (int i = 0; i < sequence.length(); i++) {
856          if (matches(sequence.charAt(i))) {
857            count++;
858          }
859        }
860        return count;
861      }
862    
863      /**
864       * Returns a string containing all non-matching characters of a character sequence, in order. For
865       * example: <pre>   {@code
866       *
867       *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
868       *
869       * ... returns {@code "bzr"}.
870       */
871      @CheckReturnValue
872      public String removeFrom(CharSequence sequence) {
873        String string = sequence.toString();
874        int pos = indexIn(string);
875        if (pos == -1) {
876          return string;
877        }
878    
879        char[] chars = string.toCharArray();
880        int spread = 1;
881    
882        // This unusual loop comes from extensive benchmarking
883        OUT: while (true) {
884          pos++;
885          while (true) {
886            if (pos == chars.length) {
887              break OUT;
888            }
889            if (matches(chars[pos])) {
890              break;
891            }
892            chars[pos - spread] = chars[pos];
893            pos++;
894          }
895          spread++;
896        }
897        return new String(chars, 0, pos - spread);
898      }
899    
900      /**
901       * Returns a string containing all matching characters of a character sequence, in order. For
902       * example: <pre>   {@code
903       *
904       *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
905       *
906       * ... returns {@code "aaa"}.
907       */
908      @CheckReturnValue
909      public String retainFrom(CharSequence sequence) {
910        return negate().removeFrom(sequence);
911      }
912    
913      /**
914       * Returns a string copy of the input character sequence, with each character that matches this
915       * matcher replaced by a given replacement character. For example: <pre>   {@code
916       *
917       *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
918       *
919       * ... returns {@code "rodor"}.
920       *
921       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
922       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
923       * character.
924       *
925       * @param sequence the character sequence to replace matching characters in
926       * @param replacement the character to append to the result string in place of each matching
927       *        character in {@code sequence}
928       * @return the new string
929       */
930      @CheckReturnValue
931      public String replaceFrom(CharSequence sequence, char replacement) {
932        String string = sequence.toString();
933        int pos = indexIn(string);
934        if (pos == -1) {
935          return string;
936        }
937        char[] chars = string.toCharArray();
938        chars[pos] = replacement;
939        for (int i = pos + 1; i < chars.length; i++) {
940          if (matches(chars[i])) {
941            chars[i] = replacement;
942          }
943        }
944        return new String(chars);
945      }
946    
947      /**
948       * Returns a string copy of the input character sequence, with each character that matches this
949       * matcher replaced by a given replacement sequence. For example: <pre>   {@code
950       *
951       *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
952       *
953       * ... returns {@code "yoohoo"}.
954       *
955       * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
956       * off calling {@link #replaceFrom(CharSequence, char)} directly.
957       *
958       * @param sequence the character sequence to replace matching characters in
959       * @param replacement the characters to append to the result string in place of each matching
960       *        character in {@code sequence}
961       * @return the new string
962       */
963      @CheckReturnValue
964      public String replaceFrom(CharSequence sequence, CharSequence replacement) {
965        int replacementLen = replacement.length();
966        if (replacementLen == 0) {
967          return removeFrom(sequence);
968        }
969        if (replacementLen == 1) {
970          return replaceFrom(sequence, replacement.charAt(0));
971        }
972    
973        String string = sequence.toString();
974        int pos = indexIn(string);
975        if (pos == -1) {
976          return string;
977        }
978    
979        int len = string.length();
980        StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
981    
982        int oldpos = 0;
983        do {
984          buf.append(string, oldpos, pos);
985          buf.append(replacement);
986          oldpos = pos + 1;
987          pos = indexIn(string, oldpos);
988        } while (pos != -1);
989    
990        buf.append(string, oldpos, len);
991        return buf.toString();
992      }
993    
994      /**
995       * Returns a substring of the input character sequence that omits all characters this matcher
996       * matches from the beginning and from the end of the string. For example: <pre>   {@code
997       *
998       *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
999       *
1000       * ... returns {@code "cat"}.
1001       *
1002       * <p>Note that: <pre>   {@code
1003       *
1004       *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
1005       *
1006       * ... is equivalent to {@link String#trim()}.
1007       */
1008      @CheckReturnValue
1009      public String trimFrom(CharSequence sequence) {
1010        int len = sequence.length();
1011        int first;
1012        int last;
1013    
1014        for (first = 0; first < len; first++) {
1015          if (!matches(sequence.charAt(first))) {
1016            break;
1017          }
1018        }
1019        for (last = len - 1; last > first; last--) {
1020          if (!matches(sequence.charAt(last))) {
1021            break;
1022          }
1023        }
1024    
1025        return sequence.subSequence(first, last + 1).toString();
1026      }
1027    
1028      /**
1029       * Returns a substring of the input character sequence that omits all characters this matcher
1030       * matches from the beginning of the string. For example: <pre> {@code
1031       *
1032       *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1033       *
1034       * ... returns {@code "catbab"}.
1035       */
1036      @CheckReturnValue
1037      public String trimLeadingFrom(CharSequence sequence) {
1038        int len = sequence.length();
1039        int first;
1040    
1041        for (first = 0; first < len; first++) {
1042          if (!matches(sequence.charAt(first))) {
1043            break;
1044          }
1045        }
1046    
1047        return sequence.subSequence(first, len).toString();
1048      }
1049    
1050      /**
1051       * Returns a substring of the input character sequence that omits all characters this matcher
1052       * matches from the end of the string. For example: <pre> {@code
1053       *
1054       *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1055       *
1056       * ... returns {@code "abacat"}.
1057       */
1058      @CheckReturnValue
1059      public String trimTrailingFrom(CharSequence sequence) {
1060        int len = sequence.length();
1061        int last;
1062    
1063        for (last = len - 1; last >= 0; last--) {
1064          if (!matches(sequence.charAt(last))) {
1065            break;
1066          }
1067        }
1068    
1069        return sequence.subSequence(0, last + 1).toString();
1070      }
1071    
1072      /**
1073       * Returns a string copy of the input character sequence, with each group of consecutive
1074       * characters that match this matcher replaced by a single replacement character. For example:
1075       * <pre>   {@code
1076       *
1077       *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1078       *
1079       * ... returns {@code "b-p-r"}.
1080       *
1081       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1082       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1083       * character.
1084       *
1085       * @param sequence the character sequence to replace matching groups of characters in
1086       * @param replacement the character to append to the result string in place of each group of
1087       *        matching characters in {@code sequence}
1088       * @return the new string
1089       */
1090      @CheckReturnValue
1091      public String collapseFrom(CharSequence sequence, char replacement) {
1092        int first = indexIn(sequence);
1093        if (first == -1) {
1094          return sequence.toString();
1095        }
1096    
1097        // TODO(kevinb): see if this implementation can be made faster
1098        StringBuilder builder = new StringBuilder(sequence.length())
1099            .append(sequence.subSequence(0, first))
1100            .append(replacement);
1101        boolean in = true;
1102        for (int i = first + 1; i < sequence.length(); i++) {
1103          char c = sequence.charAt(i);
1104          if (apply(c)) {
1105            if (!in) {
1106              builder.append(replacement);
1107              in = true;
1108            }
1109          } else {
1110            builder.append(c);
1111            in = false;
1112          }
1113        }
1114        return builder.toString();
1115      }
1116    
1117      /**
1118       * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1119       * groups of matching characters at the start or end of the sequence are removed without
1120       * replacement.
1121       */
1122      @CheckReturnValue
1123      public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1124        int first = negate().indexIn(sequence);
1125        if (first == -1) {
1126          return ""; // everything matches. nothing's left.
1127        }
1128        StringBuilder builder = new StringBuilder(sequence.length());
1129        boolean inMatchingGroup = false;
1130        for (int i = first; i < sequence.length(); i++) {
1131          char c = sequence.charAt(i);
1132          if (apply(c)) {
1133            inMatchingGroup = true;
1134          } else {
1135            if (inMatchingGroup) {
1136              builder.append(replacement);
1137              inMatchingGroup = false;
1138            }
1139            builder.append(c);
1140          }
1141        }
1142        return builder.toString();
1143      }
1144    
1145      // Predicate interface
1146    
1147      /**
1148       * Returns {@code true} if this matcher matches the given character.
1149       *
1150       * @throws NullPointerException if {@code character} is null
1151       */
1152      @Override public boolean apply(Character character) {
1153        return matches(character);
1154      }
1155    }