001    /*
002     * Copyright (C) 2008 Google Inc.
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005     * in compliance with the License. You may obtain a copy of the License at
006     *
007     * http://www.apache.org/licenses/LICENSE-2.0
008     *
009     * Unless required by applicable law or agreed to in writing, software distributed under the
010     * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
011     * express or implied. See the License for the specific language governing permissions and
012     * limitations under the License.
013     */
014    
015    package com.google.common.base;
016    
017    import static com.google.common.base.Preconditions.checkArgument;
018    import static com.google.common.base.Preconditions.checkNotNull;
019    
020    import com.google.common.annotations.Beta;
021    import com.google.common.annotations.GwtCompatible;
022    
023    import java.util.ArrayList;
024    import java.util.Arrays;
025    import java.util.List;
026    
027    /**
028     * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029     * for any {@link Object}. Also offers basic text processing methods based on this function.
030     * Implementations are strongly encouraged to be side-effect-free and immutable.
031     *
032     * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033     * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
034     *
035     * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
036     * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
037     * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
038     * treats these just as two separate characters.
039     *
040     * @author Kevin Bourrillion
041     * @since 1
042     */
043    @Beta // Possibly change from chars to code points; decide constants vs. methods
044    @GwtCompatible
045    public abstract class CharMatcher implements Predicate<Character> {
046      // Constants
047    
048      // Excludes 2000-2000a, which is handled as a range
049      private static final String BREAKING_WHITESPACE_CHARS =
050          "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
051    
052      // Excludes 2007, which is handled as a gap in a pair of ranges
053      private static final String NON_BREAKING_WHITESPACE_CHARS =
054          "\u00a0\u180e\u202f";
055    
056      /**
057       * Determines whether a character is whitespace according to the latest Unicode standard, as
058       * illustrated
059       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
060       * This is not the same definition used by other Java APIs. (See a
061       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
062       * definitions of "whitespace"</a>.)
063       *
064       * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
065       * to date.
066       */
067      public static final CharMatcher WHITESPACE =
068          anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
069              .or(inRange('\u2000', '\u200a'))
070              .precomputed();
071    
072      /**
073       * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
074       * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
075       * discussion of that term.
076       *
077       * @since 2
078       */
079      public static final CharMatcher BREAKING_WHITESPACE =
080          anyOf(BREAKING_WHITESPACE_CHARS)
081              .or(inRange('\u2000', '\u2006'))
082              .or(inRange('\u2008', '\u200a'))
083              .precomputed();
084    
085      /**
086       * Determines whether a character is ASCII, meaning that its code point is less than 128.
087       */
088      public static final CharMatcher ASCII = inRange('\0', '\u007f');
089    
090      /**
091       * Determines whether a character is a digit according to
092       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
093       */
094      public static final CharMatcher DIGIT;
095    
096      static {
097        CharMatcher digit = inRange('0', '9');
098        String zeroes =
099            "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
100                + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
101                + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
102        for (char base : zeroes.toCharArray()) {
103          digit = digit.or(inRange(base, (char) (base + 9)));
104        }
105        DIGIT = digit.precomputed();
106      }
107    
108      /**
109       * Determines whether a character is whitespace according to {@link Character#isWhitespace(char)
110       * Java's definition}; it is usually preferable to use {@link #WHITESPACE}. (See a
111       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
112       * definitions of "whitespace"</a>.)
113       */
114      public static final CharMatcher JAVA_WHITESPACE =
115          inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
116              .or(inRange('\u001c', '\u0020'))
117              .or(is('\u1680'))
118              .or(is('\u180e'))
119              .or(inRange('\u2000', '\u2006'))
120              .or(inRange('\u2008', '\u200b'))
121              .or(inRange('\u2028', '\u2029'))
122              .or(is('\u205f'))
123              .or(is('\u3000'))
124              .precomputed();
125    
126      /**
127       * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
128       * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
129       */
130      public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
131        @Override public boolean matches(char c) {
132          return Character.isDigit(c);
133        }
134      };
135    
136      /**
137       * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
138       * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
139       * inRange('a', 'z').or(inRange('A', 'Z'))}.
140       */
141      public static final CharMatcher JAVA_LETTER = new CharMatcher() {
142        @Override public boolean matches(char c) {
143          return Character.isLetter(c);
144        }
145      };
146    
147      /**
148       * Determines whether a character is a letter or digit according to {@link
149       * Character#isLetterOrDigit(char) Java's definition}.
150       */
151      public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
152        @Override public boolean matches(char c) {
153          return Character.isLetterOrDigit(c);
154        }
155      };
156    
157      /**
158       * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
159       * Java's definition}.
160       */
161      public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
162        @Override public boolean matches(char c) {
163          return Character.isUpperCase(c);
164        }
165      };
166    
167      /**
168       * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
169       * Java's definition}.
170       */
171      public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
172        @Override public boolean matches(char c) {
173          return Character.isLowerCase(c);
174        }
175      };
176    
177      /**
178       * Determines whether a character is an ISO control character as specified by {@link
179       * Character#isISOControl(char)}.
180       */
181      public static final CharMatcher JAVA_ISO_CONTROL =
182          inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
183    
184      /**
185       * Determines whether a character is invisible; that is, if its Unicode category is any of
186       * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
187       * PRIVATE_USE according to ICU4J.
188       */
189      public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
190          .or(inRange('\u007f', '\u00a0'))
191          .or(is('\u00ad'))
192          .or(inRange('\u0600', '\u0603'))
193          .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
194          .or(inRange('\u2000', '\u200f'))
195          .or(inRange('\u2028', '\u202f'))
196          .or(inRange('\u205f', '\u2064'))
197          .or(inRange('\u206a', '\u206f'))
198          .or(is('\u3000'))
199          .or(inRange('\ud800', '\uf8ff'))
200          .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
201          .precomputed();
202    
203      /**
204       * Determines whether a character is single-width (not double-width). When in doubt, this matcher
205       * errs on the side of returning {@code false} (that is, it tends to assume a character is
206       * double-width).
207       *
208       * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
209       * date.
210       */
211      public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
212          .or(is('\u05be'))
213          .or(inRange('\u05d0', '\u05ea'))
214          .or(is('\u05f3'))
215          .or(is('\u05f4'))
216          .or(inRange('\u0600', '\u06ff'))
217          .or(inRange('\u0750', '\u077f'))
218          .or(inRange('\u0e00', '\u0e7f'))
219          .or(inRange('\u1e00', '\u20af'))
220          .or(inRange('\u2100', '\u213a'))
221          .or(inRange('\ufb50', '\ufdff'))
222          .or(inRange('\ufe70', '\ufeff'))
223          .or(inRange('\uff61', '\uffdc'))
224          .precomputed();
225    
226      /** Matches any character. */
227      public static final CharMatcher ANY =
228          new CharMatcher() {
229            @Override public boolean matches(char c) {
230              return true;
231            }
232    
233            @Override public int indexIn(CharSequence sequence) {
234              return (sequence.length() == 0) ? -1 : 0;
235            }
236    
237            @Override public int indexIn(CharSequence sequence, int start) {
238              int length = sequence.length();
239              Preconditions.checkPositionIndex(start, length);
240              return (start == length) ? -1 : start;
241            }
242    
243            @Override public int lastIndexIn(CharSequence sequence) {
244              return sequence.length() - 1;
245            }
246    
247            @Override public boolean matchesAllOf(CharSequence sequence) {
248              checkNotNull(sequence);
249              return true;
250            }
251    
252            @Override public boolean matchesNoneOf(CharSequence sequence) {
253              return sequence.length() == 0;
254            }
255    
256            @Override public String removeFrom(CharSequence sequence) {
257              checkNotNull(sequence);
258              return "";
259            }
260    
261            @Override public String replaceFrom(CharSequence sequence, char replacement) {
262              char[] array = new char[sequence.length()];
263              Arrays.fill(array, replacement);
264              return new String(array);
265            }
266    
267            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
268              StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
269              for (int i = 0; i < sequence.length(); i++) {
270                retval.append(replacement);
271              }
272              return retval.toString();
273            }
274    
275            @Override public String collapseFrom(CharSequence sequence, char replacement) {
276              return (sequence.length() == 0) ? "" : String.valueOf(replacement);
277            }
278    
279            @Override public String trimFrom(CharSequence sequence) {
280              checkNotNull(sequence);
281              return "";
282            }
283    
284            @Override public int countIn(CharSequence sequence) {
285              return sequence.length();
286            }
287    
288            @Override public CharMatcher and(CharMatcher other) {
289              return checkNotNull(other);
290            }
291    
292            @Override public CharMatcher or(CharMatcher other) {
293              checkNotNull(other);
294              return this;
295            }
296    
297            @Override public CharMatcher negate() {
298              return NONE;
299            }
300    
301            @Override public CharMatcher precomputed() {
302              return this;
303            }
304          };
305    
306      /** Matches no characters. */
307      public static final CharMatcher NONE =
308          new CharMatcher() {
309            @Override public boolean matches(char c) {
310              return false;
311            }
312    
313            @Override public int indexIn(CharSequence sequence) {
314              checkNotNull(sequence);
315              return -1;
316            }
317    
318            @Override public int indexIn(CharSequence sequence, int start) {
319              int length = sequence.length();
320              Preconditions.checkPositionIndex(start, length);
321              return -1;
322            }
323    
324            @Override public int lastIndexIn(CharSequence sequence) {
325              checkNotNull(sequence);
326              return -1;
327            }
328    
329            @Override public boolean matchesAllOf(CharSequence sequence) {
330              return sequence.length() == 0;
331            }
332    
333            @Override public boolean matchesNoneOf(CharSequence sequence) {
334              checkNotNull(sequence);
335              return true;
336            }
337    
338            @Override public String removeFrom(CharSequence sequence) {
339              return sequence.toString();
340            }
341    
342            @Override public String replaceFrom(CharSequence sequence, char replacement) {
343              return sequence.toString();
344            }
345    
346            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
347              checkNotNull(replacement);
348              return sequence.toString();
349            }
350    
351            @Override public String collapseFrom(CharSequence sequence, char replacement) {
352              return sequence.toString();
353            }
354    
355            @Override public String trimFrom(CharSequence sequence) {
356              return sequence.toString();
357            }
358    
359            @Override public int countIn(CharSequence sequence) {
360              checkNotNull(sequence);
361              return 0;
362            }
363    
364            @Override public CharMatcher and(CharMatcher other) {
365              checkNotNull(other);
366              return this;
367            }
368    
369            @Override public CharMatcher or(CharMatcher other) {
370              return checkNotNull(other);
371            }
372    
373            @Override public CharMatcher negate() {
374              return ANY;
375            }
376    
377            @Override void setBits(LookupTable table) {}
378    
379            @Override public CharMatcher precomputed() {
380              return this;
381            }
382          };
383    
384      // Static factories
385    
386      /**
387       * Returns a {@code char} matcher that matches only one specified character.
388       */
389      public static CharMatcher is(final char match) {
390        return new CharMatcher() {
391          @Override public boolean matches(char c) {
392            return c == match;
393          }
394    
395          @Override public String replaceFrom(CharSequence sequence, char replacement) {
396            return sequence.toString().replace(match, replacement);
397          }
398    
399          @Override public CharMatcher and(CharMatcher other) {
400            return other.matches(match) ? this : NONE;
401          }
402    
403          @Override public CharMatcher or(CharMatcher other) {
404            return other.matches(match) ? other : super.or(other);
405          }
406    
407          @Override public CharMatcher negate() {
408            return isNot(match);
409          }
410    
411          @Override void setBits(LookupTable table) {
412            table.set(match);
413          }
414    
415          @Override public CharMatcher precomputed() {
416            return this;
417          }
418        };
419      }
420    
421      /**
422       * Returns a {@code char} matcher that matches any character except the one specified.
423       *
424       * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
425       */
426      public static CharMatcher isNot(final char match) {
427        return new CharMatcher() {
428          @Override public boolean matches(char c) {
429            return c != match;
430          }
431    
432          @Override public CharMatcher and(CharMatcher other) {
433            return other.matches(match) ? super.and(other) : other;
434          }
435    
436          @Override public CharMatcher or(CharMatcher other) {
437            return other.matches(match) ? ANY : this;
438          }
439    
440          @Override public CharMatcher negate() {
441            return is(match);
442          }
443        };
444      }
445    
446      /**
447       * Returns a {@code char} matcher that matches any character present in the given character
448       * sequence.
449       */
450      public static CharMatcher anyOf(final CharSequence sequence) {
451        switch (sequence.length()) {
452          case 0:
453            return NONE;
454          case 1:
455            return is(sequence.charAt(0));
456          case 2:
457            final char match1 = sequence.charAt(0);
458            final char match2 = sequence.charAt(1);
459            return new CharMatcher() {
460              @Override public boolean matches(char c) {
461                return c == match1 || c == match2;
462              }
463    
464              @Override void setBits(LookupTable table) {
465                table.set(match1);
466                table.set(match2);
467              }
468    
469              @Override public CharMatcher precomputed() {
470                return this;
471              }
472            };
473        }
474    
475        final char[] chars = sequence.toString().toCharArray();
476        Arrays.sort(chars); // not worth collapsing duplicates
477    
478        return new CharMatcher() {
479          @Override public boolean matches(char c) {
480            return Arrays.binarySearch(chars, c) >= 0;
481          }
482    
483          @Override void setBits(LookupTable table) {
484            for (char c : chars) {
485              table.set(c);
486            }
487          }
488        };
489      }
490    
491      /**
492       * Returns a {@code char} matcher that matches any character not present in the given character
493       * sequence.
494       */
495      public static CharMatcher noneOf(CharSequence sequence) {
496        return anyOf(sequence).negate();
497      }
498    
499      /**
500       * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
501       * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
502       * CharMatcher.inRange('a', 'z')}.
503       *
504       * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
505       */
506      public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
507        checkArgument(endInclusive >= startInclusive);
508        return new CharMatcher() {
509          @Override public boolean matches(char c) {
510            return startInclusive <= c && c <= endInclusive;
511          }
512    
513          @Override void setBits(LookupTable table) {
514            char c = startInclusive;
515            while (true) {
516              table.set(c);
517              if (c++ == endInclusive) {
518                break;
519              }
520            }
521          }
522    
523          @Override public CharMatcher precomputed() {
524            return this;
525          }
526        };
527      }
528    
529      /**
530       * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
531       * which operates on primitive {@code char} instances instead.
532       */
533      public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
534        checkNotNull(predicate);
535        if (predicate instanceof CharMatcher) {
536          return (CharMatcher) predicate;
537        }
538        return new CharMatcher() {
539          @Override public boolean matches(char c) {
540            return predicate.apply(c);
541          }
542    
543          @Override public boolean apply(Character character) {
544            return predicate.apply(checkNotNull(character));
545          }
546        };
547      }
548    
549      // Abstract methods
550    
551      /** Determines a true or false value for the given character. */
552      public abstract boolean matches(char c);
553    
554      // Non-static factories
555    
556      /**
557       * Returns a matcher that matches any character not matched by this matcher.
558       */
559      public CharMatcher negate() {
560        final CharMatcher original = this;
561        return new CharMatcher() {
562          @Override public boolean matches(char c) {
563            return !original.matches(c);
564          }
565    
566          @Override public boolean matchesAllOf(CharSequence sequence) {
567            return original.matchesNoneOf(sequence);
568          }
569    
570          @Override public boolean matchesNoneOf(CharSequence sequence) {
571            return original.matchesAllOf(sequence);
572          }
573    
574          @Override public int countIn(CharSequence sequence) {
575            return sequence.length() - original.countIn(sequence);
576          }
577    
578          @Override public CharMatcher negate() {
579            return original;
580          }
581        };
582      }
583    
584      /**
585       * Returns a matcher that matches any character matched by both this matcher and {@code other}.
586       */
587      public CharMatcher and(CharMatcher other) {
588        return new And(Arrays.asList(this, checkNotNull(other)));
589      }
590    
591      private static class And extends CharMatcher {
592        List<CharMatcher> components;
593    
594        And(List<CharMatcher> components) {
595          this.components = components; // Skip defensive copy (private)
596        }
597    
598        @Override public boolean matches(char c) {
599          for (CharMatcher matcher : components) {
600            if (!matcher.matches(c)) {
601              return false;
602            }
603          }
604          return true;
605        }
606    
607        @Override public CharMatcher and(CharMatcher other) {
608          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
609          newComponents.add(checkNotNull(other));
610          return new And(newComponents);
611        }
612      }
613    
614      /**
615       * Returns a matcher that matches any character matched by either this matcher or {@code other}.
616       */
617      public CharMatcher or(CharMatcher other) {
618        return new Or(Arrays.asList(this, checkNotNull(other)));
619      }
620    
621      private static class Or extends CharMatcher {
622        List<CharMatcher> components;
623    
624        Or(List<CharMatcher> components) {
625          this.components = components; // Skip defensive copy (private)
626        }
627    
628        @Override public boolean matches(char c) {
629          for (CharMatcher matcher : components) {
630            if (matcher.matches(c)) {
631              return true;
632            }
633          }
634          return false;
635        }
636    
637        @Override public CharMatcher or(CharMatcher other) {
638          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
639          newComponents.add(checkNotNull(other));
640          return new Or(newComponents);
641        }
642    
643        @Override void setBits(LookupTable table) {
644          for (CharMatcher matcher : components) {
645            matcher.setBits(table);
646          }
647        }
648      }
649    
650      /**
651       * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
652       * query than the original; your mileage may vary. Precomputation takes time and is likely to be
653       * worthwhile only if the precomputed matcher is queried many thousands of times.
654       *
655       * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
656       * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
657       * worthwhile tradeoff in a browser.
658       */
659      public CharMatcher precomputed() {
660        return Platform.precomputeCharMatcher(this);
661      }
662    
663      /**
664       * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
665       * on {@link Platform} so that we can have different behavior in GWT.
666       *
667       * <p>The default precomputation is to cache the configuration of the original matcher in an
668       * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
669       * than the original.
670       *
671       * <p>The default implementation creates a new bit array and passes it to {@link
672       * #setBits(LookupTable)}.
673       */
674      CharMatcher precomputedInternal() {
675        final LookupTable table = new LookupTable();
676        setBits(table);
677    
678        return new CharMatcher() {
679          @Override public boolean matches(char c) {
680            return table.get(c);
681          }
682    
683          // TODO(kevinb): make methods like negate() smart?
684    
685          @Override public CharMatcher precomputed() {
686            return this;
687          }
688        };
689      }
690    
691      /**
692       * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
693       * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
694       *
695       * <p>The default implementation loops over every possible character value, invoking {@link
696       * #matches} for each one.
697       */
698      void setBits(LookupTable table) {
699        char c = Character.MIN_VALUE;
700        while (true) {
701          if (matches(c)) {
702            table.set(c);
703          }
704          if (c++ == Character.MAX_VALUE) {
705            break;
706          }
707        }
708      }
709    
710      /**
711       * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
712       *
713       * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
714       * simpler java.util.BitSet.
715       */
716      private static final class LookupTable {
717        int[] data = new int[2048];
718    
719        void set(char index) {
720          data[index >> 5] |= (1 << index);
721        }
722    
723        boolean get(char index) {
724          return (data[index >> 5] & (1 << index)) != 0;
725        }
726      }
727    
728      // Text processing routines
729    
730      /**
731       * Returns {@code true} if a character sequence contains at least one matching character.
732       * Equivalent to {@code !matchesNoneOf(sequence)}.
733       *
734       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
735       * character, until this returns {@code true} or the end is reached.
736       *
737       * @param sequence the character sequence to examine, possibly empty
738       * @return {@code true} if this matcher matches at least one character in the sequence
739       * @since 8
740       */
741      public boolean matchesAnyOf(CharSequence sequence) {
742        return !matchesNoneOf(sequence);
743      }
744    
745      /**
746       * Returns {@code true} if a character sequence contains only matching characters.
747       *
748       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
749       * character, until this returns {@code false} or the end is reached.
750       *
751       * @param sequence the character sequence to examine, possibly empty
752       * @return {@code true} if this matcher matches every character in the sequence, including when
753       *         the sequence is empty
754       */
755      public boolean matchesAllOf(CharSequence sequence) {
756        for (int i = sequence.length() - 1; i >= 0; i--) {
757          if (!matches(sequence.charAt(i))) {
758            return false;
759          }
760        }
761        return true;
762      }
763    
764      /**
765       * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
766       * {@code !matchesAnyOf(sequence)}.
767       *
768       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
769       * character, until this returns {@code false} or the end is reached.
770       *
771       * @param sequence the character sequence to examine, possibly empty
772       * @return {@code true} if this matcher matches every character in the sequence, including when
773       *         the sequence is empty
774       */
775      public boolean matchesNoneOf(CharSequence sequence) {
776        return indexIn(sequence) == -1;
777      }
778    
779      // TODO(kevinb): add matchesAnyOf()
780    
781      /**
782       * Returns the index of the first matching character in a character sequence, or {@code -1} if no
783       * matching character is present.
784       *
785       * <p>The default implementation iterates over the sequence in forward order calling {@link
786       * #matches} for each character.
787       *
788       * @param sequence the character sequence to examine from the beginning
789       * @return an index, or {@code -1} if no character matches
790       */
791      public int indexIn(CharSequence sequence) {
792        int length = sequence.length();
793        for (int i = 0; i < length; i++) {
794          if (matches(sequence.charAt(i))) {
795            return i;
796          }
797        }
798        return -1;
799      }
800    
801      /**
802       * Returns the index of the first matching character in a character sequence, starting from a
803       * given position, or {@code -1} if no character matches after that position.
804       *
805       * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
806       * start}, calling {@link #matches} for each character.
807       *
808       * @param sequence the character sequence to examine
809       * @param start the first index to examine; must be nonnegative and no greater than {@code
810       *        sequence.length()}
811       * @return the index of the first matching character, guaranteed to be no less than {@code start},
812       *         or {@code -1} if no character matches
813       * @throws IndexOutOfBoundsException if start is negative or greater than {@code
814       *         sequence.length()}
815       */
816      public int indexIn(CharSequence sequence, int start) {
817        int length = sequence.length();
818        Preconditions.checkPositionIndex(start, length);
819        for (int i = start; i < length; i++) {
820          if (matches(sequence.charAt(i))) {
821            return i;
822          }
823        }
824        return -1;
825      }
826    
827      /**
828       * Returns the index of the last matching character in a character sequence, or {@code -1} if no
829       * matching character is present.
830       *
831       * <p>The default implementation iterates over the sequence in reverse order calling {@link
832       * #matches} for each character.
833       *
834       * @param sequence the character sequence to examine from the end
835       * @return an index, or {@code -1} if no character matches
836       */
837      public int lastIndexIn(CharSequence sequence) {
838        for (int i = sequence.length() - 1; i >= 0; i--) {
839          if (matches(sequence.charAt(i))) {
840            return i;
841          }
842        }
843        return -1;
844      }
845    
846      /**
847       * Returns the number of matching characters found in a character sequence.
848       */
849      public int countIn(CharSequence sequence) {
850        int count = 0;
851        for (int i = 0; i < sequence.length(); i++) {
852          if (matches(sequence.charAt(i))) {
853            count++;
854          }
855        }
856        return count;
857      }
858    
859      /**
860       * Returns a string containing all non-matching characters of a character sequence, in order. For
861       * example: <pre>   {@code
862       *
863       *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
864       *
865       * ... returns {@code "bzr"}.
866       */
867      public String removeFrom(CharSequence sequence) {
868        String string = sequence.toString();
869        int pos = indexIn(string);
870        if (pos == -1) {
871          return string;
872        }
873    
874        char[] chars = string.toCharArray();
875        int spread = 1;
876    
877        // This unusual loop comes from extensive benchmarking
878        OUT: while (true) {
879          pos++;
880          while (true) {
881            if (pos == chars.length) {
882              break OUT;
883            }
884            if (matches(chars[pos])) {
885              break;
886            }
887            chars[pos - spread] = chars[pos];
888            pos++;
889          }
890          spread++;
891        }
892        return new String(chars, 0, pos - spread);
893      }
894    
895      /**
896       * Returns a string containing all matching characters of a character sequence, in order. For
897       * example: <pre>   {@code
898       *
899       *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
900       *
901       * ... returns {@code "aaa"}.
902       */
903      public String retainFrom(CharSequence sequence) {
904        return negate().removeFrom(sequence);
905      }
906    
907      /**
908       * Returns a string copy of the input character sequence, with each character that matches this
909       * matcher replaced by a given replacement character. For example: <pre>   {@code
910       *
911       *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
912       *
913       * ... returns {@code "rodor"}.
914       *
915       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
916       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
917       * character.
918       *
919       * @param sequence the character sequence to replace matching characters in
920       * @param replacement the character to append to the result string in place of each matching
921       *        character in {@code sequence}
922       * @return the new string
923       */
924      public String replaceFrom(CharSequence sequence, char replacement) {
925        String string = sequence.toString();
926        int pos = indexIn(string);
927        if (pos == -1) {
928          return string;
929        }
930        char[] chars = string.toCharArray();
931        chars[pos] = replacement;
932        for (int i = pos + 1; i < chars.length; i++) {
933          if (matches(chars[i])) {
934            chars[i] = replacement;
935          }
936        }
937        return new String(chars);
938      }
939    
940      /**
941       * Returns a string copy of the input character sequence, with each character that matches this
942       * matcher replaced by a given replacement sequence. For example: <pre>   {@code
943       *
944       *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
945       *
946       * ... returns {@code "yoohoo"}.
947       *
948       * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
949       * off calling {@link #replaceFrom(CharSequence, char)} directly.
950       *
951       * @param sequence the character sequence to replace matching characters in
952       * @param replacement the characters to append to the result string in place of each matching
953       *        character in {@code sequence}
954       * @return the new string
955       */
956      public String replaceFrom(CharSequence sequence, CharSequence replacement) {
957        int replacementLen = replacement.length();
958        if (replacementLen == 0) {
959          return removeFrom(sequence);
960        }
961        if (replacementLen == 1) {
962          return replaceFrom(sequence, replacement.charAt(0));
963        }
964    
965        String string = sequence.toString();
966        int pos = indexIn(string);
967        if (pos == -1) {
968          return string;
969        }
970    
971        int len = string.length();
972        StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
973    
974        int oldpos = 0;
975        do {
976          buf.append(string, oldpos, pos);
977          buf.append(replacement);
978          oldpos = pos + 1;
979          pos = indexIn(string, oldpos);
980        } while (pos != -1);
981    
982        buf.append(string, oldpos, len);
983        return buf.toString();
984      }
985    
986      /**
987       * Returns a substring of the input character sequence that omits all characters this matcher
988       * matches from the beginning and from the end of the string. For example: <pre>   {@code
989       *
990       *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
991       *
992       * ... returns {@code "cat"}.
993       *
994       * <p>Note that: <pre>   {@code
995       *
996       *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
997       *
998       * ... is equivalent to {@link String#trim()}.
999       */
1000      public String trimFrom(CharSequence sequence) {
1001        int len = sequence.length();
1002        int first;
1003        int last;
1004    
1005        for (first = 0; first < len; first++) {
1006          if (!matches(sequence.charAt(first))) {
1007            break;
1008          }
1009        }
1010        for (last = len - 1; last > first; last--) {
1011          if (!matches(sequence.charAt(last))) {
1012            break;
1013          }
1014        }
1015    
1016        return sequence.subSequence(first, last + 1).toString();
1017      }
1018    
1019      /**
1020       * Returns a substring of the input character sequence that omits all characters this matcher
1021       * matches from the beginning of the string. For example: <pre> {@code
1022       *
1023       *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1024       *
1025       * ... returns {@code "catbab"}.
1026       */
1027      public String trimLeadingFrom(CharSequence sequence) {
1028        int len = sequence.length();
1029        int first;
1030    
1031        for (first = 0; first < len; first++) {
1032          if (!matches(sequence.charAt(first))) {
1033            break;
1034          }
1035        }
1036    
1037        return sequence.subSequence(first, len).toString();
1038      }
1039    
1040      /**
1041       * Returns a substring of the input character sequence that omits all characters this matcher
1042       * matches from the end of the string. For example: <pre> {@code
1043       *
1044       *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1045       *
1046       * ... returns {@code "abacat"}.
1047       */
1048      public String trimTrailingFrom(CharSequence sequence) {
1049        int len = sequence.length();
1050        int last;
1051    
1052        for (last = len - 1; last >= 0; last--) {
1053          if (!matches(sequence.charAt(last))) {
1054            break;
1055          }
1056        }
1057    
1058        return sequence.subSequence(0, last + 1).toString();
1059      }
1060    
1061      /**
1062       * Returns a string copy of the input character sequence, with each group of consecutive
1063       * characters that match this matcher replaced by a single replacement character. For example:
1064       * <pre>   {@code
1065       *
1066       *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1067       *
1068       * ... returns {@code "b-p-r"}.
1069       *
1070       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1071       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1072       * character.
1073       *
1074       * @param sequence the character sequence to replace matching groups of characters in
1075       * @param replacement the character to append to the result string in place of each group of
1076       *        matching characters in {@code sequence}
1077       * @return the new string
1078       */
1079      public String collapseFrom(CharSequence sequence, char replacement) {
1080        int first = indexIn(sequence);
1081        if (first == -1) {
1082          return sequence.toString();
1083        }
1084    
1085        // TODO(kevinb): see if this implementation can be made faster
1086        StringBuilder builder = new StringBuilder(sequence.length())
1087            .append(sequence.subSequence(0, first))
1088            .append(replacement);
1089        boolean in = true;
1090        for (int i = first + 1; i < sequence.length(); i++) {
1091          char c = sequence.charAt(i);
1092          if (apply(c)) {
1093            if (!in) {
1094              builder.append(replacement);
1095              in = true;
1096            }
1097          } else {
1098            builder.append(c);
1099            in = false;
1100          }
1101        }
1102        return builder.toString();
1103      }
1104    
1105      /**
1106       * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1107       * groups of matching characters at the start or end of the sequence are removed without
1108       * replacement.
1109       */
1110      public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1111        int first = negate().indexIn(sequence);
1112        if (first == -1) {
1113          return ""; // everything matches. nothing's left.
1114        }
1115        StringBuilder builder = new StringBuilder(sequence.length());
1116        boolean inMatchingGroup = false;
1117        for (int i = first; i < sequence.length(); i++) {
1118          char c = sequence.charAt(i);
1119          if (apply(c)) {
1120            inMatchingGroup = true;
1121          } else {
1122            if (inMatchingGroup) {
1123              builder.append(replacement);
1124              inMatchingGroup = false;
1125            }
1126            builder.append(c);
1127          }
1128        }
1129        return builder.toString();
1130      }
1131    
1132      // Predicate interface
1133    
1134      /**
1135       * Returns {@code true} if this matcher matches the given character.
1136       *
1137       * @throws NullPointerException if {@code character} is null
1138       */
1139      @Override public boolean apply(Character character) {
1140        return matches(character);
1141      }
1142    }