Source code

001/*
002 * Copyright (C) 2009 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.net;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.base.Ascii;
023import com.google.common.base.CharMatcher;
024import com.google.common.base.Joiner;
025import com.google.common.base.Optional;
026import com.google.common.base.Splitter;
027import com.google.common.collect.ImmutableList;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.Immutable;
030import com.google.errorprone.annotations.concurrent.LazyInit;
031import com.google.thirdparty.publicsuffix.PublicSuffixPatterns;
032import com.google.thirdparty.publicsuffix.PublicSuffixType;
033import java.util.List;
034import javax.annotation.CheckForNull;
035
036/**
037 * An immutable well-formed internet domain name, such as {@code com} or {@code foo.co.uk}. Only
038 * syntactic analysis is performed; no DNS lookups or other network interactions take place. Thus
039 * there is no guarantee that the domain actually exists on the internet.
040 *
041 * <p>One common use of this class is to determine whether a given string is likely to represent an
042 * addressable domain on the web -- that is, for a candidate string {@code "xxx"}, might browsing to
043 * {@code "http://xxx/"} result in a webpage being displayed? In the past, this test was frequently
044 * done by determining whether the domain ended with a {@linkplain #isPublicSuffix() public suffix}
045 * but was not itself a public suffix. However, this test is no longer accurate. There are many
046 * domains which are both public suffixes and addressable as hosts; {@code "uk.com"} is one example.
047 * Using the subset of public suffixes that are {@linkplain #isRegistrySuffix() registry suffixes},
048 * one can get a better result, as only a few registry suffixes are addressable. However, the most
049 * useful test to determine if a domain is a plausible web host is {@link #hasPublicSuffix()}. This
050 * will return {@code true} for many domains which (currently) are not hosts, such as {@code "com"},
051 * but given that any public suffix may become a host without warning, it is better to err on the
052 * side of permissiveness and thus avoid spurious rejection of valid sites. Of course, to actually
053 * determine addressability of any host, clients of this class will need to perform their own DNS
054 * lookups.
055 *
056 * <p>During construction, names are normalized in two ways:
057 *
058 * <ol>
059 *   <li>ASCII uppercase characters are converted to lowercase.
060 *   <li>Unicode dot separators other than the ASCII period ({@code '.'}) are converted to the ASCII
061 *       period.
062 * </ol>
063 *
064 * <p>The normalized values will be returned from {@link #toString()} and {@link #parts()}, and will
065 * be reflected in the result of {@link #equals(Object)}.
066 *
067 * <p><a href="http://en.wikipedia.org/wiki/Internationalized_domain_name">Internationalized domain
068 * names</a> such as {@code 网络.cn} are supported, as are the equivalent <a
069 * href="http://en.wikipedia.org/wiki/Internationalized_domain_name">IDNA Punycode-encoded</a>
070 * versions.
071 *
072 * @author Catherine Berry
073 * @since 5.0
074 */
075@GwtCompatible(emulated = true)
076@Immutable
077@ElementTypesAreNonnullByDefault
078public final class InternetDomainName {
079
080  private static final CharMatcher DOTS_MATCHER = CharMatcher.anyOf(".\u3002\uFF0E\uFF61");
081  private static final Splitter DOT_SPLITTER = Splitter.on('.');
082  private static final Joiner DOT_JOINER = Joiner.on('.');
083
084  /**
085   * Value of {@link #publicSuffixIndex()} or {@link #registrySuffixIndex()} which indicates that no
086   * relevant suffix was found.
087   */
088  private static final int NO_SUFFIX_FOUND = -1;
089
090  /**
091   * Value of {@link #publicSuffixIndexCache} or {@link #registrySuffixIndexCache} which indicates
092   * that they were not initialized yet.
093   */
094  private static final int SUFFIX_NOT_INITIALIZED = -2;
095
096  /**
097   * Maximum parts (labels) in a domain name. This value arises from the 255-octet limit described
098   * in <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11 with the fact that the
099   * encoding of each part occupies at least two bytes (dot plus label externally, length byte plus
100   * label internally). Thus, if all labels have the minimum size of one byte, 127 of them will fit.
101   */
102  private static final int MAX_PARTS = 127;
103
104  /**
105   * Maximum length of a full domain name, including separators, and leaving room for the root
106   * label. See <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11.
107   */
108  private static final int MAX_LENGTH = 253;
109
110  /**
111   * Maximum size of a single part of a domain name. See <a
112   * href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11.
113   */
114  private static final int MAX_DOMAIN_PART_LENGTH = 63;
115
116  /** The full domain name, converted to lower case. */
117  private final String name;
118
119  /** The parts of the domain name, converted to lower case. */
120  private final ImmutableList<String> parts;
121
122  /**
123   * Cached value of #publicSuffixIndex(). Do not use directly.
124   *
125   * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across
126   * threads before it is initialized, then each thread is likely to compute their own copy of the
127   * value.
128   */
129  @SuppressWarnings("Immutable")
130  @LazyInit
131  private int publicSuffixIndexCache = SUFFIX_NOT_INITIALIZED;
132
133  /**
134   * Cached value of #registrySuffixIndex(). Do not use directly.
135   *
136   * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across
137   * threads before it is initialized, then each thread is likely to compute their own copy of the
138   * value.
139   */
140  @SuppressWarnings("Immutable")
141  @LazyInit
142  private int registrySuffixIndexCache = SUFFIX_NOT_INITIALIZED;
143
144  /** Constructor used to implement {@link #from(String)}, and from subclasses. */
145  InternetDomainName(String name) {
146    // Normalize:
147    // * ASCII characters to lowercase
148    // * All dot-like characters to '.'
149    // * Strip trailing '.'
150
151    name = Ascii.toLowerCase(DOTS_MATCHER.replaceFrom(name, '.'));
152
153    if (name.endsWith(".")) {
154      name = name.substring(0, name.length() - 1);
155    }
156
157    checkArgument(name.length() <= MAX_LENGTH, "Domain name too long: '%s':", name);
158    this.name = name;
159
160    this.parts = ImmutableList.copyOf(DOT_SPLITTER.split(name));
161    checkArgument(parts.size() <= MAX_PARTS, "Domain has too many parts: '%s'", name);
162    checkArgument(validateSyntax(parts), "Not a valid domain name: '%s'", name);
163  }
164
165  /**
166   * Internal constructor that skips validations when creating an instance from parts of an
167   * already-validated InternetDomainName, as in {@link ancestor}.
168   */
169  private InternetDomainName(String name, ImmutableList<String> parts) {
170    checkArgument(!parts.isEmpty(), "Cannot create an InternetDomainName with zero parts.");
171    this.name = name;
172    this.parts = parts;
173  }
174
175  /**
176   * The index in the {@link #parts()} list at which the public suffix begins. For example, for the
177   * domain name {@code myblog.blogspot.co.uk}, the value would be 1 (the index of the {@code
178   * blogspot} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no public
179   * suffix was found.
180   */
181  private int publicSuffixIndex() {
182    int publicSuffixIndexLocal = publicSuffixIndexCache;
183    if (publicSuffixIndexLocal == SUFFIX_NOT_INITIALIZED) {
184      publicSuffixIndexCache =
185          publicSuffixIndexLocal = findSuffixOfType(Optional.<PublicSuffixType>absent());
186    }
187    return publicSuffixIndexLocal;
188  }
189
190  /**
191   * The index in the {@link #parts()} list at which the registry suffix begins. For example, for
192   * the domain name {@code myblog.blogspot.co.uk}, the value would be 2 (the index of the {@code
193   * co} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no registry suffix
194   * was found.
195   */
196  private int registrySuffixIndex() {
197    int registrySuffixIndexLocal = registrySuffixIndexCache;
198    if (registrySuffixIndexLocal == SUFFIX_NOT_INITIALIZED) {
199      registrySuffixIndexCache =
200          registrySuffixIndexLocal = findSuffixOfType(Optional.of(PublicSuffixType.REGISTRY));
201    }
202    return registrySuffixIndexLocal;
203  }
204
205  /**
206   * Returns the index of the leftmost part of the suffix, or -1 if not found. Note that the value
207   * defined as a suffix may not produce {@code true} results from {@link #isPublicSuffix()} or
208   * {@link #isRegistrySuffix()} if the domain ends with an excluded domain pattern such as {@code
209   * "nhs.uk"}.
210   *
211   * <p>If a {@code desiredType} is specified, this method only finds suffixes of the given type.
212   * Otherwise, it finds the first suffix of any type.
213   */
214  private int findSuffixOfType(Optional<PublicSuffixType> desiredType) {
215    int partsSize = parts.size();
216
217    for (int i = 0; i < partsSize; i++) {
218      String ancestorName = DOT_JOINER.join(parts.subList(i, partsSize));
219
220      if (i > 0
221          && matchesType(
222              desiredType, Optional.fromNullable(PublicSuffixPatterns.UNDER.get(ancestorName)))) {
223        return i - 1;
224      }
225
226      if (matchesType(
227          desiredType, Optional.fromNullable(PublicSuffixPatterns.EXACT.get(ancestorName)))) {
228        return i;
229      }
230
231      // Excluded domains (e.g. !nhs.uk) use the next highest
232      // domain as the effective public suffix (e.g. uk).
233
234      if (PublicSuffixPatterns.EXCLUDED.containsKey(ancestorName)) {
235        return i + 1;
236      }
237    }
238
239    return NO_SUFFIX_FOUND;
240  }
241
242  /**
243   * Returns an instance of {@link InternetDomainName} after lenient validation. Specifically,
244   * validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>
245   * ("Internationalizing Domain Names in Applications") is skipped, while validation against <a
246   * href="http://www.ietf.org/rfc/rfc1035.txt">RFC 1035</a> is relaxed in the following ways:
247   *
248   * <ul>
249   *   <li>Any part containing non-ASCII characters is considered valid.
250   *   <li>Underscores ('_') are permitted wherever dashes ('-') are permitted.
251   *   <li>Parts other than the final part may start with a digit, as mandated by <a
252   *       href="https://tools.ietf.org/html/rfc1123#section-2">RFC 1123</a>.
253   * </ul>
254   *
255   * @param domain A domain name (not IP address)
256   * @throws IllegalArgumentException if {@code domain} is not syntactically valid according to
257   *     {@link #isValid}
258   * @since 10.0 (previously named {@code fromLenient})
259   */
260  @CanIgnoreReturnValue // TODO(b/219820829): consider removing
261  public static InternetDomainName from(String domain) {
262    return new InternetDomainName(checkNotNull(domain));
263  }
264
265  /**
266   * Validation method used by {@code from} to ensure that the domain name is syntactically valid
267   * according to RFC 1035.
268   *
269   * @return Is the domain name syntactically valid?
270   */
271  private static boolean validateSyntax(List<String> parts) {
272    int lastIndex = parts.size() - 1;
273
274    // Validate the last part specially, as it has different syntax rules.
275
276    if (!validatePart(parts.get(lastIndex), true)) {
277      return false;
278    }
279
280    for (int i = 0; i < lastIndex; i++) {
281      String part = parts.get(i);
282      if (!validatePart(part, false)) {
283        return false;
284      }
285    }
286
287    return true;
288  }
289
290  private static final CharMatcher DASH_MATCHER = CharMatcher.anyOf("-_");
291
292  private static final CharMatcher DIGIT_MATCHER = CharMatcher.inRange('0', '9');
293
294  private static final CharMatcher LETTER_MATCHER =
295      CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z'));
296
297  private static final CharMatcher PART_CHAR_MATCHER =
298      DIGIT_MATCHER.or(LETTER_MATCHER).or(DASH_MATCHER);
299
300  /**
301   * Helper method for {@link #validateSyntax(List)}. Validates that one part of a domain name is
302   * valid.
303   *
304   * @param part The domain name part to be validated
305   * @param isFinalPart Is this the final (rightmost) domain part?
306   * @return Whether the part is valid
307   */
308  private static boolean validatePart(String part, boolean isFinalPart) {
309
310    // These tests could be collapsed into one big boolean expression, but
311    // they have been left as independent tests for clarity.
312
313    if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) {
314      return false;
315    }
316
317    /*
318     * GWT claims to support java.lang.Character's char-classification methods, but it actually only
319     * works for ASCII. So for now, assume any non-ASCII characters are valid. The only place this
320     * seems to be documented is here:
321     * https://groups.google.com/d/topic/google-web-toolkit-contributors/1UEzsryq1XI
322     *
323     * <p>ASCII characters in the part are expected to be valid per RFC 1035, with underscore also
324     * being allowed due to widespread practice.
325     */
326
327    String asciiChars = CharMatcher.ascii().retainFrom(part);
328
329    if (!PART_CHAR_MATCHER.matchesAllOf(asciiChars)) {
330      return false;
331    }
332
333    // No initial or final dashes or underscores.
334
335    if (DASH_MATCHER.matches(part.charAt(0))
336        || DASH_MATCHER.matches(part.charAt(part.length() - 1))) {
337      return false;
338    }
339
340    /*
341     * Note that we allow (in contravention of a strict interpretation of the relevant RFCs) domain
342     * parts other than the last may begin with a digit (for example, "3com.com"). It's important to
343     * disallow an initial digit in the last part; it's the only thing that stops an IPv4 numeric
344     * address like 127.0.0.1 from looking like a valid domain name.
345     */
346
347    if (isFinalPart && DIGIT_MATCHER.matches(part.charAt(0))) {
348      return false;
349    }
350
351    return true;
352  }
353
354  /**
355   * Returns the individual components of this domain name, normalized to all lower case. For
356   * example, for the domain name {@code mail.google.com}, this method returns the list {@code
357   * ["mail", "google", "com"]}.
358   */
359  public ImmutableList<String> parts() {
360    return parts;
361  }
362
363  /**
364   * Indicates whether this domain name represents a <i>public suffix</i>, as defined by the Mozilla
365   * Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A public suffix
366   * is one under which Internet users can directly register names, such as {@code com}, {@code
367   * co.uk} or {@code pvt.k12.wy.us}. Examples of domain names that are <i>not</i> public suffixes
368   * include {@code google.com}, {@code foo.co.uk}, and {@code myblog.blogspot.com}.
369   *
370   * <p>Public suffixes are a proper superset of {@linkplain #isRegistrySuffix() registry suffixes}.
371   * The list of public suffixes additionally contains privately owned domain names under which
372   * Internet users can register subdomains. An example of a public suffix that is not a registry
373   * suffix is {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i>
374   * registry suffixes, since domain name registries collectively control all internet domain names.
375   *
376   * <p>For considerations on whether the public suffix or registry suffix designation is more
377   * suitable for your application, see <a
378   * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>.
379   *
380   * @return {@code true} if this domain name appears exactly on the public suffix list
381   * @since 6.0
382   */
383  public boolean isPublicSuffix() {
384    return publicSuffixIndex() == 0;
385  }
386
387  /**
388   * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix},
389   * including if it is a public suffix itself. For example, returns {@code true} for {@code
390   * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code
391   * google.invalid}. This is the recommended method for determining whether a domain is potentially
392   * an addressable host.
393   *
394   * <p>Note that this method is equivalent to {@link #hasRegistrySuffix()} because all registry
395   * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes.
396   *
397   * @since 6.0
398   */
399  public boolean hasPublicSuffix() {
400    return publicSuffixIndex() != NO_SUFFIX_FOUND;
401  }
402
403  /**
404   * Returns the {@linkplain #isPublicSuffix() public suffix} portion of the domain name, or {@code
405   * null} if no public suffix is present.
406   *
407   * @since 6.0
408   */
409  @CheckForNull
410  public InternetDomainName publicSuffix() {
411    return hasPublicSuffix() ? ancestor(publicSuffixIndex()) : null;
412  }
413
414  /**
415   * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix},
416   * while not being a public suffix itself. For example, returns {@code true} for {@code
417   * www.google.com}, {@code foo.co.uk} and {@code myblog.blogspot.com}, but not for {@code com},
418   * {@code co.uk}, {@code google.invalid}, or {@code blogspot.com}.
419   *
420   * <p>This method can be used to determine whether it will probably be possible to set cookies on
421   * the domain, though even that depends on individual browsers' implementations of cookie
422   * controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details.
423   *
424   * @since 6.0
425   */
426  public boolean isUnderPublicSuffix() {
427    return publicSuffixIndex() > 0;
428  }
429
430  /**
431   * Indicates whether this domain name is composed of exactly one subdomain component followed by a
432   * {@linkplain #isPublicSuffix() public suffix}. For example, returns {@code true} for {@code
433   * google.com} {@code foo.co.uk}, and {@code myblog.blogspot.com}, but not for {@code
434   * www.google.com}, {@code co.uk}, or {@code blogspot.com}.
435   *
436   * <p>This method can be used to determine whether a domain is probably the highest level for
437   * which cookies may be set, though even that depends on individual browsers' implementations of
438   * cookie controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details.
439   *
440   * @since 6.0
441   */
442  public boolean isTopPrivateDomain() {
443    return publicSuffixIndex() == 1;
444  }
445
446  /**
447   * Returns the portion of this domain name that is one level beneath the {@linkplain
448   * #isPublicSuffix() public suffix}. For example, for {@code x.adwords.google.co.uk} it returns
449   * {@code google.co.uk}, since {@code co.uk} is a public suffix. Similarly, for {@code
450   * myblog.blogspot.com} it returns the same domain, {@code myblog.blogspot.com}, since {@code
451   * blogspot.com} is a public suffix.
452   *
453   * <p>If {@link #isTopPrivateDomain()} is true, the current domain name instance is returned.
454   *
455   * <p>This method can be used to determine the probable highest level parent domain for which
456   * cookies may be set, though even that depends on individual browsers' implementations of cookie
457   * controls.
458   *
459   * @throws IllegalStateException if this domain does not end with a public suffix
460   * @since 6.0
461   */
462  public InternetDomainName topPrivateDomain() {
463    if (isTopPrivateDomain()) {
464      return this;
465    }
466    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
467    return ancestor(publicSuffixIndex() - 1);
468  }
469
470  /**
471   * Indicates whether this domain name represents a <i>registry suffix</i>, as defined by a subset
472   * of the Mozilla Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A
473   * registry suffix is one under which Internet users can directly register names via a domain name
474   * registrar, and have such registrations lawfully protected by internet-governing bodies such as
475   * ICANN. Examples of registry suffixes include {@code com}, {@code co.uk}, and {@code
476   * pvt.k12.wy.us}. Examples of domain names that are <i>not</i> registry suffixes include {@code
477   * google.com} and {@code foo.co.uk}.
478   *
479   * <p>Registry suffixes are a proper subset of {@linkplain #isPublicSuffix() public suffixes}. The
480   * list of public suffixes additionally contains privately owned domain names under which Internet
481   * users can register subdomains. An example of a public suffix that is not a registry suffix is
482   * {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> registry
483   * suffixes, since domain name registries collectively control all internet domain names.
484   *
485   * <p>For considerations on whether the public suffix or registry suffix designation is more
486   * suitable for your application, see <a
487   * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>.
488   *
489   * @return {@code true} if this domain name appears exactly on the public suffix list as part of
490   *     the registry suffix section (labelled "ICANN").
491   * @since 23.3
492   */
493  public boolean isRegistrySuffix() {
494    return registrySuffixIndex() == 0;
495  }
496
497  /**
498   * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix},
499   * including if it is a registry suffix itself. For example, returns {@code true} for {@code
500   * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code
501   * google.invalid}.
502   *
503   * <p>Note that this method is equivalent to {@link #hasPublicSuffix()} because all registry
504   * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes.
505   *
506   * @since 23.3
507   */
508  public boolean hasRegistrySuffix() {
509    return registrySuffixIndex() != NO_SUFFIX_FOUND;
510  }
511
512  /**
513   * Returns the {@linkplain #isRegistrySuffix() registry suffix} portion of the domain name, or
514   * {@code null} if no registry suffix is present.
515   *
516   * @since 23.3
517   */
518  @CheckForNull
519  public InternetDomainName registrySuffix() {
520    return hasRegistrySuffix() ? ancestor(registrySuffixIndex()) : null;
521  }
522
523  /**
524   * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix},
525   * while not being a registry suffix itself. For example, returns {@code true} for {@code
526   * www.google.com}, {@code foo.co.uk} and {@code blogspot.com}, but not for {@code com}, {@code
527   * co.uk}, or {@code google.invalid}.
528   *
529   * @since 23.3
530   */
531  public boolean isUnderRegistrySuffix() {
532    return registrySuffixIndex() > 0;
533  }
534
535  /**
536   * Indicates whether this domain name is composed of exactly one subdomain component followed by a
537   * {@linkplain #isRegistrySuffix() registry suffix}. For example, returns {@code true} for {@code
538   * google.com}, {@code foo.co.uk}, and {@code blogspot.com}, but not for {@code www.google.com},
539   * {@code co.uk}, or {@code myblog.blogspot.com}.
540   *
541   * <p><b>Warning:</b> This method should not be used to determine the probable highest level
542   * parent domain for which cookies may be set. Use {@link #topPrivateDomain()} for that purpose.
543   *
544   * @since 23.3
545   */
546  public boolean isTopDomainUnderRegistrySuffix() {
547    return registrySuffixIndex() == 1;
548  }
549
550  /**
551   * Returns the portion of this domain name that is one level beneath the {@linkplain
552   * #isRegistrySuffix() registry suffix}. For example, for {@code x.adwords.google.co.uk} it
553   * returns {@code google.co.uk}, since {@code co.uk} is a registry suffix. Similarly, for {@code
554   * myblog.blogspot.com} it returns {@code blogspot.com}, since {@code com} is a registry suffix.
555   *
556   * <p>If {@link #isTopDomainUnderRegistrySuffix()} is true, the current domain name instance is
557   * returned.
558   *
559   * <p><b>Warning:</b> This method should not be used to determine whether a domain is probably the
560   * highest level for which cookies may be set. Use {@link #isTopPrivateDomain()} for that purpose.
561   *
562   * @throws IllegalStateException if this domain does not end with a registry suffix
563   * @since 23.3
564   */
565  public InternetDomainName topDomainUnderRegistrySuffix() {
566    if (isTopDomainUnderRegistrySuffix()) {
567      return this;
568    }
569    checkState(isUnderRegistrySuffix(), "Not under a registry suffix: %s", name);
570    return ancestor(registrySuffixIndex() - 1);
571  }
572
573  /** Indicates whether this domain is composed of two or more parts. */
574  public boolean hasParent() {
575    return parts.size() > 1;
576  }
577
578  /**
579   * Returns an {@code InternetDomainName} that is the immediate ancestor of this one; that is, the
580   * current domain with the leftmost part removed. For example, the parent of {@code
581   * www.google.com} is {@code google.com}.
582   *
583   * @throws IllegalStateException if the domain has no parent, as determined by {@link #hasParent}
584   */
585  public InternetDomainName parent() {
586    checkState(hasParent(), "Domain '%s' has no parent", name);
587    return ancestor(1);
588  }
589
590  /**
591   * Returns the ancestor of the current domain at the given number of levels "higher" (rightward)
592   * in the subdomain list. The number of levels must be non-negative, and less than {@code N-1},
593   * where {@code N} is the number of parts in the domain.
594   *
595   * <p>TODO: Reasonable candidate for addition to public API.
596   */
597  private InternetDomainName ancestor(int levels) {
598    ImmutableList<String> ancestorParts = parts.subList(levels, parts.size());
599
600    // levels equals the number of dots that are getting clipped away, then add the length of each
601    // clipped part to get the length of the leading substring that is being removed.
602    int substringFrom = levels;
603    for (int i = 0; i < levels; i++) {
604      substringFrom += parts.get(i).length();
605    }
606    String ancestorName = name.substring(substringFrom);
607
608    return new InternetDomainName(ancestorName, ancestorParts);
609  }
610
611  /**
612   * Creates and returns a new {@code InternetDomainName} by prepending the argument and a dot to
613   * the current name. For example, {@code InternetDomainName.from("foo.com").child("www.bar")}
614   * returns a new {@code InternetDomainName} with the value {@code www.bar.foo.com}. Only lenient
615   * validation is performed, as described {@link #from(String) here}.
616   *
617   * @throws NullPointerException if leftParts is null
618   * @throws IllegalArgumentException if the resulting name is not valid
619   */
620  public InternetDomainName child(String leftParts) {
621    return from(checkNotNull(leftParts) + "." + name);
622  }
623
624  /**
625   * Indicates whether the argument is a syntactically valid domain name using lenient validation.
626   * Specifically, validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>
627   * ("Internationalizing Domain Names in Applications") is skipped.
628   *
629   * <p>The following two code snippets are equivalent:
630   *
631   * <pre>{@code
632   * domainName = InternetDomainName.isValid(name)
633   *     ? InternetDomainName.from(name)
634   *     : DEFAULT_DOMAIN;
635   * }</pre>
636   *
637   * <pre>{@code
638   * try {
639   *   domainName = InternetDomainName.from(name);
640   * } catch (IllegalArgumentException e) {
641   *   domainName = DEFAULT_DOMAIN;
642   * }
643   * }</pre>
644   *
645   * @since 8.0 (previously named {@code isValidLenient})
646   */
647  public static boolean isValid(String name) {
648    try {
649      InternetDomainName unused = from(name);
650      return true;
651    } catch (IllegalArgumentException e) {
652      return false;
653    }
654  }
655
656  /**
657   * If a {@code desiredType} is specified, returns true only if the {@code actualType} is
658   * identical. Otherwise, returns true as long as {@code actualType} is present.
659   */
660  private static boolean matchesType(
661      Optional<PublicSuffixType> desiredType, Optional<PublicSuffixType> actualType) {
662    return desiredType.isPresent() ? desiredType.equals(actualType) : actualType.isPresent();
663  }
664
665  /** Returns the domain name, normalized to all lower case. */
666  @Override
667  public String toString() {
668    return name;
669  }
670
671  /**
672   * Equality testing is based on the text supplied by the caller, after normalization as described
673   * in the class documentation. For example, a non-ASCII Unicode domain name and the Punycode
674   * version of the same domain name would not be considered equal.
675   */
676  @Override
677  public boolean equals(@CheckForNull Object object) {
678    if (object == this) {
679      return true;
680    }
681
682    if (object instanceof InternetDomainName) {
683      InternetDomainName that = (InternetDomainName) object;
684      return this.name.equals(that.name);
685    }
686
687    return false;
688  }
689
690  @Override
691  public int hashCode() {
692    return name.hashCode();
693  }
694}