001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.net; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020 021import com.google.common.annotations.GwtCompatible; 022import com.google.common.base.Ascii; 023import com.google.common.base.CharMatcher; 024import com.google.common.base.Joiner; 025import com.google.common.base.Optional; 026import com.google.common.base.Splitter; 027import com.google.common.collect.ImmutableList; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.Immutable; 030import com.google.errorprone.annotations.concurrent.LazyInit; 031import com.google.thirdparty.publicsuffix.PublicSuffixPatterns; 032import com.google.thirdparty.publicsuffix.PublicSuffixType; 033import java.util.List; 034import javax.annotation.CheckForNull; 035 036/** 037 * An immutable well-formed internet domain name, such as {@code com} or {@code foo.co.uk}. Only 038 * syntactic analysis is performed; no DNS lookups or other network interactions take place. Thus 039 * there is no guarantee that the domain actually exists on the internet. 040 * 041 * <p>One common use of this class is to determine whether a given string is likely to represent an 042 * addressable domain on the web -- that is, for a candidate string {@code "xxx"}, might browsing to 043 * {@code "http://xxx/"} result in a webpage being displayed? In the past, this test was frequently 044 * done by determining whether the domain ended with a {@linkplain #isPublicSuffix() public suffix} 045 * but was not itself a public suffix. However, this test is no longer accurate. There are many 046 * domains which are both public suffixes and addressable as hosts; {@code "uk.com"} is one example. 047 * Using the subset of public suffixes that are {@linkplain #isRegistrySuffix() registry suffixes}, 048 * one can get a better result, as only a few registry suffixes are addressable. However, the most 049 * useful test to determine if a domain is a plausible web host is {@link #hasPublicSuffix()}. This 050 * will return {@code true} for many domains which (currently) are not hosts, such as {@code "com"}, 051 * but given that any public suffix may become a host without warning, it is better to err on the 052 * side of permissiveness and thus avoid spurious rejection of valid sites. Of course, to actually 053 * determine addressability of any host, clients of this class will need to perform their own DNS 054 * lookups. 055 * 056 * <p>During construction, names are normalized in two ways: 057 * 058 * <ol> 059 * <li>ASCII uppercase characters are converted to lowercase. 060 * <li>Unicode dot separators other than the ASCII period ({@code '.'}) are converted to the ASCII 061 * period. 062 * </ol> 063 * 064 * <p>The normalized values will be returned from {@link #toString()} and {@link #parts()}, and will 065 * be reflected in the result of {@link #equals(Object)}. 066 * 067 * <p><a href="http://en.wikipedia.org/wiki/Internationalized_domain_name">Internationalized domain 068 * names</a> such as {@code 网络.cn} are supported, as are the equivalent <a 069 * href="http://en.wikipedia.org/wiki/Internationalized_domain_name">IDNA Punycode-encoded</a> 070 * versions. 071 * 072 * @author Catherine Berry 073 * @since 5.0 074 */ 075@GwtCompatible(emulated = true) 076@Immutable 077@ElementTypesAreNonnullByDefault 078public final class InternetDomainName { 079 080 private static final CharMatcher DOTS_MATCHER = CharMatcher.anyOf(".\u3002\uFF0E\uFF61"); 081 private static final Splitter DOT_SPLITTER = Splitter.on('.'); 082 private static final Joiner DOT_JOINER = Joiner.on('.'); 083 084 /** 085 * Value of {@link #publicSuffixIndex()} or {@link #registrySuffixIndex()} which indicates that no 086 * relevant suffix was found. 087 */ 088 private static final int NO_SUFFIX_FOUND = -1; 089 090 /** 091 * Value of {@link #publicSuffixIndexCache} or {@link #registrySuffixIndexCache} which indicates 092 * that they were not initialized yet. 093 */ 094 private static final int SUFFIX_NOT_INITIALIZED = -2; 095 096 /** 097 * Maximum parts (labels) in a domain name. This value arises from the 255-octet limit described 098 * in <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11 with the fact that the 099 * encoding of each part occupies at least two bytes (dot plus label externally, length byte plus 100 * label internally). Thus, if all labels have the minimum size of one byte, 127 of them will fit. 101 */ 102 private static final int MAX_PARTS = 127; 103 104 /** 105 * Maximum length of a full domain name, including separators, and leaving room for the root 106 * label. See <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 107 */ 108 private static final int MAX_LENGTH = 253; 109 110 /** 111 * Maximum size of a single part of a domain name. See <a 112 * href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 113 */ 114 private static final int MAX_DOMAIN_PART_LENGTH = 63; 115 116 /** The full domain name, converted to lower case. */ 117 private final String name; 118 119 /** The parts of the domain name, converted to lower case. */ 120 private final ImmutableList<String> parts; 121 122 /** 123 * Cached value of #publicSuffixIndex(). Do not use directly. 124 * 125 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 126 * threads before it is initialized, then each thread is likely to compute their own copy of the 127 * value. 128 */ 129 @SuppressWarnings("Immutable") 130 @LazyInit 131 private int publicSuffixIndexCache = SUFFIX_NOT_INITIALIZED; 132 133 /** 134 * Cached value of #registrySuffixIndex(). Do not use directly. 135 * 136 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 137 * threads before it is initialized, then each thread is likely to compute their own copy of the 138 * value. 139 */ 140 @SuppressWarnings("Immutable") 141 @LazyInit 142 private int registrySuffixIndexCache = SUFFIX_NOT_INITIALIZED; 143 144 /** Constructor used to implement {@link #from(String)}, and from subclasses. */ 145 InternetDomainName(String name) { 146 // Normalize: 147 // * ASCII characters to lowercase 148 // * All dot-like characters to '.' 149 // * Strip trailing '.' 150 151 name = Ascii.toLowerCase(DOTS_MATCHER.replaceFrom(name, '.')); 152 153 if (name.endsWith(".")) { 154 name = name.substring(0, name.length() - 1); 155 } 156 157 checkArgument(name.length() <= MAX_LENGTH, "Domain name too long: '%s':", name); 158 this.name = name; 159 160 this.parts = ImmutableList.copyOf(DOT_SPLITTER.split(name)); 161 checkArgument(parts.size() <= MAX_PARTS, "Domain has too many parts: '%s'", name); 162 checkArgument(validateSyntax(parts), "Not a valid domain name: '%s'", name); 163 } 164 165 /** 166 * Internal constructor that skips validations when creating an instance from parts of an 167 * already-validated InternetDomainName, as in {@link ancestor}. 168 */ 169 private InternetDomainName(String name, ImmutableList<String> parts) { 170 checkArgument(!parts.isEmpty(), "Cannot create an InternetDomainName with zero parts."); 171 this.name = name; 172 this.parts = parts; 173 } 174 175 /** 176 * The index in the {@link #parts()} list at which the public suffix begins. For example, for the 177 * domain name {@code myblog.blogspot.co.uk}, the value would be 1 (the index of the {@code 178 * blogspot} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no public 179 * suffix was found. 180 */ 181 private int publicSuffixIndex() { 182 int publicSuffixIndexLocal = publicSuffixIndexCache; 183 if (publicSuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 184 publicSuffixIndexCache = 185 publicSuffixIndexLocal = findSuffixOfType(Optional.<PublicSuffixType>absent()); 186 } 187 return publicSuffixIndexLocal; 188 } 189 190 /** 191 * The index in the {@link #parts()} list at which the registry suffix begins. For example, for 192 * the domain name {@code myblog.blogspot.co.uk}, the value would be 2 (the index of the {@code 193 * co} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no registry suffix 194 * was found. 195 */ 196 private int registrySuffixIndex() { 197 int registrySuffixIndexLocal = registrySuffixIndexCache; 198 if (registrySuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 199 registrySuffixIndexCache = 200 registrySuffixIndexLocal = findSuffixOfType(Optional.of(PublicSuffixType.REGISTRY)); 201 } 202 return registrySuffixIndexLocal; 203 } 204 205 /** 206 * Returns the index of the leftmost part of the suffix, or -1 if not found. Note that the value 207 * defined as a suffix may not produce {@code true} results from {@link #isPublicSuffix()} or 208 * {@link #isRegistrySuffix()} if the domain ends with an excluded domain pattern such as {@code 209 * "nhs.uk"}. 210 * 211 * <p>If a {@code desiredType} is specified, this method only finds suffixes of the given type. 212 * Otherwise, it finds the first suffix of any type. 213 */ 214 private int findSuffixOfType(Optional<PublicSuffixType> desiredType) { 215 int partsSize = parts.size(); 216 217 for (int i = 0; i < partsSize; i++) { 218 String ancestorName = DOT_JOINER.join(parts.subList(i, partsSize)); 219 220 if (i > 0 221 && matchesType( 222 desiredType, Optional.fromNullable(PublicSuffixPatterns.UNDER.get(ancestorName)))) { 223 return i - 1; 224 } 225 226 if (matchesType( 227 desiredType, Optional.fromNullable(PublicSuffixPatterns.EXACT.get(ancestorName)))) { 228 return i; 229 } 230 231 // Excluded domains (e.g. !nhs.uk) use the next highest 232 // domain as the effective public suffix (e.g. uk). 233 234 if (PublicSuffixPatterns.EXCLUDED.containsKey(ancestorName)) { 235 return i + 1; 236 } 237 } 238 239 return NO_SUFFIX_FOUND; 240 } 241 242 /** 243 * Returns an instance of {@link InternetDomainName} after lenient validation. Specifically, 244 * validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 245 * ("Internationalizing Domain Names in Applications") is skipped, while validation against <a 246 * href="http://www.ietf.org/rfc/rfc1035.txt">RFC 1035</a> is relaxed in the following ways: 247 * 248 * <ul> 249 * <li>Any part containing non-ASCII characters is considered valid. 250 * <li>Underscores ('_') are permitted wherever dashes ('-') are permitted. 251 * <li>Parts other than the final part may start with a digit, as mandated by <a 252 * href="https://tools.ietf.org/html/rfc1123#section-2">RFC 1123</a>. 253 * </ul> 254 * 255 * @param domain A domain name (not IP address) 256 * @throws IllegalArgumentException if {@code domain} is not syntactically valid according to 257 * {@link #isValid} 258 * @since 10.0 (previously named {@code fromLenient}) 259 */ 260 @CanIgnoreReturnValue // TODO(b/219820829): consider removing 261 public static InternetDomainName from(String domain) { 262 return new InternetDomainName(checkNotNull(domain)); 263 } 264 265 /** 266 * Validation method used by {@code from} to ensure that the domain name is syntactically valid 267 * according to RFC 1035. 268 * 269 * @return Is the domain name syntactically valid? 270 */ 271 private static boolean validateSyntax(List<String> parts) { 272 int lastIndex = parts.size() - 1; 273 274 // Validate the last part specially, as it has different syntax rules. 275 276 if (!validatePart(parts.get(lastIndex), true)) { 277 return false; 278 } 279 280 for (int i = 0; i < lastIndex; i++) { 281 String part = parts.get(i); 282 if (!validatePart(part, false)) { 283 return false; 284 } 285 } 286 287 return true; 288 } 289 290 private static final CharMatcher DASH_MATCHER = CharMatcher.anyOf("-_"); 291 292 private static final CharMatcher DIGIT_MATCHER = CharMatcher.inRange('0', '9'); 293 294 private static final CharMatcher LETTER_MATCHER = 295 CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z')); 296 297 private static final CharMatcher PART_CHAR_MATCHER = 298 DIGIT_MATCHER.or(LETTER_MATCHER).or(DASH_MATCHER); 299 300 /** 301 * Helper method for {@link #validateSyntax(List)}. Validates that one part of a domain name is 302 * valid. 303 * 304 * @param part The domain name part to be validated 305 * @param isFinalPart Is this the final (rightmost) domain part? 306 * @return Whether the part is valid 307 */ 308 private static boolean validatePart(String part, boolean isFinalPart) { 309 310 // These tests could be collapsed into one big boolean expression, but 311 // they have been left as independent tests for clarity. 312 313 if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) { 314 return false; 315 } 316 317 /* 318 * GWT claims to support java.lang.Character's char-classification methods, but it actually only 319 * works for ASCII. So for now, assume any non-ASCII characters are valid. The only place this 320 * seems to be documented is here: 321 * https://groups.google.com/d/topic/google-web-toolkit-contributors/1UEzsryq1XI 322 * 323 * <p>ASCII characters in the part are expected to be valid per RFC 1035, with underscore also 324 * being allowed due to widespread practice. 325 */ 326 327 String asciiChars = CharMatcher.ascii().retainFrom(part); 328 329 if (!PART_CHAR_MATCHER.matchesAllOf(asciiChars)) { 330 return false; 331 } 332 333 // No initial or final dashes or underscores. 334 335 if (DASH_MATCHER.matches(part.charAt(0)) 336 || DASH_MATCHER.matches(part.charAt(part.length() - 1))) { 337 return false; 338 } 339 340 /* 341 * Note that we allow (in contravention of a strict interpretation of the relevant RFCs) domain 342 * parts other than the last may begin with a digit (for example, "3com.com"). It's important to 343 * disallow an initial digit in the last part; it's the only thing that stops an IPv4 numeric 344 * address like 127.0.0.1 from looking like a valid domain name. 345 */ 346 347 if (isFinalPart && DIGIT_MATCHER.matches(part.charAt(0))) { 348 return false; 349 } 350 351 return true; 352 } 353 354 /** 355 * Returns the individual components of this domain name, normalized to all lower case. For 356 * example, for the domain name {@code mail.google.com}, this method returns the list {@code 357 * ["mail", "google", "com"]}. 358 */ 359 public ImmutableList<String> parts() { 360 return parts; 361 } 362 363 /** 364 * Indicates whether this domain name represents a <i>public suffix</i>, as defined by the Mozilla 365 * Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A public suffix 366 * is one under which Internet users can directly register names, such as {@code com}, {@code 367 * co.uk} or {@code pvt.k12.wy.us}. Examples of domain names that are <i>not</i> public suffixes 368 * include {@code google.com}, {@code foo.co.uk}, and {@code myblog.blogspot.com}. 369 * 370 * <p>Public suffixes are a proper superset of {@linkplain #isRegistrySuffix() registry suffixes}. 371 * The list of public suffixes additionally contains privately owned domain names under which 372 * Internet users can register subdomains. An example of a public suffix that is not a registry 373 * suffix is {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> 374 * registry suffixes, since domain name registries collectively control all internet domain names. 375 * 376 * <p>For considerations on whether the public suffix or registry suffix designation is more 377 * suitable for your application, see <a 378 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 379 * 380 * @return {@code true} if this domain name appears exactly on the public suffix list 381 * @since 6.0 382 */ 383 public boolean isPublicSuffix() { 384 return publicSuffixIndex() == 0; 385 } 386 387 /** 388 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 389 * including if it is a public suffix itself. For example, returns {@code true} for {@code 390 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 391 * google.invalid}. This is the recommended method for determining whether a domain is potentially 392 * an addressable host. 393 * 394 * <p>Note that this method is equivalent to {@link #hasRegistrySuffix()} because all registry 395 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 396 * 397 * @since 6.0 398 */ 399 public boolean hasPublicSuffix() { 400 return publicSuffixIndex() != NO_SUFFIX_FOUND; 401 } 402 403 /** 404 * Returns the {@linkplain #isPublicSuffix() public suffix} portion of the domain name, or {@code 405 * null} if no public suffix is present. 406 * 407 * @since 6.0 408 */ 409 @CheckForNull 410 public InternetDomainName publicSuffix() { 411 return hasPublicSuffix() ? ancestor(publicSuffixIndex()) : null; 412 } 413 414 /** 415 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 416 * while not being a public suffix itself. For example, returns {@code true} for {@code 417 * www.google.com}, {@code foo.co.uk} and {@code myblog.blogspot.com}, but not for {@code com}, 418 * {@code co.uk}, {@code google.invalid}, or {@code blogspot.com}. 419 * 420 * <p>This method can be used to determine whether it will probably be possible to set cookies on 421 * the domain, though even that depends on individual browsers' implementations of cookie 422 * controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 423 * 424 * @since 6.0 425 */ 426 public boolean isUnderPublicSuffix() { 427 return publicSuffixIndex() > 0; 428 } 429 430 /** 431 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 432 * {@linkplain #isPublicSuffix() public suffix}. For example, returns {@code true} for {@code 433 * google.com} {@code foo.co.uk}, and {@code myblog.blogspot.com}, but not for {@code 434 * www.google.com}, {@code co.uk}, or {@code blogspot.com}. 435 * 436 * <p>This method can be used to determine whether a domain is probably the highest level for 437 * which cookies may be set, though even that depends on individual browsers' implementations of 438 * cookie controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 439 * 440 * @since 6.0 441 */ 442 public boolean isTopPrivateDomain() { 443 return publicSuffixIndex() == 1; 444 } 445 446 /** 447 * Returns the portion of this domain name that is one level beneath the {@linkplain 448 * #isPublicSuffix() public suffix}. For example, for {@code x.adwords.google.co.uk} it returns 449 * {@code google.co.uk}, since {@code co.uk} is a public suffix. Similarly, for {@code 450 * myblog.blogspot.com} it returns the same domain, {@code myblog.blogspot.com}, since {@code 451 * blogspot.com} is a public suffix. 452 * 453 * <p>If {@link #isTopPrivateDomain()} is true, the current domain name instance is returned. 454 * 455 * <p>This method can be used to determine the probable highest level parent domain for which 456 * cookies may be set, though even that depends on individual browsers' implementations of cookie 457 * controls. 458 * 459 * @throws IllegalStateException if this domain does not end with a public suffix 460 * @since 6.0 461 */ 462 public InternetDomainName topPrivateDomain() { 463 if (isTopPrivateDomain()) { 464 return this; 465 } 466 checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name); 467 return ancestor(publicSuffixIndex() - 1); 468 } 469 470 /** 471 * Indicates whether this domain name represents a <i>registry suffix</i>, as defined by a subset 472 * of the Mozilla Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A 473 * registry suffix is one under which Internet users can directly register names via a domain name 474 * registrar, and have such registrations lawfully protected by internet-governing bodies such as 475 * ICANN. Examples of registry suffixes include {@code com}, {@code co.uk}, and {@code 476 * pvt.k12.wy.us}. Examples of domain names that are <i>not</i> registry suffixes include {@code 477 * google.com} and {@code foo.co.uk}. 478 * 479 * <p>Registry suffixes are a proper subset of {@linkplain #isPublicSuffix() public suffixes}. The 480 * list of public suffixes additionally contains privately owned domain names under which Internet 481 * users can register subdomains. An example of a public suffix that is not a registry suffix is 482 * {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> registry 483 * suffixes, since domain name registries collectively control all internet domain names. 484 * 485 * <p>For considerations on whether the public suffix or registry suffix designation is more 486 * suitable for your application, see <a 487 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 488 * 489 * @return {@code true} if this domain name appears exactly on the public suffix list as part of 490 * the registry suffix section (labelled "ICANN"). 491 * @since 23.3 492 */ 493 public boolean isRegistrySuffix() { 494 return registrySuffixIndex() == 0; 495 } 496 497 /** 498 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 499 * including if it is a registry suffix itself. For example, returns {@code true} for {@code 500 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 501 * google.invalid}. 502 * 503 * <p>Note that this method is equivalent to {@link #hasPublicSuffix()} because all registry 504 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 505 * 506 * @since 23.3 507 */ 508 public boolean hasRegistrySuffix() { 509 return registrySuffixIndex() != NO_SUFFIX_FOUND; 510 } 511 512 /** 513 * Returns the {@linkplain #isRegistrySuffix() registry suffix} portion of the domain name, or 514 * {@code null} if no registry suffix is present. 515 * 516 * @since 23.3 517 */ 518 @CheckForNull 519 public InternetDomainName registrySuffix() { 520 return hasRegistrySuffix() ? ancestor(registrySuffixIndex()) : null; 521 } 522 523 /** 524 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 525 * while not being a registry suffix itself. For example, returns {@code true} for {@code 526 * www.google.com}, {@code foo.co.uk} and {@code blogspot.com}, but not for {@code com}, {@code 527 * co.uk}, or {@code google.invalid}. 528 * 529 * @since 23.3 530 */ 531 public boolean isUnderRegistrySuffix() { 532 return registrySuffixIndex() > 0; 533 } 534 535 /** 536 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 537 * {@linkplain #isRegistrySuffix() registry suffix}. For example, returns {@code true} for {@code 538 * google.com}, {@code foo.co.uk}, and {@code blogspot.com}, but not for {@code www.google.com}, 539 * {@code co.uk}, or {@code myblog.blogspot.com}. 540 * 541 * <p><b>Warning:</b> This method should not be used to determine the probable highest level 542 * parent domain for which cookies may be set. Use {@link #topPrivateDomain()} for that purpose. 543 * 544 * @since 23.3 545 */ 546 public boolean isTopDomainUnderRegistrySuffix() { 547 return registrySuffixIndex() == 1; 548 } 549 550 /** 551 * Returns the portion of this domain name that is one level beneath the {@linkplain 552 * #isRegistrySuffix() registry suffix}. For example, for {@code x.adwords.google.co.uk} it 553 * returns {@code google.co.uk}, since {@code co.uk} is a registry suffix. Similarly, for {@code 554 * myblog.blogspot.com} it returns {@code blogspot.com}, since {@code com} is a registry suffix. 555 * 556 * <p>If {@link #isTopDomainUnderRegistrySuffix()} is true, the current domain name instance is 557 * returned. 558 * 559 * <p><b>Warning:</b> This method should not be used to determine whether a domain is probably the 560 * highest level for which cookies may be set. Use {@link #isTopPrivateDomain()} for that purpose. 561 * 562 * @throws IllegalStateException if this domain does not end with a registry suffix 563 * @since 23.3 564 */ 565 public InternetDomainName topDomainUnderRegistrySuffix() { 566 if (isTopDomainUnderRegistrySuffix()) { 567 return this; 568 } 569 checkState(isUnderRegistrySuffix(), "Not under a registry suffix: %s", name); 570 return ancestor(registrySuffixIndex() - 1); 571 } 572 573 /** Indicates whether this domain is composed of two or more parts. */ 574 public boolean hasParent() { 575 return parts.size() > 1; 576 } 577 578 /** 579 * Returns an {@code InternetDomainName} that is the immediate ancestor of this one; that is, the 580 * current domain with the leftmost part removed. For example, the parent of {@code 581 * www.google.com} is {@code google.com}. 582 * 583 * @throws IllegalStateException if the domain has no parent, as determined by {@link #hasParent} 584 */ 585 public InternetDomainName parent() { 586 checkState(hasParent(), "Domain '%s' has no parent", name); 587 return ancestor(1); 588 } 589 590 /** 591 * Returns the ancestor of the current domain at the given number of levels "higher" (rightward) 592 * in the subdomain list. The number of levels must be non-negative, and less than {@code N-1}, 593 * where {@code N} is the number of parts in the domain. 594 * 595 * <p>TODO: Reasonable candidate for addition to public API. 596 */ 597 private InternetDomainName ancestor(int levels) { 598 ImmutableList<String> ancestorParts = parts.subList(levels, parts.size()); 599 600 // levels equals the number of dots that are getting clipped away, then add the length of each 601 // clipped part to get the length of the leading substring that is being removed. 602 int substringFrom = levels; 603 for (int i = 0; i < levels; i++) { 604 substringFrom += parts.get(i).length(); 605 } 606 String ancestorName = name.substring(substringFrom); 607 608 return new InternetDomainName(ancestorName, ancestorParts); 609 } 610 611 /** 612 * Creates and returns a new {@code InternetDomainName} by prepending the argument and a dot to 613 * the current name. For example, {@code InternetDomainName.from("foo.com").child("www.bar")} 614 * returns a new {@code InternetDomainName} with the value {@code www.bar.foo.com}. Only lenient 615 * validation is performed, as described {@link #from(String) here}. 616 * 617 * @throws NullPointerException if leftParts is null 618 * @throws IllegalArgumentException if the resulting name is not valid 619 */ 620 public InternetDomainName child(String leftParts) { 621 return from(checkNotNull(leftParts) + "." + name); 622 } 623 624 /** 625 * Indicates whether the argument is a syntactically valid domain name using lenient validation. 626 * Specifically, validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 627 * ("Internationalizing Domain Names in Applications") is skipped. 628 * 629 * <p>The following two code snippets are equivalent: 630 * 631 * <pre>{@code 632 * domainName = InternetDomainName.isValid(name) 633 * ? InternetDomainName.from(name) 634 * : DEFAULT_DOMAIN; 635 * }</pre> 636 * 637 * <pre>{@code 638 * try { 639 * domainName = InternetDomainName.from(name); 640 * } catch (IllegalArgumentException e) { 641 * domainName = DEFAULT_DOMAIN; 642 * } 643 * }</pre> 644 * 645 * @since 8.0 (previously named {@code isValidLenient}) 646 */ 647 public static boolean isValid(String name) { 648 try { 649 InternetDomainName unused = from(name); 650 return true; 651 } catch (IllegalArgumentException e) { 652 return false; 653 } 654 } 655 656 /** 657 * If a {@code desiredType} is specified, returns true only if the {@code actualType} is 658 * identical. Otherwise, returns true as long as {@code actualType} is present. 659 */ 660 private static boolean matchesType( 661 Optional<PublicSuffixType> desiredType, Optional<PublicSuffixType> actualType) { 662 return desiredType.isPresent() ? desiredType.equals(actualType) : actualType.isPresent(); 663 } 664 665 /** Returns the domain name, normalized to all lower case. */ 666 @Override 667 public String toString() { 668 return name; 669 } 670 671 /** 672 * Equality testing is based on the text supplied by the caller, after normalization as described 673 * in the class documentation. For example, a non-ASCII Unicode domain name and the Punycode 674 * version of the same domain name would not be considered equal. 675 */ 676 @Override 677 public boolean equals(@CheckForNull Object object) { 678 if (object == this) { 679 return true; 680 } 681 682 if (object instanceof InternetDomainName) { 683 InternetDomainName that = (InternetDomainName) object; 684 return this.name.equals(that.name); 685 } 686 687 return false; 688 } 689 690 @Override 691 public int hashCode() { 692 return name.hashCode(); 693 } 694}