001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.net; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020 021import com.google.common.annotations.GwtCompatible; 022import com.google.common.base.Ascii; 023import com.google.common.base.CharMatcher; 024import com.google.common.base.Joiner; 025import com.google.common.base.Optional; 026import com.google.common.base.Splitter; 027import com.google.common.collect.ImmutableList; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.Immutable; 030import com.google.errorprone.annotations.concurrent.LazyInit; 031import com.google.thirdparty.publicsuffix.PublicSuffixPatterns; 032import com.google.thirdparty.publicsuffix.PublicSuffixType; 033import java.util.List; 034import javax.annotation.CheckForNull; 035 036/** 037 * An immutable well-formed internet domain name, such as {@code com} or {@code foo.co.uk}. Only 038 * syntactic analysis is performed; no DNS lookups or other network interactions take place. Thus 039 * there is no guarantee that the domain actually exists on the internet. 040 * 041 * <p>One common use of this class is to determine whether a given string is likely to represent an 042 * addressable domain on the web -- that is, for a candidate string {@code "xxx"}, might browsing to 043 * {@code "http://xxx/"} result in a webpage being displayed? In the past, this test was frequently 044 * done by determining whether the domain ended with a {@linkplain #isPublicSuffix() public suffix} 045 * but was not itself a public suffix. However, this test is no longer accurate. There are many 046 * domains which are both public suffixes and addressable as hosts; {@code "uk.com"} is one example. 047 * Using the subset of public suffixes that are {@linkplain #isRegistrySuffix() registry suffixes}, 048 * one can get a better result, as only a few registry suffixes are addressable. However, the most 049 * useful test to determine if a domain is a plausible web host is {@link #hasPublicSuffix()}. This 050 * will return {@code true} for many domains which (currently) are not hosts, such as {@code "com"}, 051 * but given that any public suffix may become a host without warning, it is better to err on the 052 * side of permissiveness and thus avoid spurious rejection of valid sites. Of course, to actually 053 * determine addressability of any host, clients of this class will need to perform their own DNS 054 * lookups. 055 * 056 * <p>During construction, names are normalized in two ways: 057 * 058 * <ol> 059 * <li>ASCII uppercase characters are converted to lowercase. 060 * <li>Unicode dot separators other than the ASCII period ({@code '.'}) are converted to the ASCII 061 * period. 062 * </ol> 063 * 064 * <p>The normalized values will be returned from {@link #toString()} and {@link #parts()}, and will 065 * be reflected in the result of {@link #equals(Object)}. 066 * 067 * <p><a href="http://en.wikipedia.org/wiki/Internationalized_domain_name">Internationalized domain 068 * names</a> such as {@code 网络.cn} are supported, as are the equivalent <a 069 * href="http://en.wikipedia.org/wiki/Internationalized_domain_name">IDNA Punycode-encoded</a> 070 * versions. 071 * 072 * @author Catherine Berry 073 * @since 5.0 074 */ 075@GwtCompatible(emulated = true) 076@Immutable 077@ElementTypesAreNonnullByDefault 078public final class InternetDomainName { 079 080 private static final CharMatcher DOTS_MATCHER = CharMatcher.anyOf(".\u3002\uFF0E\uFF61"); 081 private static final Splitter DOT_SPLITTER = Splitter.on('.'); 082 private static final Joiner DOT_JOINER = Joiner.on('.'); 083 084 /** 085 * Value of {@link #publicSuffixIndex()} or {@link #registrySuffixIndex()} which indicates that no 086 * relevant suffix was found. 087 */ 088 private static final int NO_SUFFIX_FOUND = -1; 089 090 /** 091 * Value of {@link #publicSuffixIndexCache} or {@link #registrySuffixIndexCache} which indicates 092 * that they were not initialized yet. 093 */ 094 private static final int SUFFIX_NOT_INITIALIZED = -2; 095 096 /** 097 * Maximum parts (labels) in a domain name. This value arises from the 255-octet limit described 098 * in <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11 with the fact that the 099 * encoding of each part occupies at least two bytes (dot plus label externally, length byte plus 100 * label internally). Thus, if all labels have the minimum size of one byte, 127 of them will fit. 101 */ 102 private static final int MAX_PARTS = 127; 103 104 /** 105 * Maximum length of a full domain name, including separators, and leaving room for the root 106 * label. See <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 107 */ 108 private static final int MAX_LENGTH = 253; 109 110 /** 111 * Maximum size of a single part of a domain name. See <a 112 * href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 113 */ 114 private static final int MAX_DOMAIN_PART_LENGTH = 63; 115 116 /** The full domain name, converted to lower case. */ 117 private final String name; 118 119 /** The parts of the domain name, converted to lower case. */ 120 private final ImmutableList<String> parts; 121 122 /** 123 * Cached value of #publicSuffixIndex(). Do not use directly. 124 * 125 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 126 * threads before it is initialized, then each thread is likely to compute their own copy of the 127 * value. 128 */ 129 @SuppressWarnings("Immutable") 130 @LazyInit 131 private int publicSuffixIndexCache = SUFFIX_NOT_INITIALIZED; 132 133 /** 134 * Cached value of #registrySuffixIndex(). Do not use directly. 135 * 136 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 137 * threads before it is initialized, then each thread is likely to compute their own copy of the 138 * value. 139 */ 140 @SuppressWarnings("Immutable") 141 @LazyInit 142 private int registrySuffixIndexCache = SUFFIX_NOT_INITIALIZED; 143 144 /** Constructor used to implement {@link #from(String)}, and from subclasses. */ 145 InternetDomainName(String name) { 146 // Normalize: 147 // * ASCII characters to lowercase 148 // * All dot-like characters to '.' 149 // * Strip trailing '.' 150 151 name = Ascii.toLowerCase(DOTS_MATCHER.replaceFrom(name, '.')); 152 153 if (name.endsWith(".")) { 154 name = name.substring(0, name.length() - 1); 155 } 156 157 checkArgument(name.length() <= MAX_LENGTH, "Domain name too long: '%s':", name); 158 this.name = name; 159 160 this.parts = ImmutableList.copyOf(DOT_SPLITTER.split(name)); 161 checkArgument(parts.size() <= MAX_PARTS, "Domain has too many parts: '%s'", name); 162 checkArgument(validateSyntax(parts), "Not a valid domain name: '%s'", name); 163 } 164 165 /** 166 * The index in the {@link #parts()} list at which the public suffix begins. For example, for the 167 * domain name {@code myblog.blogspot.co.uk}, the value would be 1 (the index of the {@code 168 * blogspot} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no public 169 * suffix was found. 170 */ 171 private int publicSuffixIndex() { 172 int publicSuffixIndexLocal = publicSuffixIndexCache; 173 if (publicSuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 174 publicSuffixIndexCache = 175 publicSuffixIndexLocal = findSuffixOfType(Optional.<PublicSuffixType>absent()); 176 } 177 return publicSuffixIndexLocal; 178 } 179 180 /** 181 * The index in the {@link #parts()} list at which the registry suffix begins. For example, for 182 * the domain name {@code myblog.blogspot.co.uk}, the value would be 2 (the index of the {@code 183 * co} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no registry suffix 184 * was found. 185 */ 186 private int registrySuffixIndex() { 187 int registrySuffixIndexLocal = registrySuffixIndexCache; 188 if (registrySuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 189 registrySuffixIndexCache = 190 registrySuffixIndexLocal = findSuffixOfType(Optional.of(PublicSuffixType.REGISTRY)); 191 } 192 return registrySuffixIndexLocal; 193 } 194 195 /** 196 * Returns the index of the leftmost part of the suffix, or -1 if not found. Note that the value 197 * defined as a suffix may not produce {@code true} results from {@link #isPublicSuffix()} or 198 * {@link #isRegistrySuffix()} if the domain ends with an excluded domain pattern such as {@code 199 * "nhs.uk"}. 200 * 201 * <p>If a {@code desiredType} is specified, this method only finds suffixes of the given type. 202 * Otherwise, it finds the first suffix of any type. 203 */ 204 private int findSuffixOfType(Optional<PublicSuffixType> desiredType) { 205 int partsSize = parts.size(); 206 207 for (int i = 0; i < partsSize; i++) { 208 String ancestorName = DOT_JOINER.join(parts.subList(i, partsSize)); 209 210 if (i > 0 211 && matchesType( 212 desiredType, Optional.fromNullable(PublicSuffixPatterns.UNDER.get(ancestorName)))) { 213 return i - 1; 214 } 215 216 if (matchesType( 217 desiredType, Optional.fromNullable(PublicSuffixPatterns.EXACT.get(ancestorName)))) { 218 return i; 219 } 220 221 // Excluded domains (e.g. !nhs.uk) use the next highest 222 // domain as the effective public suffix (e.g. uk). 223 224 if (PublicSuffixPatterns.EXCLUDED.containsKey(ancestorName)) { 225 return i + 1; 226 } 227 } 228 229 return NO_SUFFIX_FOUND; 230 } 231 232 /** 233 * Returns an instance of {@link InternetDomainName} after lenient validation. Specifically, 234 * validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 235 * ("Internationalizing Domain Names in Applications") is skipped, while validation against <a 236 * href="http://www.ietf.org/rfc/rfc1035.txt">RFC 1035</a> is relaxed in the following ways: 237 * 238 * <ul> 239 * <li>Any part containing non-ASCII characters is considered valid. 240 * <li>Underscores ('_') are permitted wherever dashes ('-') are permitted. 241 * <li>Parts other than the final part may start with a digit, as mandated by <a 242 * href="https://tools.ietf.org/html/rfc1123#section-2">RFC 1123</a>. 243 * </ul> 244 * 245 * @param domain A domain name (not IP address) 246 * @throws IllegalArgumentException if {@code domain} is not syntactically valid according to 247 * {@link #isValid} 248 * @since 10.0 (previously named {@code fromLenient}) 249 */ 250 @CanIgnoreReturnValue // TODO(b/219820829): consider removing 251 public static InternetDomainName from(String domain) { 252 return new InternetDomainName(checkNotNull(domain)); 253 } 254 255 /** 256 * Validation method used by {@code from} to ensure that the domain name is syntactically valid 257 * according to RFC 1035. 258 * 259 * @return Is the domain name syntactically valid? 260 */ 261 private static boolean validateSyntax(List<String> parts) { 262 int lastIndex = parts.size() - 1; 263 264 // Validate the last part specially, as it has different syntax rules. 265 266 if (!validatePart(parts.get(lastIndex), true)) { 267 return false; 268 } 269 270 for (int i = 0; i < lastIndex; i++) { 271 String part = parts.get(i); 272 if (!validatePart(part, false)) { 273 return false; 274 } 275 } 276 277 return true; 278 } 279 280 private static final CharMatcher DASH_MATCHER = CharMatcher.anyOf("-_"); 281 282 private static final CharMatcher DIGIT_MATCHER = CharMatcher.inRange('0', '9'); 283 284 private static final CharMatcher LETTER_MATCHER = 285 CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z')); 286 287 private static final CharMatcher PART_CHAR_MATCHER = 288 DIGIT_MATCHER.or(LETTER_MATCHER).or(DASH_MATCHER); 289 290 /** 291 * Helper method for {@link #validateSyntax(List)}. Validates that one part of a domain name is 292 * valid. 293 * 294 * @param part The domain name part to be validated 295 * @param isFinalPart Is this the final (rightmost) domain part? 296 * @return Whether the part is valid 297 */ 298 private static boolean validatePart(String part, boolean isFinalPart) { 299 300 // These tests could be collapsed into one big boolean expression, but 301 // they have been left as independent tests for clarity. 302 303 if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) { 304 return false; 305 } 306 307 /* 308 * GWT claims to support java.lang.Character's char-classification methods, but it actually only 309 * works for ASCII. So for now, assume any non-ASCII characters are valid. The only place this 310 * seems to be documented is here: 311 * https://groups.google.com/d/topic/google-web-toolkit-contributors/1UEzsryq1XI 312 * 313 * <p>ASCII characters in the part are expected to be valid per RFC 1035, with underscore also 314 * being allowed due to widespread practice. 315 */ 316 317 String asciiChars = CharMatcher.ascii().retainFrom(part); 318 319 if (!PART_CHAR_MATCHER.matchesAllOf(asciiChars)) { 320 return false; 321 } 322 323 // No initial or final dashes or underscores. 324 325 if (DASH_MATCHER.matches(part.charAt(0)) 326 || DASH_MATCHER.matches(part.charAt(part.length() - 1))) { 327 return false; 328 } 329 330 /* 331 * Note that we allow (in contravention of a strict interpretation of the relevant RFCs) domain 332 * parts other than the last may begin with a digit (for example, "3com.com"). It's important to 333 * disallow an initial digit in the last part; it's the only thing that stops an IPv4 numeric 334 * address like 127.0.0.1 from looking like a valid domain name. 335 */ 336 337 if (isFinalPart && DIGIT_MATCHER.matches(part.charAt(0))) { 338 return false; 339 } 340 341 return true; 342 } 343 344 /** 345 * Returns the individual components of this domain name, normalized to all lower case. For 346 * example, for the domain name {@code mail.google.com}, this method returns the list {@code 347 * ["mail", "google", "com"]}. 348 */ 349 public ImmutableList<String> parts() { 350 return parts; 351 } 352 353 /** 354 * Indicates whether this domain name represents a <i>public suffix</i>, as defined by the Mozilla 355 * Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A public suffix 356 * is one under which Internet users can directly register names, such as {@code com}, {@code 357 * co.uk} or {@code pvt.k12.wy.us}. Examples of domain names that are <i>not</i> public suffixes 358 * include {@code google.com}, {@code foo.co.uk}, and {@code myblog.blogspot.com}. 359 * 360 * <p>Public suffixes are a proper superset of {@linkplain #isRegistrySuffix() registry suffixes}. 361 * The list of public suffixes additionally contains privately owned domain names under which 362 * Internet users can register subdomains. An example of a public suffix that is not a registry 363 * suffix is {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> 364 * registry suffixes, since domain name registries collectively control all internet domain names. 365 * 366 * <p>For considerations on whether the public suffix or registry suffix designation is more 367 * suitable for your application, see <a 368 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 369 * 370 * @return {@code true} if this domain name appears exactly on the public suffix list 371 * @since 6.0 372 */ 373 public boolean isPublicSuffix() { 374 return publicSuffixIndex() == 0; 375 } 376 377 /** 378 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 379 * including if it is a public suffix itself. For example, returns {@code true} for {@code 380 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 381 * google.invalid}. This is the recommended method for determining whether a domain is potentially 382 * an addressable host. 383 * 384 * <p>Note that this method is equivalent to {@link #hasRegistrySuffix()} because all registry 385 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 386 * 387 * @since 6.0 388 */ 389 public boolean hasPublicSuffix() { 390 return publicSuffixIndex() != NO_SUFFIX_FOUND; 391 } 392 393 /** 394 * Returns the {@linkplain #isPublicSuffix() public suffix} portion of the domain name, or {@code 395 * null} if no public suffix is present. 396 * 397 * @since 6.0 398 */ 399 @CheckForNull 400 public InternetDomainName publicSuffix() { 401 return hasPublicSuffix() ? ancestor(publicSuffixIndex()) : null; 402 } 403 404 /** 405 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 406 * while not being a public suffix itself. For example, returns {@code true} for {@code 407 * www.google.com}, {@code foo.co.uk} and {@code myblog.blogspot.com}, but not for {@code com}, 408 * {@code co.uk}, {@code google.invalid}, or {@code blogspot.com}. 409 * 410 * <p>This method can be used to determine whether it will probably be possible to set cookies on 411 * the domain, though even that depends on individual browsers' implementations of cookie 412 * controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 413 * 414 * @since 6.0 415 */ 416 public boolean isUnderPublicSuffix() { 417 return publicSuffixIndex() > 0; 418 } 419 420 /** 421 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 422 * {@linkplain #isPublicSuffix() public suffix}. For example, returns {@code true} for {@code 423 * google.com} {@code foo.co.uk}, and {@code myblog.blogspot.com}, but not for {@code 424 * www.google.com}, {@code co.uk}, or {@code blogspot.com}. 425 * 426 * <p>This method can be used to determine whether a domain is probably the highest level for 427 * which cookies may be set, though even that depends on individual browsers' implementations of 428 * cookie controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 429 * 430 * @since 6.0 431 */ 432 public boolean isTopPrivateDomain() { 433 return publicSuffixIndex() == 1; 434 } 435 436 /** 437 * Returns the portion of this domain name that is one level beneath the {@linkplain 438 * #isPublicSuffix() public suffix}. For example, for {@code x.adwords.google.co.uk} it returns 439 * {@code google.co.uk}, since {@code co.uk} is a public suffix. Similarly, for {@code 440 * myblog.blogspot.com} it returns the same domain, {@code myblog.blogspot.com}, since {@code 441 * blogspot.com} is a public suffix. 442 * 443 * <p>If {@link #isTopPrivateDomain()} is true, the current domain name instance is returned. 444 * 445 * <p>This method can be used to determine the probable highest level parent domain for which 446 * cookies may be set, though even that depends on individual browsers' implementations of cookie 447 * controls. 448 * 449 * @throws IllegalStateException if this domain does not end with a public suffix 450 * @since 6.0 451 */ 452 public InternetDomainName topPrivateDomain() { 453 if (isTopPrivateDomain()) { 454 return this; 455 } 456 checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name); 457 return ancestor(publicSuffixIndex() - 1); 458 } 459 460 /** 461 * Indicates whether this domain name represents a <i>registry suffix</i>, as defined by a subset 462 * of the Mozilla Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A 463 * registry suffix is one under which Internet users can directly register names via a domain name 464 * registrar, and have such registrations lawfully protected by internet-governing bodies such as 465 * ICANN. Examples of registry suffixes include {@code com}, {@code co.uk}, and {@code 466 * pvt.k12.wy.us}. Examples of domain names that are <i>not</i> registry suffixes include {@code 467 * google.com} and {@code foo.co.uk}. 468 * 469 * <p>Registry suffixes are a proper subset of {@linkplain #isPublicSuffix() public suffixes}. The 470 * list of public suffixes additionally contains privately owned domain names under which Internet 471 * users can register subdomains. An example of a public suffix that is not a registry suffix is 472 * {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> registry 473 * suffixes, since domain name registries collectively control all internet domain names. 474 * 475 * <p>For considerations on whether the public suffix or registry suffix designation is more 476 * suitable for your application, see <a 477 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 478 * 479 * @return {@code true} if this domain name appears exactly on the public suffix list as part of 480 * the registry suffix section (labelled "ICANN"). 481 * @since 23.3 482 */ 483 public boolean isRegistrySuffix() { 484 return registrySuffixIndex() == 0; 485 } 486 487 /** 488 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 489 * including if it is a registry suffix itself. For example, returns {@code true} for {@code 490 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 491 * google.invalid}. 492 * 493 * <p>Note that this method is equivalent to {@link #hasPublicSuffix()} because all registry 494 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 495 * 496 * @since 23.3 497 */ 498 public boolean hasRegistrySuffix() { 499 return registrySuffixIndex() != NO_SUFFIX_FOUND; 500 } 501 502 /** 503 * Returns the {@linkplain #isRegistrySuffix() registry suffix} portion of the domain name, or 504 * {@code null} if no registry suffix is present. 505 * 506 * @since 23.3 507 */ 508 @CheckForNull 509 public InternetDomainName registrySuffix() { 510 return hasRegistrySuffix() ? ancestor(registrySuffixIndex()) : null; 511 } 512 513 /** 514 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 515 * while not being a registry suffix itself. For example, returns {@code true} for {@code 516 * www.google.com}, {@code foo.co.uk} and {@code blogspot.com}, but not for {@code com}, {@code 517 * co.uk}, or {@code google.invalid}. 518 * 519 * @since 23.3 520 */ 521 public boolean isUnderRegistrySuffix() { 522 return registrySuffixIndex() > 0; 523 } 524 525 /** 526 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 527 * {@linkplain #isRegistrySuffix() registry suffix}. For example, returns {@code true} for {@code 528 * google.com}, {@code foo.co.uk}, and {@code blogspot.com}, but not for {@code www.google.com}, 529 * {@code co.uk}, or {@code myblog.blogspot.com}. 530 * 531 * <p><b>Warning:</b> This method should not be used to determine the probable highest level 532 * parent domain for which cookies may be set. Use {@link #topPrivateDomain()} for that purpose. 533 * 534 * @since 23.3 535 */ 536 public boolean isTopDomainUnderRegistrySuffix() { 537 return registrySuffixIndex() == 1; 538 } 539 540 /** 541 * Returns the portion of this domain name that is one level beneath the {@linkplain 542 * #isRegistrySuffix() registry suffix}. For example, for {@code x.adwords.google.co.uk} it 543 * returns {@code google.co.uk}, since {@code co.uk} is a registry suffix. Similarly, for {@code 544 * myblog.blogspot.com} it returns {@code blogspot.com}, since {@code com} is a registry suffix. 545 * 546 * <p>If {@link #isTopDomainUnderRegistrySuffix()} is true, the current domain name instance is 547 * returned. 548 * 549 * <p><b>Warning:</b> This method should not be used to determine whether a domain is probably the 550 * highest level for which cookies may be set. Use {@link #isTopPrivateDomain()} for that purpose. 551 * 552 * @throws IllegalStateException if this domain does not end with a registry suffix 553 * @since 23.3 554 */ 555 public InternetDomainName topDomainUnderRegistrySuffix() { 556 if (isTopDomainUnderRegistrySuffix()) { 557 return this; 558 } 559 checkState(isUnderRegistrySuffix(), "Not under a registry suffix: %s", name); 560 return ancestor(registrySuffixIndex() - 1); 561 } 562 563 /** Indicates whether this domain is composed of two or more parts. */ 564 public boolean hasParent() { 565 return parts.size() > 1; 566 } 567 568 /** 569 * Returns an {@code InternetDomainName} that is the immediate ancestor of this one; that is, the 570 * current domain with the leftmost part removed. For example, the parent of {@code 571 * www.google.com} is {@code google.com}. 572 * 573 * @throws IllegalStateException if the domain has no parent, as determined by {@link #hasParent} 574 */ 575 public InternetDomainName parent() { 576 checkState(hasParent(), "Domain '%s' has no parent", name); 577 return ancestor(1); 578 } 579 580 /** 581 * Returns the ancestor of the current domain at the given number of levels "higher" (rightward) 582 * in the subdomain list. The number of levels must be non-negative, and less than {@code N-1}, 583 * where {@code N} is the number of parts in the domain. 584 * 585 * <p>TODO: Reasonable candidate for addition to public API. 586 */ 587 private InternetDomainName ancestor(int levels) { 588 return from(DOT_JOINER.join(parts.subList(levels, parts.size()))); 589 } 590 591 /** 592 * Creates and returns a new {@code InternetDomainName} by prepending the argument and a dot to 593 * the current name. For example, {@code InternetDomainName.from("foo.com").child("www.bar")} 594 * returns a new {@code InternetDomainName} with the value {@code www.bar.foo.com}. Only lenient 595 * validation is performed, as described {@link #from(String) here}. 596 * 597 * @throws NullPointerException if leftParts is null 598 * @throws IllegalArgumentException if the resulting name is not valid 599 */ 600 public InternetDomainName child(String leftParts) { 601 return from(checkNotNull(leftParts) + "." + name); 602 } 603 604 /** 605 * Indicates whether the argument is a syntactically valid domain name using lenient validation. 606 * Specifically, validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 607 * ("Internationalizing Domain Names in Applications") is skipped. 608 * 609 * <p>The following two code snippets are equivalent: 610 * 611 * <pre>{@code 612 * domainName = InternetDomainName.isValid(name) 613 * ? InternetDomainName.from(name) 614 * : DEFAULT_DOMAIN; 615 * }</pre> 616 * 617 * <pre>{@code 618 * try { 619 * domainName = InternetDomainName.from(name); 620 * } catch (IllegalArgumentException e) { 621 * domainName = DEFAULT_DOMAIN; 622 * } 623 * }</pre> 624 * 625 * @since 8.0 (previously named {@code isValidLenient}) 626 */ 627 public static boolean isValid(String name) { 628 try { 629 InternetDomainName unused = from(name); 630 return true; 631 } catch (IllegalArgumentException e) { 632 return false; 633 } 634 } 635 636 /** 637 * If a {@code desiredType} is specified, returns true only if the {@code actualType} is 638 * identical. Otherwise, returns true as long as {@code actualType} is present. 639 */ 640 private static boolean matchesType( 641 Optional<PublicSuffixType> desiredType, Optional<PublicSuffixType> actualType) { 642 return desiredType.isPresent() ? desiredType.equals(actualType) : actualType.isPresent(); 643 } 644 645 /** Returns the domain name, normalized to all lower case. */ 646 @Override 647 public String toString() { 648 return name; 649 } 650 651 /** 652 * Equality testing is based on the text supplied by the caller, after normalization as described 653 * in the class documentation. For example, a non-ASCII Unicode domain name and the Punycode 654 * version of the same domain name would not be considered equal. 655 */ 656 @Override 657 public boolean equals(@CheckForNull Object object) { 658 if (object == this) { 659 return true; 660 } 661 662 if (object instanceof InternetDomainName) { 663 InternetDomainName that = (InternetDomainName) object; 664 return this.name.equals(that.name); 665 } 666 667 return false; 668 } 669 670 @Override 671 public int hashCode() { 672 return name.hashCode(); 673 } 674}