001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.net; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020 021import com.google.common.annotations.GwtCompatible; 022import com.google.common.base.Ascii; 023import com.google.common.base.CharMatcher; 024import com.google.common.base.Joiner; 025import com.google.common.base.Optional; 026import com.google.common.base.Splitter; 027import com.google.common.collect.ImmutableList; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.Immutable; 030import com.google.errorprone.annotations.concurrent.LazyInit; 031import com.google.thirdparty.publicsuffix.PublicSuffixPatterns; 032import com.google.thirdparty.publicsuffix.PublicSuffixType; 033import java.util.List; 034import org.jspecify.annotations.Nullable; 035 036/** 037 * An immutable well-formed internet domain name, such as {@code com} or {@code foo.co.uk}. Only 038 * syntactic analysis is performed; no DNS lookups or other network interactions take place. Thus 039 * there is no guarantee that the domain actually exists on the internet. 040 * 041 * <p>One common use of this class is to determine whether a given string is likely to represent an 042 * addressable domain on the web -- that is, for a candidate string {@code "xxx"}, might browsing to 043 * {@code "http://xxx/"} result in a webpage being displayed? In the past, this test was frequently 044 * done by determining whether the domain ended with a {@linkplain #isPublicSuffix() public suffix} 045 * but was not itself a public suffix. However, this test is no longer accurate. There are many 046 * domains which are both public suffixes and addressable as hosts; {@code "uk.com"} is one example. 047 * Using the subset of public suffixes that are {@linkplain #isRegistrySuffix() registry suffixes}, 048 * one can get a better result, as only a few registry suffixes are addressable. However, the most 049 * useful test to determine if a domain is a plausible web host is {@link #hasPublicSuffix()}. This 050 * will return {@code true} for many domains which (currently) are not hosts, such as {@code "com"}, 051 * but given that any public suffix may become a host without warning, it is better to err on the 052 * side of permissiveness and thus avoid spurious rejection of valid sites. Of course, to actually 053 * determine addressability of any host, clients of this class will need to perform their own DNS 054 * lookups. 055 * 056 * <p>During construction, names are normalized in two ways: 057 * 058 * <ol> 059 * <li>ASCII uppercase characters are converted to lowercase. 060 * <li>Unicode dot separators other than the ASCII period ({@code '.'}) are converted to the ASCII 061 * period. 062 * </ol> 063 * 064 * <p>The normalized values will be returned from {@link #toString()} and {@link #parts()}, and will 065 * be reflected in the result of {@link #equals(Object)}. 066 * 067 * <p><a href="http://en.wikipedia.org/wiki/Internationalized_domain_name">Internationalized domain 068 * names</a> such as {@code 网络.cn} are supported, as are the equivalent <a 069 * href="http://en.wikipedia.org/wiki/Internationalized_domain_name">IDNA Punycode-encoded</a> 070 * versions. 071 * 072 * @author Catherine Berry 073 * @since 5.0 074 */ 075@GwtCompatible(emulated = true) 076@Immutable 077public final class InternetDomainName { 078 079 private static final CharMatcher DOTS_MATCHER = CharMatcher.anyOf(".\u3002\uFF0E\uFF61"); 080 private static final Splitter DOT_SPLITTER = Splitter.on('.'); 081 private static final Joiner DOT_JOINER = Joiner.on('.'); 082 083 /** 084 * Value of {@link #publicSuffixIndex()} or {@link #registrySuffixIndex()} which indicates that no 085 * relevant suffix was found. 086 */ 087 private static final int NO_SUFFIX_FOUND = -1; 088 089 /** 090 * Value of {@link #publicSuffixIndexCache} or {@link #registrySuffixIndexCache} which indicates 091 * that they were not initialized yet. 092 */ 093 private static final int SUFFIX_NOT_INITIALIZED = -2; 094 095 /** 096 * Maximum parts (labels) in a domain name. This value arises from the 255-octet limit described 097 * in <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11 with the fact that the 098 * encoding of each part occupies at least two bytes (dot plus label externally, length byte plus 099 * label internally). Thus, if all labels have the minimum size of one byte, 127 of them will fit. 100 */ 101 private static final int MAX_PARTS = 127; 102 103 /** 104 * Maximum length of a full domain name, including separators, and leaving room for the root 105 * label. See <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 106 */ 107 private static final int MAX_LENGTH = 253; 108 109 /** 110 * Maximum size of a single part of a domain name. See <a 111 * href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11. 112 */ 113 private static final int MAX_DOMAIN_PART_LENGTH = 63; 114 115 /** The full domain name, converted to lower case. */ 116 private final String name; 117 118 /** The parts of the domain name, converted to lower case. */ 119 private final ImmutableList<String> parts; 120 121 /** 122 * Cached value of #publicSuffixIndex(). Do not use directly. 123 * 124 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 125 * threads before it is initialized, then each thread is likely to compute their own copy of the 126 * value. 127 */ 128 @SuppressWarnings("Immutable") 129 @LazyInit 130 private int publicSuffixIndexCache = SUFFIX_NOT_INITIALIZED; 131 132 /** 133 * Cached value of #registrySuffixIndex(). Do not use directly. 134 * 135 * <p>Since this field isn't {@code volatile}, if an instance of this class is shared across 136 * threads before it is initialized, then each thread is likely to compute their own copy of the 137 * value. 138 */ 139 @SuppressWarnings("Immutable") 140 @LazyInit 141 private int registrySuffixIndexCache = SUFFIX_NOT_INITIALIZED; 142 143 /** Constructor used to implement {@link #from(String)}, and from subclasses. */ 144 InternetDomainName(String name) { 145 // Normalize: 146 // * ASCII characters to lowercase 147 // * All dot-like characters to '.' 148 // * Strip trailing '.' 149 150 name = Ascii.toLowerCase(DOTS_MATCHER.replaceFrom(name, '.')); 151 152 if (name.endsWith(".")) { 153 name = name.substring(0, name.length() - 1); 154 } 155 156 checkArgument(name.length() <= MAX_LENGTH, "Domain name too long: '%s':", name); 157 this.name = name; 158 159 this.parts = ImmutableList.copyOf(DOT_SPLITTER.split(name)); 160 checkArgument(parts.size() <= MAX_PARTS, "Domain has too many parts: '%s'", name); 161 checkArgument(validateSyntax(parts), "Not a valid domain name: '%s'", name); 162 } 163 164 /** 165 * Internal constructor that skips validations when creating an instance from parts of an 166 * already-validated InternetDomainName, as in {@link ancestor}. 167 */ 168 private InternetDomainName(String name, ImmutableList<String> parts) { 169 checkArgument(!parts.isEmpty(), "Cannot create an InternetDomainName with zero parts."); 170 this.name = name; 171 this.parts = parts; 172 } 173 174 /** 175 * The index in the {@link #parts()} list at which the public suffix begins. For example, for the 176 * domain name {@code myblog.blogspot.co.uk}, the value would be 1 (the index of the {@code 177 * blogspot} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no public 178 * suffix was found. 179 */ 180 private int publicSuffixIndex() { 181 int publicSuffixIndexLocal = publicSuffixIndexCache; 182 if (publicSuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 183 publicSuffixIndexCache = 184 publicSuffixIndexLocal = findSuffixOfType(Optional.<PublicSuffixType>absent()); 185 } 186 return publicSuffixIndexLocal; 187 } 188 189 /** 190 * The index in the {@link #parts()} list at which the registry suffix begins. For example, for 191 * the domain name {@code myblog.blogspot.co.uk}, the value would be 2 (the index of the {@code 192 * co} part). The value is negative (specifically, {@link #NO_SUFFIX_FOUND}) if no registry suffix 193 * was found. 194 */ 195 private int registrySuffixIndex() { 196 int registrySuffixIndexLocal = registrySuffixIndexCache; 197 if (registrySuffixIndexLocal == SUFFIX_NOT_INITIALIZED) { 198 registrySuffixIndexCache = 199 registrySuffixIndexLocal = findSuffixOfType(Optional.of(PublicSuffixType.REGISTRY)); 200 } 201 return registrySuffixIndexLocal; 202 } 203 204 /** 205 * Returns the index of the leftmost part of the suffix, or -1 if not found. Note that the value 206 * defined as a suffix may not produce {@code true} results from {@link #isPublicSuffix()} or 207 * {@link #isRegistrySuffix()} if the domain ends with an excluded domain pattern such as {@code 208 * "nhs.uk"}. 209 * 210 * <p>If a {@code desiredType} is specified, this method only finds suffixes of the given type. 211 * Otherwise, it finds the first suffix of any type. 212 */ 213 private int findSuffixOfType(Optional<PublicSuffixType> desiredType) { 214 int partsSize = parts.size(); 215 216 for (int i = 0; i < partsSize; i++) { 217 String ancestorName = DOT_JOINER.join(parts.subList(i, partsSize)); 218 219 if (i > 0 220 && matchesType( 221 desiredType, Optional.fromNullable(PublicSuffixPatterns.UNDER.get(ancestorName)))) { 222 return i - 1; 223 } 224 225 if (matchesType( 226 desiredType, Optional.fromNullable(PublicSuffixPatterns.EXACT.get(ancestorName)))) { 227 return i; 228 } 229 230 // Excluded domains (e.g. !nhs.uk) use the next highest 231 // domain as the effective public suffix (e.g. uk). 232 233 if (PublicSuffixPatterns.EXCLUDED.containsKey(ancestorName)) { 234 return i + 1; 235 } 236 } 237 238 return NO_SUFFIX_FOUND; 239 } 240 241 /** 242 * Returns an instance of {@link InternetDomainName} after lenient validation. Specifically, 243 * validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 244 * ("Internationalizing Domain Names in Applications") is skipped, while validation against <a 245 * href="http://www.ietf.org/rfc/rfc1035.txt">RFC 1035</a> is relaxed in the following ways: 246 * 247 * <ul> 248 * <li>Any part containing non-ASCII characters is considered valid. 249 * <li>Underscores ('_') are permitted wherever dashes ('-') are permitted. 250 * <li>Parts other than the final part may start with a digit, as mandated by <a 251 * href="https://tools.ietf.org/html/rfc1123#section-2">RFC 1123</a>. 252 * </ul> 253 * 254 * @param domain A domain name (not IP address) 255 * @throws IllegalArgumentException if {@code domain} is not syntactically valid according to 256 * {@link #isValid} 257 * @since 10.0 (previously named {@code fromLenient}) 258 */ 259 @CanIgnoreReturnValue // TODO(b/219820829): consider removing 260 public static InternetDomainName from(String domain) { 261 return new InternetDomainName(checkNotNull(domain)); 262 } 263 264 /** 265 * Validation method used by {@code from} to ensure that the domain name is syntactically valid 266 * according to RFC 1035. 267 * 268 * @return Is the domain name syntactically valid? 269 */ 270 private static boolean validateSyntax(List<String> parts) { 271 int lastIndex = parts.size() - 1; 272 273 // Validate the last part specially, as it has different syntax rules. 274 275 if (!validatePart(parts.get(lastIndex), true)) { 276 return false; 277 } 278 279 for (int i = 0; i < lastIndex; i++) { 280 String part = parts.get(i); 281 if (!validatePart(part, false)) { 282 return false; 283 } 284 } 285 286 return true; 287 } 288 289 private static final CharMatcher DASH_MATCHER = CharMatcher.anyOf("-_"); 290 291 private static final CharMatcher DIGIT_MATCHER = CharMatcher.inRange('0', '9'); 292 293 private static final CharMatcher LETTER_MATCHER = 294 CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z')); 295 296 private static final CharMatcher PART_CHAR_MATCHER = 297 DIGIT_MATCHER.or(LETTER_MATCHER).or(DASH_MATCHER); 298 299 /** 300 * Helper method for {@link #validateSyntax(List)}. Validates that one part of a domain name is 301 * valid. 302 * 303 * @param part The domain name part to be validated 304 * @param isFinalPart Is this the final (rightmost) domain part? 305 * @return Whether the part is valid 306 */ 307 private static boolean validatePart(String part, boolean isFinalPart) { 308 309 // These tests could be collapsed into one big boolean expression, but 310 // they have been left as independent tests for clarity. 311 312 if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) { 313 return false; 314 } 315 316 /* 317 * GWT claims to support java.lang.Character's char-classification methods, but it actually only 318 * works for ASCII. So for now, assume any non-ASCII characters are valid. The only place this 319 * seems to be documented is here: 320 * https://groups.google.com/d/topic/google-web-toolkit-contributors/1UEzsryq1XI 321 * 322 * <p>ASCII characters in the part are expected to be valid per RFC 1035, with underscore also 323 * being allowed due to widespread practice. 324 */ 325 326 String asciiChars = CharMatcher.ascii().retainFrom(part); 327 328 if (!PART_CHAR_MATCHER.matchesAllOf(asciiChars)) { 329 return false; 330 } 331 332 // No initial or final dashes or underscores. 333 334 if (DASH_MATCHER.matches(part.charAt(0)) 335 || DASH_MATCHER.matches(part.charAt(part.length() - 1))) { 336 return false; 337 } 338 339 /* 340 * Note that we allow (in contravention of a strict interpretation of the relevant RFCs) domain 341 * parts other than the last may begin with a digit (for example, "3com.com"). It's important to 342 * disallow an initial digit in the last part; it's the only thing that stops an IPv4 numeric 343 * address like 127.0.0.1 from looking like a valid domain name. 344 */ 345 346 if (isFinalPart && DIGIT_MATCHER.matches(part.charAt(0))) { 347 return false; 348 } 349 350 return true; 351 } 352 353 /** 354 * Returns the individual components of this domain name, normalized to all lower case. For 355 * example, for the domain name {@code mail.google.com}, this method returns the list {@code 356 * ["mail", "google", "com"]}. 357 */ 358 public ImmutableList<String> parts() { 359 return parts; 360 } 361 362 /** 363 * Indicates whether this domain name represents a <i>public suffix</i>, as defined by the Mozilla 364 * Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A public suffix 365 * is one under which Internet users can directly register names, such as {@code com}, {@code 366 * co.uk} or {@code pvt.k12.wy.us}. Examples of domain names that are <i>not</i> public suffixes 367 * include {@code google.com}, {@code foo.co.uk}, and {@code myblog.blogspot.com}. 368 * 369 * <p>Public suffixes are a proper superset of {@linkplain #isRegistrySuffix() registry suffixes}. 370 * The list of public suffixes additionally contains privately owned domain names under which 371 * Internet users can register subdomains. An example of a public suffix that is not a registry 372 * suffix is {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> 373 * registry suffixes, since domain name registries collectively control all internet domain names. 374 * 375 * <p>For considerations on whether the public suffix or registry suffix designation is more 376 * suitable for your application, see <a 377 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 378 * 379 * @return {@code true} if this domain name appears exactly on the public suffix list 380 * @since 6.0 381 */ 382 public boolean isPublicSuffix() { 383 return publicSuffixIndex() == 0; 384 } 385 386 /** 387 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 388 * including if it is a public suffix itself. For example, returns {@code true} for {@code 389 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 390 * google.invalid}. This is the recommended method for determining whether a domain is potentially 391 * an addressable host. 392 * 393 * <p>Note that this method is equivalent to {@link #hasRegistrySuffix()} because all registry 394 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 395 * 396 * @since 6.0 397 */ 398 public boolean hasPublicSuffix() { 399 return publicSuffixIndex() != NO_SUFFIX_FOUND; 400 } 401 402 /** 403 * Returns the {@linkplain #isPublicSuffix() public suffix} portion of the domain name, or {@code 404 * null} if no public suffix is present. 405 * 406 * @since 6.0 407 */ 408 public @Nullable InternetDomainName publicSuffix() { 409 return hasPublicSuffix() ? ancestor(publicSuffixIndex()) : null; 410 } 411 412 /** 413 * Indicates whether this domain name ends in a {@linkplain #isPublicSuffix() public suffix}, 414 * while not being a public suffix itself. For example, returns {@code true} for {@code 415 * www.google.com}, {@code foo.co.uk} and {@code myblog.blogspot.com}, but not for {@code com}, 416 * {@code co.uk}, {@code google.invalid}, or {@code blogspot.com}. 417 * 418 * <p>This method can be used to determine whether it will probably be possible to set cookies on 419 * the domain, though even that depends on individual browsers' implementations of cookie 420 * controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 421 * 422 * @since 6.0 423 */ 424 public boolean isUnderPublicSuffix() { 425 return publicSuffixIndex() > 0; 426 } 427 428 /** 429 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 430 * {@linkplain #isPublicSuffix() public suffix}. For example, returns {@code true} for {@code 431 * google.com} {@code foo.co.uk}, and {@code myblog.blogspot.com}, but not for {@code 432 * www.google.com}, {@code co.uk}, or {@code blogspot.com}. 433 * 434 * <p>This method can be used to determine whether a domain is probably the highest level for 435 * which cookies may be set, though even that depends on individual browsers' implementations of 436 * cookie controls. See <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details. 437 * 438 * @since 6.0 439 */ 440 public boolean isTopPrivateDomain() { 441 return publicSuffixIndex() == 1; 442 } 443 444 /** 445 * Returns the portion of this domain name that is one level beneath the {@linkplain 446 * #isPublicSuffix() public suffix}. For example, for {@code x.adwords.google.co.uk} it returns 447 * {@code google.co.uk}, since {@code co.uk} is a public suffix. Similarly, for {@code 448 * myblog.blogspot.com} it returns the same domain, {@code myblog.blogspot.com}, since {@code 449 * blogspot.com} is a public suffix. 450 * 451 * <p>If {@link #isTopPrivateDomain()} is true, the current domain name instance is returned. 452 * 453 * <p>This method can be used to determine the probable highest level parent domain for which 454 * cookies may be set, though even that depends on individual browsers' implementations of cookie 455 * controls. 456 * 457 * @throws IllegalStateException if this domain does not end with a public suffix 458 * @since 6.0 459 */ 460 public InternetDomainName topPrivateDomain() { 461 if (isTopPrivateDomain()) { 462 return this; 463 } 464 checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name); 465 return ancestor(publicSuffixIndex() - 1); 466 } 467 468 /** 469 * Indicates whether this domain name represents a <i>registry suffix</i>, as defined by a subset 470 * of the Mozilla Foundation's <a href="http://publicsuffix.org/">Public Suffix List</a> (PSL). A 471 * registry suffix is one under which Internet users can directly register names via a domain name 472 * registrar, and have such registrations lawfully protected by internet-governing bodies such as 473 * ICANN. Examples of registry suffixes include {@code com}, {@code co.uk}, and {@code 474 * pvt.k12.wy.us}. Examples of domain names that are <i>not</i> registry suffixes include {@code 475 * google.com} and {@code foo.co.uk}. 476 * 477 * <p>Registry suffixes are a proper subset of {@linkplain #isPublicSuffix() public suffixes}. The 478 * list of public suffixes additionally contains privately owned domain names under which Internet 479 * users can register subdomains. An example of a public suffix that is not a registry suffix is 480 * {@code blogspot.com}. Note that it is true that all public suffixes <i>have</i> registry 481 * suffixes, since domain name registries collectively control all internet domain names. 482 * 483 * <p>For considerations on whether the public suffix or registry suffix designation is more 484 * suitable for your application, see <a 485 * href="https://github.com/google/guava/wiki/InternetDomainNameExplained">this article</a>. 486 * 487 * @return {@code true} if this domain name appears exactly on the public suffix list as part of 488 * the registry suffix section (labelled "ICANN"). 489 * @since 23.3 490 */ 491 public boolean isRegistrySuffix() { 492 return registrySuffixIndex() == 0; 493 } 494 495 /** 496 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 497 * including if it is a registry suffix itself. For example, returns {@code true} for {@code 498 * www.google.com}, {@code foo.co.uk} and {@code com}, but not for {@code invalid} or {@code 499 * google.invalid}. 500 * 501 * <p>Note that this method is equivalent to {@link #hasPublicSuffix()} because all registry 502 * suffixes are public suffixes <i>and</i> all public suffixes have registry suffixes. 503 * 504 * @since 23.3 505 */ 506 public boolean hasRegistrySuffix() { 507 return registrySuffixIndex() != NO_SUFFIX_FOUND; 508 } 509 510 /** 511 * Returns the {@linkplain #isRegistrySuffix() registry suffix} portion of the domain name, or 512 * {@code null} if no registry suffix is present. 513 * 514 * @since 23.3 515 */ 516 public @Nullable InternetDomainName registrySuffix() { 517 return hasRegistrySuffix() ? ancestor(registrySuffixIndex()) : null; 518 } 519 520 /** 521 * Indicates whether this domain name ends in a {@linkplain #isRegistrySuffix() registry suffix}, 522 * while not being a registry suffix itself. For example, returns {@code true} for {@code 523 * www.google.com}, {@code foo.co.uk} and {@code blogspot.com}, but not for {@code com}, {@code 524 * co.uk}, or {@code google.invalid}. 525 * 526 * @since 23.3 527 */ 528 public boolean isUnderRegistrySuffix() { 529 return registrySuffixIndex() > 0; 530 } 531 532 /** 533 * Indicates whether this domain name is composed of exactly one subdomain component followed by a 534 * {@linkplain #isRegistrySuffix() registry suffix}. For example, returns {@code true} for {@code 535 * google.com}, {@code foo.co.uk}, and {@code blogspot.com}, but not for {@code www.google.com}, 536 * {@code co.uk}, or {@code myblog.blogspot.com}. 537 * 538 * <p><b>Warning:</b> This method should not be used to determine the probable highest level 539 * parent domain for which cookies may be set. Use {@link #topPrivateDomain()} for that purpose. 540 * 541 * @since 23.3 542 */ 543 public boolean isTopDomainUnderRegistrySuffix() { 544 return registrySuffixIndex() == 1; 545 } 546 547 /** 548 * Returns the portion of this domain name that is one level beneath the {@linkplain 549 * #isRegistrySuffix() registry suffix}. For example, for {@code x.adwords.google.co.uk} it 550 * returns {@code google.co.uk}, since {@code co.uk} is a registry suffix. Similarly, for {@code 551 * myblog.blogspot.com} it returns {@code blogspot.com}, since {@code com} is a registry suffix. 552 * 553 * <p>If {@link #isTopDomainUnderRegistrySuffix()} is true, the current domain name instance is 554 * returned. 555 * 556 * <p><b>Warning:</b> This method should not be used to determine whether a domain is probably the 557 * highest level for which cookies may be set. Use {@link #isTopPrivateDomain()} for that purpose. 558 * 559 * @throws IllegalStateException if this domain does not end with a registry suffix 560 * @since 23.3 561 */ 562 public InternetDomainName topDomainUnderRegistrySuffix() { 563 if (isTopDomainUnderRegistrySuffix()) { 564 return this; 565 } 566 checkState(isUnderRegistrySuffix(), "Not under a registry suffix: %s", name); 567 return ancestor(registrySuffixIndex() - 1); 568 } 569 570 /** Indicates whether this domain is composed of two or more parts. */ 571 public boolean hasParent() { 572 return parts.size() > 1; 573 } 574 575 /** 576 * Returns an {@code InternetDomainName} that is the immediate ancestor of this one; that is, the 577 * current domain with the leftmost part removed. For example, the parent of {@code 578 * www.google.com} is {@code google.com}. 579 * 580 * @throws IllegalStateException if the domain has no parent, as determined by {@link #hasParent} 581 */ 582 public InternetDomainName parent() { 583 checkState(hasParent(), "Domain '%s' has no parent", name); 584 return ancestor(1); 585 } 586 587 /** 588 * Returns the ancestor of the current domain at the given number of levels "higher" (rightward) 589 * in the subdomain list. The number of levels must be non-negative, and less than {@code N-1}, 590 * where {@code N} is the number of parts in the domain. 591 * 592 * <p>TODO: Reasonable candidate for addition to public API. 593 */ 594 private InternetDomainName ancestor(int levels) { 595 ImmutableList<String> ancestorParts = parts.subList(levels, parts.size()); 596 597 // levels equals the number of dots that are getting clipped away, then add the length of each 598 // clipped part to get the length of the leading substring that is being removed. 599 int substringFrom = levels; 600 for (int i = 0; i < levels; i++) { 601 substringFrom += parts.get(i).length(); 602 } 603 String ancestorName = name.substring(substringFrom); 604 605 return new InternetDomainName(ancestorName, ancestorParts); 606 } 607 608 /** 609 * Creates and returns a new {@code InternetDomainName} by prepending the argument and a dot to 610 * the current name. For example, {@code InternetDomainName.from("foo.com").child("www.bar")} 611 * returns a new {@code InternetDomainName} with the value {@code www.bar.foo.com}. Only lenient 612 * validation is performed, as described {@link #from(String) here}. 613 * 614 * @throws NullPointerException if leftParts is null 615 * @throws IllegalArgumentException if the resulting name is not valid 616 */ 617 public InternetDomainName child(String leftParts) { 618 return from(checkNotNull(leftParts) + "." + name); 619 } 620 621 /** 622 * Indicates whether the argument is a syntactically valid domain name using lenient validation. 623 * Specifically, validation against <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> 624 * ("Internationalizing Domain Names in Applications") is skipped. 625 * 626 * <p>The following two code snippets are equivalent: 627 * 628 * <pre>{@code 629 * domainName = InternetDomainName.isValid(name) 630 * ? InternetDomainName.from(name) 631 * : DEFAULT_DOMAIN; 632 * }</pre> 633 * 634 * <pre>{@code 635 * try { 636 * domainName = InternetDomainName.from(name); 637 * } catch (IllegalArgumentException e) { 638 * domainName = DEFAULT_DOMAIN; 639 * } 640 * }</pre> 641 * 642 * @since 8.0 (previously named {@code isValidLenient}) 643 */ 644 public static boolean isValid(String name) { 645 try { 646 InternetDomainName unused = from(name); 647 return true; 648 } catch (IllegalArgumentException e) { 649 return false; 650 } 651 } 652 653 /** 654 * If a {@code desiredType} is specified, returns true only if the {@code actualType} is 655 * identical. Otherwise, returns true as long as {@code actualType} is present. 656 */ 657 private static boolean matchesType( 658 Optional<PublicSuffixType> desiredType, Optional<PublicSuffixType> actualType) { 659 return desiredType.isPresent() ? desiredType.equals(actualType) : actualType.isPresent(); 660 } 661 662 /** Returns the domain name, normalized to all lower case. */ 663 @Override 664 public String toString() { 665 return name; 666 } 667 668 /** 669 * Equality testing is based on the text supplied by the caller, after normalization as described 670 * in the class documentation. For example, a non-ASCII Unicode domain name and the Punycode 671 * version of the same domain name would not be considered equal. 672 */ 673 @Override 674 public boolean equals(@Nullable Object object) { 675 if (object == this) { 676 return true; 677 } 678 679 if (object instanceof InternetDomainName) { 680 InternetDomainName that = (InternetDomainName) object; 681 return this.name.equals(that.name); 682 } 683 684 return false; 685 } 686 687 @Override 688 public int hashCode() { 689 return name.hashCode(); 690 } 691}