Coverage for pycommons / net / url.py: 98%

86 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-24 03:11 +0000

1""" 

2A string class representing a URL. 

3 

4Like the class :class:`~pycommons.io.path.Path` in 

5:mod:`pycommons.io.path` does for paths in the file system, the class 

6:class:`~pycommons.net.url.URL` offers some sort of canonical and very 

7conservative representation of URLs, which, at the same time, is also a 

8string. 

9This makes it convenient to pass the instances of this class into functions 

10that otherwise expect strings. 

11It also allows you to write functions that expect strings and URLs as 

12parameter. 

13 

14>>> u = URL("https://thomasweise.github.io/contact/#address-in-english") 

15>>> u 

16'https://thomasweise.github.io/contact/#address-in-english' 

17>>> u.scheme 

18'https' 

19>>> u.host 

20'thomasweise.github.io' 

21>>> u.path 

22'/contact/' 

23>>> u.fragment 

24'address-in-english' 

25 

26>>> u = URL("http://thomasweise.github.io/contact/") 

27>>> u 

28'http://thomasweise.github.io/contact' 

29>>> u.scheme 

30'http' 

31>>> u.host 

32'thomasweise.github.io' 

33>>> u.path 

34'/contact' 

35>>> print(u.fragment) 

36None 

37""" 

38 

39from re import Match, search 

40from re import compile as _compile 

41from typing import ( 

42 Any, 

43 Final, 

44 Pattern, 

45 cast, 

46) 

47from urllib.parse import ParseResult, urljoin, urlparse 

48 

49# noinspection PyPackageRequirements 

50from pycommons.strings.chars import WHITESPACE_OR_NEWLINE 

51from pycommons.types import check_int_range 

52 

#: the pattern matching text that is forbidden in a relative URL: two
#: `@`, a run of whitespace or special characters, or two `://`
_FORBIDDEN_IN_RELATIVE_URL: Final[Pattern] = _compile("|".join((
    "@.*@",
    f"[{WHITESPACE_OR_NEWLINE}" r"\\%*?&+\"'=$§!,;|<>\[\](){}²³°^]+",
    "://.*://")))

#: the pattern matching text that is forbidden in a fully-expanded URL:
#: everything forbidden in a relative URL plus `..`, path components
#: made up only of dots, and text consisting only of dots
_FORBIDDEN_IN_FULL_URL: Final[Pattern] = _compile(
    rf"{_FORBIDDEN_IN_RELATIVE_URL.pattern}|\.\.|\/\.+\/|\A\.+\Z")

#: the pattern matching text that is forbidden in a fragment: everything
#: forbidden in a fully-expanded URL plus `#`
_FORBIDDEN_IN_FRAGMENT: Final[Pattern] = _compile(
    f"{_FORBIDDEN_IN_FULL_URL.pattern}|#")

65 

66 

def _check_url_part(part: Any, forbidden: Pattern) -> str:
    """
    Validate a single URL component and return it as a string.

    A part is accepted only if it is a non-empty string of fewer than 255
    characters, contains no text matched by `forbidden`, consists solely of
    ASCII characters, and does not end in `'#'` or `'@'`. The checks are
    performed in exactly this order.

    :param part: the URL part to validate
    :param forbidden: the pattern matching the text that must not occur
    :returns: `part` itself, if it passed all checks
    :raises TypeError: if `part` is not a string or if `forbidden` is
        neither a string nor a compiled pattern
    :raises ValueError: if `part` violates any of the rules above

    >>> try:
    ...     _check_url_part("", _FORBIDDEN_IN_RELATIVE_URL)
    ... except ValueError as ve:
    ...     print(ve)
    URL part '' has invalid length 0.

    >>> try:
    ...     _check_url_part(" ", _FORBIDDEN_IN_RELATIVE_URL)
    ... except ValueError as ve:
    ...     print(ve)
    URL part ' ' contains the forbidden text ' '.

    >>> try:
    ...     _check_url_part("Äquator", _FORBIDDEN_IN_RELATIVE_URL)
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'Äquator' contains non-ASCII characters.

    >>> try:
    ...     _check_url_part("2" * 260, _FORBIDDEN_IN_RELATIVE_URL)
    ... except ValueError as ve:
    ...     print(str(ve)[:60])
    URL part '22222222222222222222222222222222222222222222222222

    >>> try:
    ...     _check_url_part(None, _FORBIDDEN_IN_RELATIVE_URL)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     _check_url_part(2, _FORBIDDEN_IN_RELATIVE_URL)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> isinstance(_check_url_part("123", _FORBIDDEN_IN_RELATIVE_URL), str)
    True

    >>> try:
    ...     _check_url_part(3, _FORBIDDEN_IN_RELATIVE_URL)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     _check_url_part("3", 5)
    ... except TypeError as te:
    ...     print(te)
    first argument must be string or compiled pattern
    """
    # Calling `str.__len__` directly doubles as the type check: anything
    # that is not a `str` makes it raise a `TypeError`.
    size: Final[int] = str.__len__(part)
    if (size <= 0) or (size >= 255):
        raise ValueError(f"URL part {part!r} has invalid length {len(part)}.")

    hit: Final[Match | None] = search(forbidden, part)
    if hit is not None:
        raise ValueError(f"URL part {part!r} contains the forbidden "
                         f"text {hit.group()!r}.")

    text: Final[str] = cast("str", part)
    if not text.isascii():
        raise ValueError(
            f"URL part {text!r} contains non-ASCII characters.")

    # a dangling fragment or user-name separator is not permitted
    for bad_end in ("#", "@"):
        if text.endswith(bad_end):
            raise ValueError(
                f"URL part must not end in {bad_end!r}, but {text!r} does.")
    return text

140 

141 

#: the name of the mailto scheme
_MAILTO_1: Final[str] = "mailto"
#: the mailto scheme followed by the colon, i.e., `mailto:`
_MAILTO_2: Final[str] = f"{_MAILTO_1}:"
#: the complete mailto prefix including the slashes, i.e., `mailto://`
_MAILTO_3: Final[str] = f"{_MAILTO_2}//"
#: the name of the ssh scheme
_SSH: Final[str] = "ssh"

#: the schemes whose URLs must contain a user name
_REQUIRE_USER_NAME_SCHEMES: Final[set] = {_MAILTO_1, _SSH}

#: all permitted URL schemes: those that must not have a user name
#: (`http`, `https`) together with those that require one
_ALLOWED_SCHEMES: Final[set] = {"http", "https"} | _REQUIRE_USER_NAME_SCHEMES

157 

158 

class URL(str):  # noqa: SLOT000
    r"""
    A normalized and expanded URL.

    This is a very strict URL parsing routine. The idea is that it will only
    produce URLs that are safe for use in almost any environment and throw
    exceptions otherwise.

    We limit the URLs to very few different types and allowed schemes.
    Non-ASCII characters are not allowed, and neither are spaces, `'%'`,
    `'*'`, `'?'`, `'+'`, `'&'`, `'<'`, `'>'`, `','`, `'$'`, `'§'`, `"'"`,
    `'"'`, `'['`, `']'`, `'{'`, `'}'`, `'('`, `')'`, nor `'\'` and a few
    more.

    We also allow `'@'` to occur at most once. This means that URLs cannot
    have any parameters and also that URL-escaping non-ASCII characters is not
    possible either. We thus limit the URLs to mainly static content pointers.

    We also only permit simple schemes such as `http`, `https`, `mailto`, and
    `ssh`.

    The final URL also cannot contain any `'..'`, `'/./'`, or `'/../'`, and
    no component may consist only of dots. Every URL and every component
    must be non-empty and shorter than 255 characters. It is also not allowed
    that `'://'` occurs twice.
    If the URL is a `mailto` or `ssh` URL, it must provide a username
    component.

    If a port is provided, it must be greater than 0 and less than 65536.
    If a port is specified, a host must be specified as well.
    Only if a netloc is found, then a port or a host may be specified.

    The URL `value` may be a relative URL that is turned into an absolute URL
    using the base URL `base_url`. Of course, then the same restrictions apply
    to the relative original URL, the base URL, and the final absolute URL.

    This function tries to detect email addresses and turns them into valid
    `mailto://` urls.
    This function gobbles up single trailing `/` characters.

    An instance of `URL` is also an instance of :class:`str`, so you can use
    it as string wherever you want. It additionally offers the following
    attributes:

    - :attr:`~URL.scheme`: the URL scheme, e.g., `"http"`
    - :attr:`~URL.netloc`: the URL network location, including user (if any),
      host, and port (if any)
    - :attr:`~URL.host`: the host of the URL
    - :attr:`~URL.port`: the port of the URL, or `None` if no port is
      specified
    - :attr:`~URL.path`: the path part of the URL (without the
      :attr:`~URL.fragment` part, if any), or `None` if no path part is
      specified
    - :attr:`~URL.fragment`: the fragment part of the path, or `None` if the
      path has no fragment


    >>> u1 = URL("mailto:tweise@hfuu.edu.cn")
    >>> print(u1)
    mailto://tweise@hfuu.edu.cn
    >>> print(u1.scheme)
    mailto
    >>> print(u1.netloc)
    tweise@hfuu.edu.cn
    >>> print(u1.host)
    hfuu.edu.cn
    >>> print(u1.port)
    None
    >>> print(u1.path)
    None
    >>> print(u1.fragment)
    None

    >>> u = URL("tweise@hfuu.edu.cn")
    >>> print(u)
    mailto://tweise@hfuu.edu.cn
    >>> print(u.scheme)
    mailto
    >>> print(u.netloc)
    tweise@hfuu.edu.cn
    >>> print(u.host)
    hfuu.edu.cn
    >>> print(u.port)
    None
    >>> print(u.path)
    None
    >>> print(u.fragment)
    None

    >>> URL("mailto://tweise@hfuu.edu.cn")
    'mailto://tweise@hfuu.edu.cn'

    >>> u2 = URL("https://example.com/abc")
    >>> print(u2)
    https://example.com/abc
    >>> print(u2.scheme)
    https
    >>> print(u2.netloc)
    example.com
    >>> print(u2.host)
    example.com
    >>> print(u2.port)
    None
    >>> print(u2.path)
    /abc
    >>> print(u2.fragment)
    None
    >>> u1.host != u2.host
    True

    >>> u = URL("https://example.com/abc/")
    >>> print(u)
    https://example.com/abc
    >>> print(u.scheme)
    https
    >>> print(u.netloc)
    example.com
    >>> print(u.host)
    example.com
    >>> print(u.port)
    None
    >>> print(u.path)
    /abc
    >>> print(u.fragment)
    None

    >>> u = URL("https://example.com/")
    >>> print(u)
    https://example.com
    >>> print(u.scheme)
    https
    >>> print(u.netloc)
    example.com
    >>> print(u.host)
    example.com
    >>> print(u.port)
    None
    >>> print(u.path)
    None
    >>> print(u.fragment)
    None

    >>> u = URL("ssh://git@example.com/abc")
    >>> print(u)
    ssh://git@example.com/abc
    >>> print(u.scheme)
    ssh
    >>> print(u.netloc)
    git@example.com
    >>> print(u.host)
    example.com
    >>> print(u.port)
    None
    >>> print(u.path)
    /abc
    >>> print(u.fragment)
    None

    >>> URL("1.txt", "http://example.com/thomasWeise")
    'http://example.com/1.txt'

    >>> URL("1.txt", "http://example.com/thomasWeise/")
    'http://example.com/thomasWeise/1.txt'

    >>> URL("../1.txt", "http://example.com/thomasWeise/")
    'http://example.com/1.txt'

    >>> URL("https://example.com/1.txt",
    ...     "http://github.com/thomasWeise/")
    'https://example.com/1.txt'

    >>> URL("http://example.com:123/1")
    'http://example.com:123/1'

    >>> u = URL("http://example.com:34/index.html#1")
    >>> print(u)
    http://example.com:34/index.html#1
    >>> print(u.scheme)
    http
    >>> print(u.netloc)
    example.com:34
    >>> print(u.host)
    example.com
    >>> print(u.port)
    34
    >>> print(u.path)
    /index.html
    >>> print(u.fragment)
    1

    >>> try:
    ...     URL("tweise@@hfuu.edu.cn")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'tweise@@hfuu.edu.cn' contains the forbidden text '@@'.

    >>> try:
    ...     URL("http://example.com/index.html#")
    ... except ValueError as ve:
    ...     print(ve)
    URL part must not end in '#', but 'http://example.com/index.html#' does.

    >>> try:
    ...     URL("http://example.com/index.html@")
    ... except ValueError as ve:
    ...     print(ve)
    URL part must not end in '@', but 'http://example.com/index.html@' does.

    >>> try:
    ...     URL("https://example.com/abc(/23")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://example.com/abc(/23' contains the forbidden text '('.

    >>> try:
    ...     URL("https://example.com/abc]/23")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://example.com/abc]/23' contains the forbidden text ']'.

    >>> try:
    ...     URL("https://example.com/abcä/23")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://example.com/abcä/23' contains non-ASCII characters.

    >>> try:
    ...     URL("https://example.com/abc/./23")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://example.com/abc/./23' contains the forbidden text '/./'.

    >>> try:
    ...     URL("https://example.com/abc/../1.txt")
    ... except ValueError as ve:
    ...     print(str(ve)[:-4])
    URL part 'https://example.com/abc/../1.txt' contains the forbidden text '/.

    >>> try:
    ...     URL(r"https://example.com/abc\./23")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://example.com/abc\\./23' contains the forbidden text '\\'.

    >>> try:
    ...     URL("https://1.2.com/abc/23/../r")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://1.2.com/abc/23/../r' contains the forbidden text '/../'.

    >>> try:
    ...     URL("https://exa mple.com")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'https://exa mple.com' contains the forbidden text ' '.

    >>> try:
    ...     URL("ftp://example.com")
    ... except ValueError as ve:
    ...     print(str(ve)[:66])
    Invalid scheme 'ftp' of url 'ftp://example.com' under base None, o

    >>> try:
    ...     URL("http://example.com%32")
    ... except ValueError as ve:
    ...     print(str(ve))
    URL part 'http://example.com%32' contains the forbidden text '%'.

    >>> try:
    ...     URL("mailto://example.com")
    ... except ValueError as ve:
    ...     print(str(ve)[:66])
    'mailto' url 'mailto://example.com' must contain '@' and have user

    >>> try:
    ...     URL("ssh://example.com")
    ... except ValueError as ve:
    ...     print(str(ve)[:65])
    'ssh' url 'ssh://example.com' must contain '@' and have username,

    >>> try:
    ...     URL("ftp://example.com*32")
    ... except ValueError as ve:
    ...     print(str(ve))
    URL part 'ftp://example.com*32' contains the forbidden text '*'.

    >>> try:
    ...     URL("http://example.com/https://h")
    ... except ValueError as ve:
    ...     print(str(ve)[:74])
    URL part 'http://example.com/https://h' contains the forbidden text '://ex

    >>> try:
    ...     URL("http://user@example.com")
    ... except ValueError as ve:
    ...     print(str(ve)[:66])
    'http' url 'http://user@example.com' must not contain '@' and have

    >>> try:
    ...     URL("http://" + ("a" * 250))
    ... except ValueError as ve:
    ...     print(str(ve)[-30:])
    aaaaa' has invalid length 257.

    >>> try:
    ...     URL("http://.")
    ... except ValueError as ve:
    ...     print(ve)
    URL part '.' contains the forbidden text '.'.

    >>> try:
    ...     URL("http://..")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http://..' contains the forbidden text '..'.

    >>> try:
    ...     URL("http://www.example.com/../1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http://www.example.com/../1' contains the forbidden text '/../'.

    >>> try:
    ...     URL("http://www.example.com/./1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http://www.example.com/./1' contains the forbidden text '/./'.

    >>> try:
    ...     URL("http://user@example.com/@1")
    ... except ValueError as ve:
    ...     print(str(ve)[:-9])
    URL part 'http://user@example.com/@1' contains the forbidden text '@exampl

    >>> try:
    ...     URL("http://:45/1.txt")
    ... except ValueError as ve:
    ...     print(ve)
    URL 'http://:45/1.txt' has no host?

    >>> try:
    ...     URL("http://example.com:-3/@1")
    ... except ValueError as ve:
    ...     print(ve)
    Port could not be cast to integer value as '-3'

    >>> try:
    ...     URL("http://example.com:0/@1")
    ... except ValueError as ve:
    ...     print(ve)
    port=0 is invalid, must be in 1..65535.

    >>> try:
    ...     URL("http://example.com:65536/@1")
    ... except ValueError as ve:
    ...     print(ve)
    Port out of range 0-65535

    >>> try:
    ...     URL(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     URL(None)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     URL("http::/1.txt", 1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     URL("http::/1.txt?x=1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http::/1.txt?x=1' contains the forbidden text '?'.

    >>> try:
    ...     URL("http::/1.txt&x=1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http::/1.txt&x=1' contains the forbidden text '&'.

    >>> try:
    ...     URL("http::/1.+txt&x=1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http::/1.+txt&x=1' contains the forbidden text '+'.

    >>> try:
    ...     URL("http::/1*.+txt&x=1")
    ... except ValueError as ve:
    ...     print(ve)
    URL part 'http::/1*.+txt&x=1' contains the forbidden text '*'.

    >>> try:
    ...     URL("http://example.com#1#2")
    ... except ValueError as ve:
    ...     print(ve)
    URL part '1#2' contains the forbidden text '#'.
    """

    #: the protocol scheme, e.g., `"https"`
    scheme: Final[str]  # type: ignore
    #: the network location, usually of the form `"user@host:port"`, i.e.,
    #: composed of user name (if present), host, and port (if present)
    netloc: Final[str]  # type: ignore
    #: the host str
    host: Final[str]  # type: ignore
    #: the port, if any (else `None`)
    port: Final[int | None]  # type: ignore
    #: the path, if any (else `None`), but without the fragment component
    path: Final[str | None]  # type: ignore
    #: the path fragment, i.e., the part following a `"#"`, if any (else
    #: `None`)
    fragment: Final[str | None]  # type: ignore

    def __new__(cls, value: Any, base_url: Any | None = None):
        """
        Validate, normalize, and construct the URL.

        If `value` already is an instance of :class:`URL`, it is returned
        as-is and `base_url` is ignored.

        :param value: either the full absolute URL or a URL that should be
            resolved against the URL `base_url`
        :param base_url: the base URL to resolve `value` against, or `None` if
            `value` is already an absolute URL
        :returns: the new URL instance
        :raises TypeError: if `value` or `base_url` is not a string
        :raises ValueError: if the URL, the base URL, or any component of
            the resolved URL violates the restrictions of this class
        """
        if isinstance(value, URL):
            return cast("URL", value)

        # Validate the raw input first, then the base, then the joined URL.
        if base_url is None:
            text: str = _check_url_part(value, _FORBIDDEN_IN_FULL_URL)
        else:
            relative: Final[str] = _check_url_part(
                value, _FORBIDDEN_IN_RELATIVE_URL)
            base: Final[str] = _check_url_part(
                base_url, _FORBIDDEN_IN_FULL_URL)
            text = _check_url_part(
                urljoin(base, relative), _FORBIDDEN_IN_FULL_URL)

        # gobble up one single trailing slash
        text = text.removesuffix("/")

        # rewrite `mailto:x` to `mailto://x` so that a netloc gets parsed
        mail: bool = text.startswith(_MAILTO_2)
        if mail and not text.startswith(_MAILTO_3):
            text = _MAILTO_3 + text[len(_MAILTO_2):]

        parsed: ParseResult = urlparse(text)
        proto: str | None = parsed.scheme
        if (not proto) and (text.count("@") == 1):
            # no scheme but exactly one '@': treat it as an email address
            parsed = urlparse(_MAILTO_3 + text)
            proto = parsed.scheme
            mail = True
        proto = _check_url_part(proto, _FORBIDDEN_IN_FULL_URL)

        if proto not in _ALLOWED_SCHEMES:
            raise ValueError(
                f"Invalid scheme {proto!r} of url {text!r} under base "
                f"{base_url!r}, only {_ALLOWED_SCHEMES!r} are permitted.")

        location: Final[str] = _check_url_part(
            parsed.netloc, _FORBIDDEN_IN_FULL_URL)

        hostname: Final[str | None] = parsed.hostname
        if hostname is None:
            raise ValueError(f"URL {text!r} has no host?")
        _check_url_part(hostname, _FORBIDDEN_IN_FULL_URL)

        # accessing `.port` may itself raise a ValueError for bad ports
        port_no: Final[int | None] = parsed.port
        if port_no is not None:
            check_int_range(port_no, "port", 1, 65535)

        # an empty path is represented as None
        the_path: str | None = parsed.path or None
        if the_path is not None:
            _check_url_part(the_path, _FORBIDDEN_IN_FULL_URL)

        if mail != (proto == _MAILTO_1):  # this should be impossible
            raise ValueError(f"url {text!r} has scheme {proto!r}?")

        # mailto and ssh need `user@`, all other schemes must not have it
        needs_user: Final[bool] = mail or (
            proto in _REQUIRE_USER_NAME_SCHEMES)
        at_found: Final[bool] = "@" in location
        user: Final[str | None] = parsed.username
        user_found: Final[bool] = bool(user)
        if needs_user != (at_found and user_found):
            negation: Final[str] = "" if needs_user else "not "
            at_text: Final[str] = "@" if at_found else "no @"
            user_text: Final[str] = repr(user) if user_found \
                else "no username"
            raise ValueError(
                f"{proto!r} url {text!r} must {negation}contain '@' and "
                f"have username, but got {at_text} and {user_text}.")

        if parsed.query or parsed.params or (parsed.password is not None):
            # should be impossible, as our regex check already picks this up
            raise ValueError(
                f"Query/parameters/password found in url {text!r}.")

        # an empty fragment is represented as None
        frag: str | None = parsed.fragment or None
        if frag is not None:
            _check_url_part(frag, _FORBIDDEN_IN_FRAGMENT)

        instance = super().__new__(cls, _check_url_part(
            parsed.geturl(), _FORBIDDEN_IN_FULL_URL))

        #: the protocol scheme
        instance.scheme = proto  # type: ignore
        #: the network location: user@host:port
        instance.netloc = location  # type: ignore
        #: the host
        instance.host = hostname  # type: ignore
        #: the port, if any (else `None`)
        instance.port = port_no  # type: ignore
        #: the path, if any (else `None`)
        instance.path = the_path  # type: ignore
        #: the path fragment, if any (else `None`)
        instance.fragment = frag  # type: ignore
        return instance