httputil.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392
  1. #
  2. # Copyright 2009 Facebook
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. # not use this file except in compliance with the License. You may obtain
  6. # a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. # License for the specific language governing permissions and limitations
  14. # under the License.
  15. """HTTP utility code shared by clients and servers.
  16. This module also defines the `HTTPServerRequest` class which is exposed
  17. via `tornado.web.RequestHandler.request`.
  18. """
  19. import calendar
  20. import collections.abc
  21. import copy
  22. import dataclasses
  23. import datetime
  24. import email.utils
  25. from functools import lru_cache
  26. from http.client import responses
  27. import http.cookies
  28. import re
  29. from ssl import SSLError
  30. import time
  31. import unicodedata
  32. from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
  33. from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
  34. from tornado.util import ObjectDict, unicode_type
  35. # responses is unused in this file, but we re-export it to other files.
  36. # Reference it so pyflakes doesn't complain.
  37. responses
  38. import typing
  39. from typing import (
  40. Tuple,
  41. Iterable,
  42. List,
  43. Mapping,
  44. Iterator,
  45. Dict,
  46. Union,
  47. Optional,
  48. Awaitable,
  49. Generator,
  50. AnyStr,
  51. )
# The imports in this branch are only needed by static type checkers, so they
# are skipped at runtime.
if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

    # This can be done unconditionally in the base class of HTTPHeaders
    # after we drop support for Python 3.8.
    StrMutableMapping = collections.abc.MutableMapping[str, str]
else:
    # At runtime the unparameterized ABC is used instead.
    StrMutableMapping = collections.abc.MutableMapping

# To be used with str.strip() and related methods.
# Contains only space and horizontal tab.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
# The class matches \x00-\x08, \x0A-\x1F and \x7F; horizontal tab (\x09) is
# deliberately left out of the forbidden set.
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0A-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies:
    later patterns are built by interpolating the ``.pattern`` of earlier ones.

    None of the patterns are anchored, so callers that want to validate a
    whole string need ``fullmatch`` rather than ``match`` or ``search``.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    # Zero or more of the allowed characters, so the empty string is a valid uri_host.
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    # Zero or more digits (an empty port is accepted).
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # Three alternatives: the empty string, a single field_vchar, or a run that
    # starts and ends with a field_vchar and may contain spaces and tabs in
    # between (i.e. no leading or trailing whitespace).
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    # field_name and method are aliases for the same compiled token pattern.
    field_name = token
    method = token
    # uri_host optionally followed by ":" and uri_port.
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    # Three capture groups (method, target, version) separated by single spaces.
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    # Three capture groups (version, code, reason). Only the reason group is
    # optional; the space that precedes it is outside the group and is required.
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
  115. @lru_cache(1000)
  116. def _normalize_header(name: str) -> str:
  117. """Map a header name to Http-Header-Case.
  118. >>> _normalize_header("coNtent-TYPE")
  119. 'Content-Type'
  120. """
  121. return "-".join([w.capitalize() for w in name.split("-")])
  122. class HTTPHeaders(StrMutableMapping):
  123. """A dictionary that maintains ``Http-Header-Case`` for all keys.
  124. Supports multiple values per key via a pair of new methods,
  125. `add()` and `get_list()`. The regular dictionary interface
  126. returns a single value per key, with multiple values joined by a
  127. comma.
  128. >>> h = HTTPHeaders({"content-type": "text/html"})
  129. >>> list(h.keys())
  130. ['Content-Type']
  131. >>> h["Content-Type"]
  132. 'text/html'
  133. >>> h.add("Set-Cookie", "A=B")
  134. >>> h.add("Set-Cookie", "C=D")
  135. >>> h["set-cookie"]
  136. 'A=B,C=D'
  137. >>> h.get_list("set-cookie")
  138. ['A=B', 'C=D']
  139. >>> for (k,v) in sorted(h.get_all()):
  140. ... print('%s: %s' % (k,v))
  141. ...
  142. Content-Type: text/html
  143. Set-Cookie: A=B
  144. Set-Cookie: C=D
  145. """
  146. @typing.overload
  147. def __init__(self, __arg: Mapping[str, List[str]]) -> None:
  148. pass
  149. @typing.overload # noqa: F811
  150. def __init__(self, __arg: Mapping[str, str]) -> None:
  151. pass
  152. @typing.overload # noqa: F811
  153. def __init__(self, *args: Tuple[str, str]) -> None:
  154. pass
  155. @typing.overload # noqa: F811
  156. def __init__(self, **kwargs: str) -> None:
  157. pass
  158. def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
  159. # Formally, HTTP headers are a mapping from a field name to a "combined field value",
  160. # which may be constructed from multiple field lines by joining them with commas.
  161. # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
  162. # so we maintain a mapping from field name to a list of field lines in self._as_list.
  163. # self._combined_cache is a cache of the combined field values derived from self._as_list
  164. # on demand (and cleared whenever the list is modified).
  165. self._as_list: dict[str, list[str]] = {}
  166. self._combined_cache: dict[str, str] = {}
  167. self._last_key = None # type: Optional[str]
  168. if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
  169. # Copy constructor
  170. for k, v in args[0].get_all():
  171. self.add(k, v)
  172. else:
  173. # Dict-style initialization
  174. self.update(*args, **kwargs)
  175. # new public methods
  176. def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
  177. """Adds a new value for the given key."""
  178. if not _ABNF.field_name.fullmatch(name):
  179. raise HTTPInputError("Invalid header name %r" % name)
  180. if _chars_are_bytes:
  181. if not _ABNF.field_value.fullmatch(to_unicode(value)):
  182. # TODO: the fact we still support bytes here (contrary to type annotations)
  183. # and still test for it should probably be changed.
  184. raise HTTPInputError("Invalid header value %r" % value)
  185. else:
  186. if _FORBIDDEN_HEADER_CHARS_RE.search(value):
  187. raise HTTPInputError("Invalid header value %r" % value)
  188. norm_name = _normalize_header(name)
  189. self._last_key = norm_name
  190. if norm_name in self:
  191. self._combined_cache.pop(norm_name, None)
  192. self._as_list[norm_name].append(value)
  193. else:
  194. self[norm_name] = value
  195. def get_list(self, name: str) -> List[str]:
  196. """Returns all values for the given header as a list."""
  197. norm_name = _normalize_header(name)
  198. return self._as_list.get(norm_name, [])
  199. def get_all(self) -> Iterable[Tuple[str, str]]:
  200. """Returns an iterable of all (name, value) pairs.
  201. If a header has multiple values, multiple pairs will be
  202. returned with the same name.
  203. """
  204. for name, values in self._as_list.items():
  205. for value in values:
  206. yield (name, value)
  207. def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
  208. r"""Updates the dictionary with a single header line.
  209. >>> h = HTTPHeaders()
  210. >>> h.parse_line("Content-Type: text/html")
  211. >>> h.get('content-type')
  212. 'text/html'
  213. >>> h.parse_line("Content-Length: 42\r\n")
  214. >>> h.get('content-type')
  215. 'text/html'
  216. .. versionchanged:: 6.5
  217. Now supports lines with or without the trailing CRLF, making it possible
  218. to pass lines from AsyncHTTPClient's header_callback directly to this method.
  219. .. deprecated:: 6.5
  220. In Tornado 7.0, certain deprecated features of HTTP will become errors.
  221. Specifically, line folding and the use of LF (with CR) as a line separator
  222. will be removed.
  223. """
  224. if m := re.search(r"\r?\n$", line):
  225. # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
  226. # terminator and ignore any preceding CR.
  227. # TODO(7.0): Remove this support for LF-only line endings.
  228. line = line[: m.start()]
  229. if not line:
  230. # Empty line, or the final CRLF of a header block.
  231. return
  232. if line[0] in HTTP_WHITESPACE:
  233. # continuation of a multi-line header
  234. # TODO(7.0): Remove support for line folding.
  235. if self._last_key is None:
  236. raise HTTPInputError("first header line cannot start with whitespace")
  237. new_part = " " + line.strip(HTTP_WHITESPACE)
  238. if _chars_are_bytes:
  239. if not _ABNF.field_value.fullmatch(new_part[1:]):
  240. raise HTTPInputError("Invalid header continuation %r" % new_part)
  241. else:
  242. if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
  243. raise HTTPInputError("Invalid header value %r" % new_part)
  244. self._as_list[self._last_key][-1] += new_part
  245. self._combined_cache.pop(self._last_key, None)
  246. else:
  247. try:
  248. name, value = line.split(":", 1)
  249. except ValueError:
  250. raise HTTPInputError("no colon in header line")
  251. self.add(
  252. name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
  253. )
  254. @classmethod
  255. def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
  256. """Returns a dictionary from HTTP header text.
  257. >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
  258. >>> sorted(h.items())
  259. [('Content-Length', '42'), ('Content-Type', 'text/html')]
  260. .. versionchanged:: 5.1
  261. Raises `HTTPInputError` on malformed headers instead of a
  262. mix of `KeyError`, and `ValueError`.
  263. """
  264. # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
  265. # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
  266. # they are to be interpreted as utf-8). For historical reasons, this method handled this by
  267. # expecting both callers to decode the headers to strings before parsing them. This wasn't a
  268. # problem until we started doing stricter validation of the characters allowed in HTTP
  269. # headers (using ABNF rules defined in terms of byte values), which inadvertently started
  270. # disallowing non-latin1 characters in multipart/form-data filenames.
  271. #
  272. # This method should have accepted bytes and a desired encoding, but this change is being
  273. # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
  274. # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
  275. # smuggled through the latin1 encoding) or to accept any non-control unicode characters
  276. # as required by multipart/form-data. This method will change to accept bytes in a future
  277. # release.
  278. h = cls()
  279. start = 0
  280. while True:
  281. lf = headers.find("\n", start)
  282. if lf == -1:
  283. h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
  284. break
  285. line = headers[start : lf + 1]
  286. start = lf + 1
  287. h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
  288. return h
  289. # MutableMapping abstract method implementations.
  290. def __setitem__(self, name: str, value: str) -> None:
  291. norm_name = _normalize_header(name)
  292. self._combined_cache[norm_name] = value
  293. self._as_list[norm_name] = [value]
  294. def __contains__(self, name: object) -> bool:
  295. # This is an important optimization to avoid the expensive concatenation
  296. # in __getitem__ when it's not needed.
  297. if not isinstance(name, str):
  298. return False
  299. norm_name = _normalize_header(name)
  300. return norm_name in self._as_list
  301. def __getitem__(self, name: str) -> str:
  302. header = _normalize_header(name)
  303. if header not in self._combined_cache:
  304. self._combined_cache[header] = ",".join(self._as_list[header])
  305. return self._combined_cache[header]
  306. def __delitem__(self, name: str) -> None:
  307. norm_name = _normalize_header(name)
  308. del self._combined_cache[norm_name]
  309. del self._as_list[norm_name]
  310. def __len__(self) -> int:
  311. return len(self._as_list)
  312. def __iter__(self) -> Iterator[typing.Any]:
  313. return iter(self._as_list)
  314. def copy(self) -> "HTTPHeaders":
  315. # defined in dict but not in MutableMapping.
  316. return HTTPHeaders(self)
  317. # Use our overridden copy method for the copy.copy module.
  318. # This makes shallow copies one level deeper, but preserves
  319. # the appearance that HTTPHeaders is a single container.
  320. __copy__ = copy
  321. def __str__(self) -> str:
  322. lines = []
  323. for name, value in self.get_all():
  324. lines.append(f"{name}: {value}\n")
  325. return "".join(lines)
  326. __unicode__ = __str__
  327. class HTTPServerRequest:
  328. """A single HTTP request.
  329. All attributes are type `str` unless otherwise noted.
  330. .. attribute:: method
  331. HTTP request method, e.g. "GET" or "POST"
  332. .. attribute:: uri
  333. The requested uri.
  334. .. attribute:: path
  335. The path portion of `uri`
  336. .. attribute:: query
  337. The query portion of `uri`
  338. .. attribute:: version
  339. HTTP version specified in request, e.g. "HTTP/1.1"
  340. .. attribute:: headers
  341. `.HTTPHeaders` dictionary-like object for request headers. Acts like
  342. a case-insensitive dictionary with additional methods for repeated
  343. headers.
  344. .. attribute:: body
  345. Request body, if present, as a byte string.
  346. .. attribute:: remote_ip
  347. Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
  348. will pass along the real IP address provided by a load balancer
  349. in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.
  350. .. versionchanged:: 3.1
  351. The list format of ``X-Forwarded-For`` is now supported.
  352. .. attribute:: protocol
  353. The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
  354. is set, will pass along the protocol used by a load balancer if
  355. reported via an ``X-Scheme`` header.
  356. .. attribute:: host
  357. The requested hostname, usually taken from the ``Host`` header.
  358. .. attribute:: arguments
  359. GET/POST arguments are available in the arguments property, which
  360. maps arguments names to lists of values (to support multiple values
  361. for individual names). Names are of type `str`, while arguments
  362. are byte strings. Note that this is different from
  363. `.RequestHandler.get_argument`, which returns argument values as
  364. unicode strings.
  365. .. attribute:: query_arguments
  366. Same format as ``arguments``, but contains only arguments extracted
  367. from the query string.
  368. .. versionadded:: 3.2
  369. .. attribute:: body_arguments
  370. Same format as ``arguments``, but contains only arguments extracted
  371. from the request body.
  372. .. versionadded:: 3.2
  373. .. attribute:: files
  374. File uploads are available in the files property, which maps file
  375. names to lists of `.HTTPFile`.
  376. .. attribute:: connection
  377. An HTTP request is attached to a single HTTP connection, which can
  378. be accessed through the "connection" attribute. Since connections
  379. are typically kept open in HTTP/1.1, multiple requests can be handled
  380. sequentially on a single connection.
  381. .. versionchanged:: 4.0
  382. Moved from ``tornado.httpserver.HTTPRequest``.
  383. .. deprecated:: 6.5.2
  384. The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
  385. ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
  386. temporarily restored in 6.5.2.
  387. """
  388. path = None # type: str
  389. query = None # type: str
  390. # HACK: Used for stream_request_body
  391. _body_future = None # type: Future[None]
  392. def __init__(
  393. self,
  394. method: Optional[str] = None,
  395. uri: Optional[str] = None,
  396. version: str = "HTTP/1.0",
  397. headers: Optional[HTTPHeaders] = None,
  398. body: Optional[bytes] = None,
  399. host: Optional[str] = None,
  400. files: Optional[Dict[str, List["HTTPFile"]]] = None,
  401. connection: Optional["HTTPConnection"] = None,
  402. start_line: Optional["RequestStartLine"] = None,
  403. server_connection: Optional[object] = None,
  404. ) -> None:
  405. if start_line is not None:
  406. method, uri, version = start_line
  407. self.method = method
  408. self.uri = uri
  409. self.version = version
  410. self.headers = headers or HTTPHeaders()
  411. self.body = body or b""
  412. # set remote IP and protocol
  413. context = getattr(connection, "context", None)
  414. self.remote_ip = getattr(context, "remote_ip", None)
  415. self.protocol = getattr(context, "protocol", "http")
  416. try:
  417. self.host = host or self.headers["Host"]
  418. except KeyError:
  419. if version == "HTTP/1.0":
  420. # HTTP/1.0 does not require the Host header.
  421. self.host = "127.0.0.1"
  422. else:
  423. raise HTTPInputError("Missing Host header")
  424. if not _ABNF.host.fullmatch(self.host):
  425. raise HTTPInputError("Invalid Host header: %r" % self.host)
  426. if "," in self.host:
  427. # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
  428. # Server MUST respond with 400 Bad Request if multiple
  429. # Host headers are present.
  430. #
  431. # We test for the presence of a comma instead of the number of
  432. # headers received because a proxy may have converted
  433. # multiple headers into a single comma-separated value
  434. # (per RFC 9110 section 5.3).
  435. #
  436. # This is technically a departure from the RFC since the ABNF
  437. # does not forbid commas in the host header. However, since
  438. # commas are not allowed in DNS names, it is appropriate to
  439. # disallow them. (The same argument could be made for other special
  440. # characters, but commas are the most problematic since they could
  441. # be used to exploit differences between proxies when multiple headers
  442. # are supplied).
  443. raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
  444. self.host_name = split_host_and_port(self.host.lower())[0]
  445. self.files = files or {}
  446. self.connection = connection
  447. self.server_connection = server_connection
  448. self._start_time = time.time()
  449. self._finish_time = None
  450. if uri is not None:
  451. self.path, sep, self.query = uri.partition("?")
  452. self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
  453. self.query_arguments = copy.deepcopy(self.arguments)
  454. self.body_arguments = {} # type: Dict[str, List[bytes]]
  455. @property
  456. def cookies(self) -> Dict[str, http.cookies.Morsel]:
  457. """A dictionary of ``http.cookies.Morsel`` objects."""
  458. if not hasattr(self, "_cookies"):
  459. self._cookies = (
  460. http.cookies.SimpleCookie()
  461. ) # type: http.cookies.SimpleCookie
  462. if "Cookie" in self.headers:
  463. try:
  464. parsed = parse_cookie(self.headers["Cookie"])
  465. except Exception:
  466. pass
  467. else:
  468. for k, v in parsed.items():
  469. try:
  470. self._cookies[k] = v
  471. except Exception:
  472. # SimpleCookie imposes some restrictions on keys;
  473. # parse_cookie does not. Discard any cookies
  474. # with disallowed keys.
  475. pass
  476. return self._cookies
  477. def full_url(self) -> str:
  478. """Reconstructs the full URL for this request."""
  479. return self.protocol + "://" + self.host + self.uri # type: ignore[operator]
  480. def request_time(self) -> float:
  481. """Returns the amount of time it took for this request to execute."""
  482. if self._finish_time is None:
  483. return time.time() - self._start_time
  484. else:
  485. return self._finish_time - self._start_time
  486. def get_ssl_certificate(
  487. self, binary_form: bool = False
  488. ) -> Union[None, Dict, bytes]:
  489. """Returns the client's SSL certificate, if any.
  490. To use client certificates, the HTTPServer's
  491. `ssl.SSLContext.verify_mode` field must be set, e.g.::
  492. ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
  493. ssl_ctx.load_cert_chain("foo.crt", "foo.key")
  494. ssl_ctx.load_verify_locations("cacerts.pem")
  495. ssl_ctx.verify_mode = ssl.CERT_REQUIRED
  496. server = HTTPServer(app, ssl_options=ssl_ctx)
  497. By default, the return value is a dictionary (or None, if no
  498. client certificate is present). If ``binary_form`` is true, a
  499. DER-encoded form of the certificate is returned instead. See
  500. SSLSocket.getpeercert() in the standard library for more
  501. details.
  502. http://docs.python.org/library/ssl.html#sslsocket-objects
  503. """
  504. try:
  505. if self.connection is None:
  506. return None
  507. # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
  508. return self.connection.stream.socket.getpeercert( # type: ignore
  509. binary_form=binary_form
  510. )
  511. except SSLError:
  512. return None
  513. def _parse_body(self) -> None:
  514. parse_body_arguments(
  515. self.headers.get("Content-Type", ""),
  516. self.body,
  517. self.body_arguments,
  518. self.files,
  519. self.headers,
  520. )
  521. for k, v in self.body_arguments.items():
  522. self.arguments.setdefault(k, []).extend(v)
  523. def __repr__(self) -> str:
  524. attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
  525. args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
  526. return f"{self.__class__.__name__}({args})"
  527. class HTTPInputError(Exception):
  528. """Exception class for malformed HTTP requests or responses
  529. from remote sources.
  530. .. versionadded:: 4.0
  531. """
  532. pass
  533. class HTTPOutputError(Exception):
  534. """Exception class for errors in HTTP output.
  535. .. versionadded:: 4.0
  536. """
  537. pass
  538. class HTTPServerConnectionDelegate:
  539. """Implement this interface to handle requests from `.HTTPServer`.
  540. .. versionadded:: 4.0
  541. """
  542. def start_request(
  543. self, server_conn: object, request_conn: "HTTPConnection"
  544. ) -> "HTTPMessageDelegate":
  545. """This method is called by the server when a new request has started.
  546. :arg server_conn: is an opaque object representing the long-lived
  547. (e.g. tcp-level) connection.
  548. :arg request_conn: is a `.HTTPConnection` object for a single
  549. request/response exchange.
  550. This method should return a `.HTTPMessageDelegate`.
  551. """
  552. raise NotImplementedError()
  553. def on_close(self, server_conn: object) -> None:
  554. """This method is called when a connection has been closed.
  555. :arg server_conn: is a server connection that has previously been
  556. passed to ``start_request``.
  557. """
  558. pass
  559. class HTTPMessageDelegate:
  560. """Implement this interface to handle an HTTP request or response.
  561. .. versionadded:: 4.0
  562. """
  563. # TODO: genericize this class to avoid exposing the Union.
  564. def headers_received(
  565. self,
  566. start_line: Union["RequestStartLine", "ResponseStartLine"],
  567. headers: HTTPHeaders,
  568. ) -> Optional[Awaitable[None]]:
  569. """Called when the HTTP headers have been received and parsed.
  570. :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
  571. depending on whether this is a client or server message.
  572. :arg headers: a `.HTTPHeaders` instance.
  573. Some `.HTTPConnection` methods can only be called during
  574. ``headers_received``.
  575. May return a `.Future`; if it does the body will not be read
  576. until it is done.
  577. """
  578. pass
  579. def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
  580. """Called when a chunk of data has been received.
  581. May return a `.Future` for flow control.
  582. """
  583. pass
  584. def finish(self) -> None:
  585. """Called after the last chunk of data has been received."""
  586. pass
  587. def on_connection_close(self) -> None:
  588. """Called if the connection is closed without finishing the request.
  589. If ``headers_received`` is called, either ``finish`` or
  590. ``on_connection_close`` will be called, but not both.
  591. """
  592. pass
  593. class HTTPConnection:
  594. """Applications use this interface to write their responses.
  595. .. versionadded:: 4.0
  596. """
  597. def write_headers(
  598. self,
  599. start_line: Union["RequestStartLine", "ResponseStartLine"],
  600. headers: HTTPHeaders,
  601. chunk: Optional[bytes] = None,
  602. ) -> "Future[None]":
  603. """Write an HTTP header block.
  604. :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
  605. :arg headers: a `.HTTPHeaders` instance.
  606. :arg chunk: the first (optional) chunk of data. This is an optimization
  607. so that small responses can be written in the same call as their
  608. headers.
  609. The ``version`` field of ``start_line`` is ignored.
  610. Returns a future for flow control.
  611. .. versionchanged:: 6.0
  612. The ``callback`` argument was removed.
  613. """
  614. raise NotImplementedError()
  615. def write(self, chunk: bytes) -> "Future[None]":
  616. """Writes a chunk of body data.
  617. Returns a future for flow control.
  618. .. versionchanged:: 6.0
  619. The ``callback`` argument was removed.
  620. """
  621. raise NotImplementedError()
  622. def finish(self) -> None:
  623. """Indicates that the last body data has been written."""
  624. raise NotImplementedError()
  625. def url_concat(
  626. url: str,
  627. args: Union[
  628. None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
  629. ],
  630. ) -> str:
  631. """Concatenate url and arguments regardless of whether
  632. url has existing query parameters.
  633. ``args`` may be either a dictionary or a list of key-value pairs
  634. (the latter allows for multiple values with the same key.
  635. >>> url_concat("http://example.com/foo", dict(c="d"))
  636. 'http://example.com/foo?c=d'
  637. >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
  638. 'http://example.com/foo?a=b&c=d'
  639. >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
  640. 'http://example.com/foo?a=b&c=d&c=d2'
  641. """
  642. if args is None:
  643. return url
  644. parsed_url = urlparse(url)
  645. if isinstance(args, dict):
  646. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  647. parsed_query.extend(args.items())
  648. elif isinstance(args, list) or isinstance(args, tuple):
  649. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  650. parsed_query.extend(args)
  651. else:
  652. err = "'args' parameter should be dict, list or tuple. Not {0}".format(
  653. type(args)
  654. )
  655. raise TypeError(err)
  656. final_query = urlencode(parsed_query)
  657. url = urlunparse(
  658. (
  659. parsed_url[0],
  660. parsed_url[1],
  661. parsed_url[2],
  662. parsed_url[3],
  663. final_query,
  664. parsed_url[5],
  665. )
  666. )
  667. return url
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``: the file name, as a string
    * ``body``: the file contents, as a byte string
    * ``content_type``: the content type, as a string
    """

    filename: str
    body: bytes
    content_type: str
  679. def _parse_request_range(
  680. range_header: str,
  681. ) -> Optional[Tuple[Optional[int], Optional[int]]]:
  682. """Parses a Range header.
  683. Returns either ``None`` or tuple ``(start, end)``.
  684. Note that while the HTTP headers use inclusive byte positions,
  685. this method returns indexes suitable for use in slices.
  686. >>> start, end = _parse_request_range("bytes=1-2")
  687. >>> start, end
  688. (1, 3)
  689. >>> [0, 1, 2, 3, 4][start:end]
  690. [1, 2]
  691. >>> _parse_request_range("bytes=6-")
  692. (6, None)
  693. >>> _parse_request_range("bytes=-6")
  694. (-6, None)
  695. >>> _parse_request_range("bytes=-0")
  696. (None, 0)
  697. >>> _parse_request_range("bytes=")
  698. (None, None)
  699. >>> _parse_request_range("foo=42")
  700. >>> _parse_request_range("bytes=1-2,6-10")
  701. Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
  702. See [0] for the details of the range header.
  703. [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
  704. """
  705. unit, _, value = range_header.partition("=")
  706. unit, value = unit.strip(), value.strip()
  707. if unit != "bytes":
  708. return None
  709. start_b, _, end_b = value.partition("-")
  710. try:
  711. start = _int_or_none(start_b)
  712. end = _int_or_none(end_b)
  713. except ValueError:
  714. return None
  715. if end is not None:
  716. if start is None:
  717. if end != 0:
  718. start = -end
  719. end = None
  720. else:
  721. end += 1
  722. return (start, end)
  723. def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
  724. """Returns a suitable Content-Range header:
  725. >>> print(_get_content_range(None, 1, 4))
  726. bytes 0-0/4
  727. >>> print(_get_content_range(1, 3, 4))
  728. bytes 1-2/4
  729. >>> print(_get_content_range(None, None, 4))
  730. bytes 0-3/4
  731. """
  732. start = start or 0
  733. end = (end or total) - 1
  734. return f"bytes {start}-{end}/{total}"
  735. def _int_or_none(val: str) -> Optional[int]:
  736. val = val.strip()
  737. if val == "":
  738. return None
  739. return int(val)
  740. @dataclasses.dataclass
  741. class ParseMultipartConfig:
  742. """This class configures the parsing of ``multipart/form-data`` request bodies.
  743. Its primary purpose is to place limits on the size and complexity of request messages
  744. to avoid potential denial-of-service attacks.
  745. .. versionadded:: 6.5.5
  746. """
  747. enabled: bool = True
  748. """Set this to false to disable the parsing of ``multipart/form-data`` requests entirely.
  749. This may be desirable for applications that do not need to handle this format, since
  750. multipart request have a history of DoS vulnerabilities in Tornado. Multipart requests
  751. are used primarily for ``<input type="file">`` in HTML forms, or in APIs that mimic this
  752. format. File uploads that use the HTTP ``PUT`` method generally do not use the multipart
  753. format.
  754. """
  755. max_parts: int = 100
  756. """The maximum number of parts accepted in a multipart request.
  757. Each ``<input>`` element in an HTML form corresponds to at least one "part".
  758. """
  759. max_part_header_size: int = 10 * 1024
  760. """The maximum size of the headers for each part of a multipart request.
  761. The header for a part contains the name of the form field and optionally the filename
  762. and content type of the uploaded file.
  763. """
@dataclasses.dataclass
class ParseBodyConfig:
    """This class configures the parsing of request bodies.

    It currently has a single field, ``multipart``, which holds the
    settings for ``multipart/form-data`` bodies.

    .. versionadded:: 6.5.5
    """

    # default_factory builds a fresh ParseMultipartConfig for each instance
    # that does not supply its own.
    multipart: ParseMultipartConfig = dataclasses.field(
        default_factory=ParseMultipartConfig
    )
    """Configuration for ``multipart/form-data`` request bodies."""
# Module-wide fallback used by parse_body_arguments() and
# parse_multipart_form_data() when the caller passes no ``config``.
# Rebound by set_parse_body_config().
_DEFAULT_PARSE_BODY_CONFIG = ParseBodyConfig()


def set_parse_body_config(config: ParseBodyConfig) -> None:
    r"""Sets the **global** default configuration for parsing request bodies.

    This global setting is provided as a stopgap for applications that need to raise the limits
    introduced in Tornado 6.5.5, or that wish to disable the parsing of multipart/form-data bodies
    entirely. Non-global configuration for this functionality will be introduced in a future
    release.

    The new default only applies to parsing calls that do not pass their own ``config``.

    >>> content_type = "multipart/form-data; boundary=foo"
    >>> multipart_body = b"--foo--\r\n"
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    >>> multipart_config = ParseMultipartConfig(enabled=False)
    >>> config = ParseBodyConfig(multipart=multipart_config)
    >>> set_parse_body_config(config)
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    Traceback (most recent call last):
        ...
    tornado.httputil.HTTPInputError: ...: multipart/form-data parsing is disabled
    >>> set_parse_body_config(ParseBodyConfig())  # reset to defaults

    .. versionadded:: 6.5.5
    """
    # Rebind the module-level name so later lookups see the new object.
    global _DEFAULT_PARSE_BODY_CONFIG
    _DEFAULT_PARSE_BODY_CONFIG = config
  795. def parse_body_arguments(
  796. content_type: str,
  797. body: bytes,
  798. arguments: Dict[str, List[bytes]],
  799. files: Dict[str, List[HTTPFile]],
  800. headers: Optional[HTTPHeaders] = None,
  801. *,
  802. config: Optional[ParseBodyConfig] = None,
  803. ) -> None:
  804. """Parses a form request body.
  805. Supports ``application/x-www-form-urlencoded`` and
  806. ``multipart/form-data``. The ``content_type`` parameter should be
  807. a string and ``body`` should be a byte string. The ``arguments``
  808. and ``files`` parameters are dictionaries that will be updated
  809. with the parsed contents.
  810. """
  811. if config is None:
  812. config = _DEFAULT_PARSE_BODY_CONFIG
  813. if content_type.startswith("application/x-www-form-urlencoded"):
  814. if headers and "Content-Encoding" in headers:
  815. raise HTTPInputError(
  816. "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
  817. )
  818. try:
  819. # real charset decoding will happen in RequestHandler.decode_argument()
  820. uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
  821. except Exception as e:
  822. raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
  823. for name, values in uri_arguments.items():
  824. if values:
  825. arguments.setdefault(name, []).extend(values)
  826. elif content_type.startswith("multipart/form-data"):
  827. if headers and "Content-Encoding" in headers:
  828. raise HTTPInputError(
  829. "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
  830. )
  831. try:
  832. fields = content_type.split(";")
  833. if fields[0].strip() != "multipart/form-data":
  834. # This catches "Content-Type: multipart/form-dataxyz"
  835. raise HTTPInputError("Invalid content type")
  836. for field in fields:
  837. k, sep, v = field.strip().partition("=")
  838. if k == "boundary" and v:
  839. parse_multipart_form_data(
  840. utf8(v), body, arguments, files, config=config.multipart
  841. )
  842. break
  843. else:
  844. raise HTTPInputError("multipart boundary not found")
  845. except Exception as e:
  846. raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    *,
    config: Optional[ParseMultipartConfig] = None,
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    :arg boundary: the multipart boundary, optionally wrapped in double
        quotes.
    :arg data: the raw request body.
    :arg arguments: receives the value of each part that has no
        ``filename``, keyed by the part's ``name``.
    :arg files: receives an `HTTPFile` for each part that has a non-empty
        ``filename``, keyed by the part's ``name``.
    :arg config: limits to enforce; when ``None`` the ``multipart`` section
        of the global default configuration is used.

    Raises `HTTPInputError` if multipart parsing is disabled, if no final
    boundary is found, if there are too many parts, or if a part is
    malformed (no header terminator, oversized header, a disposition other
    than ``form-data``, a missing trailing CRLF, or a missing name).

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    if config is None:
        config = _DEFAULT_PARSE_BODY_CONFIG.multipart
    if not config.enabled:
        raise HTTPInputError("multipart/form-data parsing is disabled")
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    # Use the last occurrence of the closing delimiter; anything after it
    # is ignored.
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    # split() also returns whatever precedes the first delimiter (often an
    # empty string), and that element counts toward max_parts.
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    if len(parts) > config.max_parts:
        raise HTTPInputError("multipart/form-data has too many parts")
    for part in parts:
        if not part:
            continue
        # A blank line separates the part's headers from its body.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # Enforce the header size limit before decoding and parsing.
        if eoh > config.max_part_header_size:
            raise HTTPInputError("multipart/form-data part header too large")
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Skip the 4-byte header terminator and drop the trailing CRLF that
        # was verified just above.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        # A non-empty filename marks a file upload; anything else is a
        # plain argument.
        if disp_params.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
  906. def format_timestamp(
  907. ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
  908. ) -> str:
  909. """Formats a timestamp in the format used by HTTP.
  910. The argument may be a numeric timestamp as returned by `time.time`,
  911. a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
  912. object. Naive `datetime.datetime` objects are assumed to represent
  913. UTC; aware objects are converted to UTC before formatting.
  914. >>> format_timestamp(1359312200)
  915. 'Sun, 27 Jan 2013 18:43:20 GMT'
  916. """
  917. if isinstance(ts, (int, float)):
  918. time_num = ts
  919. elif isinstance(ts, (tuple, time.struct_time)):
  920. time_num = calendar.timegm(ts)
  921. elif isinstance(ts, datetime.datetime):
  922. time_num = calendar.timegm(ts.utctimetuple())
  923. else:
  924. raise TypeError("unknown timestamp type: %r" % ts)
  925. return email.utils.formatdate(time_num, usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The first line of an HTTP request as a ``(method, path, version)`` tuple."""

    # Request method, e.g. "GET".
    method: str
    # Request target, e.g. "/foo".
    path: str
    # Protocol version string, e.g. "HTTP/1.1".
    version: str
  930. def parse_request_start_line(line: str) -> RequestStartLine:
  931. """Returns a (method, path, version) tuple for an HTTP 1.x request line.
  932. The response is a `typing.NamedTuple`.
  933. >>> parse_request_start_line("GET /foo HTTP/1.1")
  934. RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
  935. """
  936. match = _ABNF.request_line.fullmatch(line)
  937. if not match:
  938. # https://tools.ietf.org/html/rfc7230#section-3.1.1
  939. # invalid request-line SHOULD respond with a 400 (Bad Request)
  940. raise HTTPInputError("Malformed HTTP request line")
  941. r = RequestStartLine(match.group(1), match.group(2), match.group(3))
  942. if not r.version.startswith("HTTP/1"):
  943. # HTTP/2 and above doesn't use parse_request_start_line.
  944. # This could be folded into the regex but we don't want to deviate
  945. # from the ABNF in the RFCs.
  946. raise HTTPInputError("Unexpected HTTP version %r" % r.version)
  947. return r
class ResponseStartLine(typing.NamedTuple):
    """The first line of an HTTP response as a ``(version, code, reason)`` tuple."""

    # Protocol version string, e.g. "HTTP/1.1".
    version: str
    # Numeric status code, e.g. 200.
    code: int
    # Reason phrase, e.g. "OK".
    reason: str
  952. def parse_response_start_line(line: str) -> ResponseStartLine:
  953. """Returns a (version, code, reason) tuple for an HTTP 1.x response line.
  954. The response is a `typing.NamedTuple`.
  955. >>> parse_response_start_line("HTTP/1.1 200 OK")
  956. ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
  957. """
  958. match = _ABNF.status_line.fullmatch(line)
  959. if not match:
  960. raise HTTPInputError("Error parsing response start line")
  961. r = ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
  962. if not r.version.startswith("HTTP/1"):
  963. # HTTP/2 and above doesn't use parse_response_start_line.
  964. raise HTTPInputError("Unexpected HTTP version %r" % r.version)
  965. return r
  966. # _parseparam and _parse_header are copied and modified from python2.7's cgi.py
  967. # The original 2.7 version of this code did not correctly support some
  968. # combinations of semicolons and double quotes.
  969. # It has also been modified to support valueless parameters as seen in
  970. # websocket extension negotiations, and to support non-ascii values in
  971. # RFC 2231/5987 format.
  972. #
  973. # _parseparam has been further modified with the logic from
  974. # https://github.com/python/cpython/pull/136072/files
  975. # to avoid quadratic behavior when parsing semicolons in quoted strings.
  976. #
  977. # TODO: See if we can switch to email.message.Message for this functionality.
  978. # This is the suggested replacement for the cgi.py module now that cgi has
  979. # been removed from recent versions of Python. We need to verify that
  980. # the email module is consistent with our existing behavior (and all relevant
  981. # RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
    """Yield the whitespace-stripped, semicolon-delimited segments of ``s``.

    Each segment must be introduced by a ``;``: scanning stops as soon as
    the current position is not a semicolon, so a string that does not
    start with ``;`` yields nothing.  The quote counting below is meant to
    keep a ``;`` inside a double-quoted string from ending a segment.
    """
    start = 0
    # One pass per segment: consume the ";" at ``start``, then find where
    # the segment ends.
    while s.find(";", start) == start:
        start += 1
        end = s.find(";", start)
        # ``diff`` accumulates the number of double quotes seen, not
        # counting those preceded by a backslash.
        ind, diff = start, 0
        while end > 0:
            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
            # An even count means the candidate ";" is accepted as the end
            # of the segment.
            if diff % 2 == 0:
                break
            # NOTE(review): this index exchange is taken from the CPython
            # change referenced in the comment above this function; keep it
            # in sync with upstream rather than simplifying it here.
            end, ind = ind, s.find(";", end + 1)
        # No terminating ";" was found: the segment runs to the end of s.
        if end < 0:
            end = len(s)
        f = s[start:end]
        yield f.strip()
        start = end
  998. def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
  999. r"""Parse a Content-type like header.
  1000. Return the main content-type and a dictionary of options.
  1001. >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
  1002. >>> ct, d = _parse_header(d)
  1003. >>> ct
  1004. 'form-data'
  1005. >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
  1006. True
  1007. >>> d['foo']
  1008. 'b\\a"r'
  1009. """
  1010. parts = _parseparam(";" + line)
  1011. key = next(parts)
  1012. # decode_params treats first argument special, but we already stripped key
  1013. params = [("Dummy", "value")]
  1014. for p in parts:
  1015. i = p.find("=")
  1016. if i >= 0:
  1017. name = p[:i].strip().lower()
  1018. value = p[i + 1 :].strip()
  1019. params.append((name, native_str(value)))
  1020. decoded_params = email.utils.decode_params(params)
  1021. decoded_params.pop(0) # get rid of the dummy again
  1022. pdict = {}
  1023. for name, decoded_value in decoded_params:
  1024. value = email.utils.collapse_rfc2231_value(decoded_value)
  1025. if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
  1026. value = value[1:-1]
  1027. pdict[name] = value
  1028. return key, pdict
  1029. def _encode_header(key: str, pdict: Dict[str, str]) -> str:
  1030. """Inverse of _parse_header.
  1031. >>> _encode_header('permessage-deflate',
  1032. ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
  1033. 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
  1034. """
  1035. if not pdict:
  1036. return key
  1037. out = [key]
  1038. # Sort the parameters just to make it easy to test.
  1039. for k, v in sorted(pdict.items()):
  1040. if v is None:
  1041. out.append(k)
  1042. else:
  1043. # TODO: quote if necessary.
  1044. out.append(f"{k}={v}")
  1045. return "; ".join(out)
  1046. def encode_username_password(
  1047. username: Union[str, bytes], password: Union[str, bytes]
  1048. ) -> bytes:
  1049. """Encodes a username/password pair in the format used by HTTP auth.
  1050. The return value is a byte string in the form ``username:password``.
  1051. .. versionadded:: 5.1
  1052. """
  1053. if isinstance(username, unicode_type):
  1054. username = unicodedata.normalize("NFC", username)
  1055. if isinstance(password, unicode_type):
  1056. password = unicodedata.normalize("NFC", password)
  1057. return utf8(username) + b":" + utf8(password)
  1058. def doctests():
  1059. # type: () -> unittest.TestSuite
  1060. import doctest
  1061. return doctest.DocTestSuite(optionflags=doctest.ELLIPSIS)
  1062. _netloc_re = re.compile(r"^(.+):(\d+)$")
  1063. def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
  1064. """Returns ``(host, port)`` tuple from ``netloc``.
  1065. Returned ``port`` will be ``None`` if not present.
  1066. .. versionadded:: 4.1
  1067. """
  1068. match = _netloc_re.match(netloc)
  1069. if match:
  1070. host = match.group(1)
  1071. port = int(match.group(2)) # type: Optional[int]
  1072. else:
  1073. host = netloc
  1074. port = None
  1075. return (host, port)
  1076. def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
  1077. """Generator converting a result of ``parse_qs`` back to name-value pairs.
  1078. .. versionadded:: 5.0
  1079. """
  1080. for k, vs in qs.items():
  1081. for v in vs:
  1082. yield (k, v)
# Bound ``sub`` method of a pattern matching a backslash followed by either
# a three-digit octal escape whose first digit is 0-3 (group 1) or any
# other single character (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
    """Return the replacement text for one escape matched by ``_unquote_sub``."""
    if m[1]:
        # Octal escape such as \012: the character with that code point.
        return chr(int(m[1], 8))
    else:
        # Any other escaped character stands for itself.
        return m[2]
def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    Values that are ``None``, shorter than two characters, or not wrapped
    in double quotes are returned unchanged.  Otherwise the enclosing
    quotes are removed and backslash escapes are decoded.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters.  See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences.  Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, s)
  1110. def parse_cookie(cookie: str) -> Dict[str, str]:
  1111. """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
  1112. This function attempts to mimic browser cookie parsing behavior;
  1113. it specifically does not follow any of the cookie-related RFCs
  1114. (because browsers don't either).
  1115. The algorithm used is identical to that used by Django version 1.9.10.
  1116. .. versionadded:: 4.4.2
  1117. """
  1118. cookiedict = {}
  1119. for chunk in cookie.split(";"):
  1120. if "=" in chunk:
  1121. key, val = chunk.split("=", 1)
  1122. else:
  1123. # Assume an empty name per
  1124. # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
  1125. key, val = "", chunk
  1126. key, val = key.strip(), val.strip()
  1127. if key or val:
  1128. # unquote using Python's algorithm.
  1129. cookiedict[key] = _unquote_cookie(val)
  1130. return cookiedict