| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392 |
- #
- # Copyright 2009 Facebook
- #
- # Licensed under the Apache License, Version 2.0 (the "License"); you may
- # not use this file except in compliance with the License. You may obtain
- # a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- # License for the specific language governing permissions and limitations
- # under the License.
- """HTTP utility code shared by clients and servers.
- This module also defines the `HTTPServerRequest` class which is exposed
- via `tornado.web.RequestHandler.request`.
- """
- import calendar
- import collections.abc
- import copy
- import dataclasses
- import datetime
- import email.utils
- from functools import lru_cache
- from http.client import responses
- import http.cookies
- import re
- from ssl import SSLError
- import time
- import unicodedata
- from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
- from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
- from tornado.util import ObjectDict, unicode_type
- # responses is unused in this file, but we re-export it to other files.
- # Reference it so pyflakes doesn't complain.
- responses
- import typing
- from typing import (
- Tuple,
- Iterable,
- List,
- Mapping,
- Iterator,
- Dict,
- Union,
- Optional,
- Awaitable,
- Generator,
- AnyStr,
- )
- if typing.TYPE_CHECKING:
- from typing import Deque # noqa: F401
- from asyncio import Future # noqa: F401
- import unittest # noqa: F401
- # This can be done unconditionally in the base class of HTTPHeaders
- # after we drop support for Python 3.8.
- StrMutableMapping = collections.abc.MutableMapping[str, str]
- else:
- StrMutableMapping = collections.abc.MutableMapping
# The two whitespace characters (space and horizontal tab) that are trimmed
# from header values and that mark a folded continuation line.
# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
# Matches the control characters \x00-\x1F except tab (\x09), plus DEL (\x7F).
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0A-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.

    The patterns are built by interpolating the ``.pattern`` text of earlier
    attributes and contain no anchors of their own, so callers in this module
    apply them with ``fullmatch``.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # Three alternatives: the empty string, a single field_vchar, or a run that
    # starts and ends with a field_vchar and may contain spaces/tabs in between.
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    # field_name and method are aliases for the same compiled token pattern.
    field_name = token
    method = token
    # A uri_host optionally followed by ":" and a uri_port.
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    # Groups: (method, request-target, HTTP-version), separated by single spaces.
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    # Groups: (HTTP-version, status-code, optional reason-phrase). The space
    # after the status code is required even when the reason phrase is absent.
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
- @lru_cache(1000)
- def _normalize_header(name: str) -> str:
- """Map a header name to Http-Header-Case.
- >>> _normalize_header("coNtent-TYPE")
- 'Content-Type'
- """
- return "-".join([w.capitalize() for w in name.split("-")])
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        """Create the headers, either as a copy of another `HTTPHeaders`
        (preserving repeated field lines) or from dict-style arguments.
        """
        # Formally, HTTP headers are a mapping from a field name to a "combined field value",
        # which may be constructed from multiple field lines by joining them with commas.
        # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
        # so we maintain a mapping from field name to a list of field lines in self._as_list.
        # self._combined_cache is a cache of the combined field values derived from self._as_list
        # on demand (and cleared whenever the list is modified).
        self._as_list: dict[str, list[str]] = {}
        self._combined_cache: dict[str, str] = {}
        # Normalized name of the most recently added header; parse_line appends
        # folded continuation lines to it.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key.

        Raises `HTTPInputError` if the name is not a valid field name or the
        value contains characters that are not permitted. See `parse` for the
        meaning of ``_chars_are_bytes``.
        """
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # The cached combined value is stale once another line is appended.
            self._combined_cache.pop(norm_name, None)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        Raises `HTTPInputError` if the line is malformed.

        .. versionchanged:: 6.5
            Now supports lines with or without the trailing CRLF, making it possible
            to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (without CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                # Validate without the single joining space prepended above.
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._combined_cache.pop(self._last_key, None)
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        # Walk the text one LF-terminated line at a time; each slice keeps its
        # line terminator, which parse_line strips.
        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._combined_cache[norm_name] = value
        self._as_list[norm_name] = [value]

    def __contains__(self, name: object) -> bool:
        # This is an important optimization to avoid the expensive concatenation
        # in __getitem__ when it's not needed.
        if not isinstance(name, str):
            return False
        norm_name = _normalize_header(name)
        return norm_name in self._as_list

    def __getitem__(self, name: str) -> str:
        header = _normalize_header(name)
        if header not in self._combined_cache:
            # Raises KeyError (from _as_list) when the header is absent.
            self._combined_cache[header] = ",".join(self._as_list[header])
        return self._combined_cache[header]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        # _as_list is the source of truth: delete from it first so that a
        # missing header raises KeyError before any state is touched.
        del self._as_list[norm_name]
        # The combined cache is populated lazily and is invalidated by add()
        # and by folded continuation lines, so an existing header may have no
        # cache entry. Dropping it must therefore not raise.
        self._combined_cache.pop(norm_name, None)

    def __len__(self) -> int:
        return len(self._as_list)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._as_list)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        # One "Name: value" line per stored field line, each terminated by LF.
        return "".join(f"{name}: {value}\n" for name, value in self.get_all())

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        # A start line, when supplied, overrides the individual arguments.
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # Remote IP and protocol come from the connection's context, if any.
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version != "HTTP/1.0":
                raise HTTPInputError("Missing Host header")
            # HTTP/1.0 does not require the Host header.
            self.host = "127.0.0.1"
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            # Everything after the first "?" is the query string.
            self.path, _, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Independent copy, so later additions to ``arguments`` leave it alone.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        # Parsed on first access and then kept on the instance.
        if hasattr(self, "_cookies"):
            return self._cookies
        self._cookies = (
            http.cookies.SimpleCookie()
        )  # type: http.cookies.SimpleCookie
        if "Cookie" in self.headers:
            try:
                parsed = parse_cookie(self.headers["Cookie"])
            except Exception:
                # An unparseable Cookie header yields no cookies at all.
                parsed = {}
            for key, value in parsed.items():
                try:
                    self._cookies[key] = value
                except Exception:
                    # SimpleCookie imposes some restrictions on keys;
                    # parse_cookie does not.  Discard any cookies
                    # with disallowed keys.
                    pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        # Until a finish time is recorded, measure against the current time.
        end = time.time() if self._finish_time is None else self._finish_time
        return end - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        if self.connection is None:
            return None
        try:
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        """Parse the body into ``body_arguments`` and ``files``, then merge
        the body arguments into ``arguments``.
        """
        content_type = self.headers.get("Content-Type", "")
        parse_body_arguments(
            content_type,
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for name, values in self.body_arguments.items():
            self.arguments.setdefault(name, []).extend(values)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(f"{n}={getattr(self, n)!r}" for n in attrs)
        return f"{self.__class__.__name__}({args})"
class HTTPInputError(Exception):
    """Raised when HTTP data received from a remote source (a request or a
    response) is malformed.

    .. versionadded:: 4.0
    """
class HTTPOutputError(Exception):
    """Raised for errors in HTTP output.

    .. versionadded:: 4.0
    """
class HTTPServerConnectionDelegate:
    """Interface to implement in order to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """Called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`. The base
        implementation raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def on_close(self, server_conn: object) -> None:
        """Called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.

        The base implementation does nothing.
        """
        return None
class HTTPMessageDelegate:
    """Interface to implement in order to handle an HTTP request or response.

    Every method of this base class is a no-op that returns ``None``.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        return None

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        return None

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        return None

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        return None
class HTTPConnection:
    """Interface that applications use to write their responses.

    Every method of this base class raises ``NotImplementedError``.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Append ``args`` to the query string of ``url``, whether or not the
    url already has query parameters.

    ``args`` may be either a dictionary or a list (or tuple) of key-value
    pairs (the latter allows for multiple values with the same key). If
    ``args`` is ``None`` the url is returned unchanged; any other type
    raises `TypeError`.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parts = urlparse(url)
    if not isinstance(args, (dict, list, tuple)):
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    # Existing parameters come first (blank values kept), new ones after them.
    query_pairs = parse_qsl(parts.query, keep_blank_values=True)
    query_pairs.extend(args.items() if isinstance(args, dict) else args)
    # Rebuild the url, replacing only the query component (index 4).
    return urlunparse((*parts[:4], urlencode(query_pairs), parts[5]))
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Annotation-only declarations: they give the attributes listed above
    # their types and assign no class-level defaults.
    filename: str
    body: bytes
    content_type: str
def _parse_request_range(
    range_header: str,
) -> Optional[Tuple[Optional[int], Optional[int]]]:
    """Parses a Range header.

    Returns either ``None`` or tuple ``(start, end)``.
    Note that while the HTTP headers use inclusive byte positions,
    this method returns indexes suitable for use in slices.

    >>> start, end = _parse_request_range("bytes=1-2")
    >>> start, end
    (1, 3)
    >>> [0, 1, 2, 3, 4][start:end]
    [1, 2]
    >>> _parse_request_range("bytes=6-")
    (6, None)
    >>> _parse_request_range("bytes=-6")
    (-6, None)
    >>> _parse_request_range("bytes=-0")
    (None, 0)
    >>> _parse_request_range("bytes=")
    (None, None)
    >>> _parse_request_range("foo=42")
    >>> _parse_request_range("bytes=1-2,6-10")

    Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).

    See [0] for the details of the range header.

    [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
    """
    unit, _, spec = range_header.partition("=")
    if unit.strip() != "bytes":
        return None
    first, _, last = spec.strip().partition("-")
    try:
        start, end = _int_or_none(first), _int_or_none(last)
    except ValueError:
        # A non-numeric bound (which includes any multi-range spec).
        return None
    if end is None:
        # "N-" or an empty spec: open-ended, nothing to adjust.
        return (start, end)
    if start is not None:
        # "N-M": turn the inclusive last byte into an exclusive slice end.
        return (start, end + 1)
    if end == 0:
        # "-0": a zero-length suffix.
        return (None, 0)
    # "-N": the final N bytes, expressed as a negative slice start.
    return (-end, None)
- def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
- """Returns a suitable Content-Range header:
- >>> print(_get_content_range(None, 1, 4))
- bytes 0-0/4
- >>> print(_get_content_range(1, 3, 4))
- bytes 1-2/4
- >>> print(_get_content_range(None, None, 4))
- bytes 0-3/4
- """
- start = start or 0
- end = (end or total) - 1
- return f"bytes {start}-{end}/{total}"
- def _int_or_none(val: str) -> Optional[int]:
- val = val.strip()
- if val == "":
- return None
- return int(val)
@dataclasses.dataclass
class ParseMultipartConfig:
    """Settings that govern how ``multipart/form-data`` request bodies are parsed.

    The main goal of these settings is to bound the size and complexity of
    incoming request messages so that they cannot be used for
    denial-of-service attacks.

    .. versionadded:: 6.5.5
    """

    enabled: bool = True
    """Whether ``multipart/form-data`` requests are parsed at all.

    Set this to false to turn multipart parsing off completely. Applications
    that never need this format may want to do so, because multipart requests
    have a history of DoS vulnerabilities in Tornado. The format is mostly used
    for ``<input type="file">`` in HTML forms and for APIs that imitate it;
    file uploads made with the HTTP ``PUT`` method generally do not use it.
    """

    max_parts: int = 100
    """Upper bound on the number of parts allowed in one multipart request.

    Every ``<input>`` element of an HTML form produces at least one "part".
    """

    max_part_header_size: int = 10 * 1024
    """Upper bound on the size of the headers of any single part.

    A part's header carries the form field name and, optionally, the filename
    and content type of an uploaded file.
    """
@dataclasses.dataclass
class ParseBodyConfig:
    """Settings that govern how request bodies are parsed.

    .. versionadded:: 6.5.5
    """

    # default_factory gives every instance its own ParseMultipartConfig.
    multipart: ParseMultipartConfig = dataclasses.field(
        default_factory=ParseMultipartConfig
    )
    """Settings applied to ``multipart/form-data`` request bodies."""
# Module-wide fallback used when a caller does not supply its own config.
_DEFAULT_PARSE_BODY_CONFIG = ParseBodyConfig()


def set_parse_body_config(config: ParseBodyConfig) -> None:
    r"""Replace the **global** default configuration used to parse request bodies.

    This process-wide switch is a stopgap for applications that must raise the
    limits introduced in Tornado 6.5.5, or that want to turn off parsing of
    multipart/form-data bodies altogether. A non-global way to configure this
    is planned for a future release.

    >>> content_type = "multipart/form-data; boundary=foo"
    >>> multipart_body = b"--foo--\r\n"
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    >>> multipart_config = ParseMultipartConfig(enabled=False)
    >>> config = ParseBodyConfig(multipart=multipart_config)
    >>> set_parse_body_config(config)
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    Traceback (most recent call last):
        ...
    tornado.httputil.HTTPInputError: ...: multipart/form-data parsing is disabled
    >>> set_parse_body_config(ParseBodyConfig())  # reset to defaults

    .. versionadded:: 6.5.5
    """
    # Rebind the module-level default rather than mutating the old object.
    global _DEFAULT_PARSE_BODY_CONFIG
    _DEFAULT_PARSE_BODY_CONFIG = config
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
    *,
    config: Optional[ParseBodyConfig] = None,
) -> None:
    """Parse a form-style request body into ``arguments`` and ``files``.

    Two content types are handled: ``application/x-www-form-urlencoded``
    and ``multipart/form-data``. Any other ``content_type`` is ignored.
    ``content_type`` is a string and ``body`` a byte string; the
    ``arguments`` and ``files`` dictionaries are updated in place with
    whatever was parsed.

    ``headers``, when given, is only consulted for ``Content-Encoding``:
    an encoded body of either supported type is rejected. ``config``
    falls back to the global default when omitted.

    Raises `HTTPInputError` for an encoded or malformed body.
    """
    if config is None:
        config = _DEFAULT_PARSE_BODY_CONFIG

    is_urlencoded = content_type.startswith("application/x-www-form-urlencoded")
    is_multipart = not is_urlencoded and content_type.startswith(
        "multipart/form-data"
    )
    if not (is_urlencoded or is_multipart):
        return

    # Both supported formats refuse bodies that carry a Content-Encoding.
    if headers and "Content-Encoding" in headers:
        raise HTTPInputError(
            "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
        )

    if is_urlencoded:
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            parsed = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in parsed.items():
            if values:
                arguments.setdefault(name, []).extend(values)
        return

    try:
        fields = content_type.split(";")
        if fields[0].strip() != "multipart/form-data":
            # This catches "Content-Type: multipart/form-dataxyz"
            raise HTTPInputError("Invalid content type")
        # Use the first "boundary=<non-empty>" parameter, if any.
        boundary = None
        for field in fields:
            key, _, val = field.strip().partition("=")
            if key == "boundary" and val:
                boundary = val
                break
        if boundary is None:
            raise HTTPInputError("multipart boundary not found")
        parse_multipart_form_data(
            utf8(boundary), body, arguments, files, config=config.multipart
        )
    except Exception as e:
        # Every failure above, including our own HTTPInputErrors, is wrapped.
        raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    *,
    config: Optional[ParseMultipartConfig] = None,
) -> None:
    """Parse a ``multipart/form-data`` body into ``arguments`` and ``files``.

    ``boundary`` and ``data`` are byte strings. Parts that carry a
    ``filename`` are appended to ``files`` as `HTTPFile` objects; all other
    parts are appended to ``arguments``. Both dictionaries are updated in
    place. ``config`` falls back to the global default when omitted.

    Raises `HTTPInputError` when parsing is disabled, a configured limit
    is exceeded, or the body is malformed.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    if config is None:
        config = _DEFAULT_PARSE_BODY_CONFIG.multipart
    if not config.enabled:
        raise HTTPInputError("multipart/form-data parsing is disabled")

    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]

    delimiter = b"--" + boundary
    closing_at = data.rfind(delimiter + b"--")
    if closing_at == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")

    # Everything before the closing delimiter is split on the part delimiter.
    sections = data[:closing_at].split(delimiter + b"\r\n")
    if len(sections) > config.max_parts:
        raise HTTPInputError("multipart/form-data has too many parts")

    for section in sections:
        if not section:
            continue
        # A blank line separates the part's headers from its content.
        header_end = section.find(b"\r\n\r\n")
        if header_end == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        if header_end > config.max_part_header_size:
            raise HTTPInputError("multipart/form-data part header too large")
        part_headers = HTTPHeaders.parse(
            section[:header_end].decode("utf-8"), _chars_are_bytes=False
        )
        disposition, disp_params = _parse_header(
            part_headers.get("Content-Disposition", "")
        )
        if not (disposition == "form-data" and section.endswith(b"\r\n")):
            raise HTTPInputError("Invalid multipart/form-data")
        # Drop the 4-byte header separator and the trailing CRLF.
        content = section[header_end + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        field_name = disp_params["name"]
        if not disp_params.get("filename"):
            arguments.setdefault(field_name, []).append(content)
            continue
        upload = HTTPFile(
            filename=disp_params["filename"],
            body=content,
            content_type=part_headers.get("Content-Type", "application/unknown"),
        )
        files.setdefault(field_name, []).append(upload)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Render a timestamp in the date format used by HTTP.

    ``ts`` may be a numeric timestamp such as `time.time` returns, a time
    tuple such as `time.gmtime` returns, or a `datetime.datetime`. Naive
    `datetime.datetime` objects are taken to be UTC; aware ones are
    converted to UTC first. Any other type raises ``TypeError``.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        return email.utils.formatdate(ts, usegmt=True)
    if isinstance(ts, datetime.datetime):
        # Reduce datetimes to a UTC time tuple and share the tuple path.
        ts = ts.utctimetuple()
    elif not isinstance(ts, (tuple, time.struct_time)):
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(calendar.timegm(ts), usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The three components of an HTTP request line."""

    # Request method, e.g. "GET".
    method: str
    # Request target, e.g. "/foo".
    path: str
    # Protocol version string, e.g. "HTTP/1.1".
    version: str
def parse_request_start_line(line: str) -> RequestStartLine:
    """Split an HTTP 1.x request line into ``(method, path, version)``.

    The result is a `typing.NamedTuple`. `HTTPInputError` is raised when
    the line does not match the request-line grammar or when the version
    does not start with ``HTTP/1``.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    match = _ABNF.request_line.fullmatch(line)
    if match is None:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = match.group(1, 2, 3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above doesn't use parse_request_start_line.
        # This could be folded into the regex but we don't want to deviate
        # from the ABNF in the RFCs.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return RequestStartLine(method, path, version)
class ResponseStartLine(typing.NamedTuple):
    """The three components of an HTTP status line."""

    # Protocol version string, e.g. "HTTP/1.1".
    version: str
    # Numeric status code, e.g. 200.
    code: int
    # Reason phrase, e.g. "OK".
    reason: str
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Split an HTTP 1.x status line into ``(version, code, reason)``.

    The result is a `typing.NamedTuple` whose ``code`` is an ``int``.
    `HTTPInputError` is raised when the line does not match the
    status-line grammar or when the version does not start with ``HTTP/1``.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    match = _ABNF.status_line.fullmatch(line)
    if match is None:
        raise HTTPInputError("Error parsing response start line")
    version, status, reason = match.group(1, 2, 3)
    parsed = ResponseStartLine(version, int(status), reason)
    if not parsed.version.startswith("HTTP/1"):
        # HTTP/2 and above doesn't use parse_response_start_line.
        raise HTTPInputError("Unexpected HTTP version %r" % parsed.version)
    return parsed
- # _parseparam and _parse_header are copied and modified from python2.7's cgi.py
- # The original 2.7 version of this code did not correctly support some
- # combinations of semicolons and double quotes.
- # It has also been modified to support valueless parameters as seen in
- # websocket extension negotiations, and to support non-ascii values in
- # RFC 2231/5987 format.
- #
- # _parseparam has been further modified with the logic from
- # https://github.com/python/cpython/pull/136072/files
- # to avoid quadratic behavior when parsing semicolons in quoted strings.
- #
- # TODO: See if we can switch to email.message.Message for this functionality.
- # This is the suggested replacement for the cgi.py module now that cgi has
- # been removed from recent versions of Python. We need to verify that
- # the email module is consistent with our existing behavior (and all relevant
- # RFCs for multipart/form-data) before making this change.
- def _parseparam(s: str) -> Generator[str, None, None]:
- start = 0
- while s.find(";", start) == start:
- start += 1
- end = s.find(";", start)
- ind, diff = start, 0
- while end > 0:
- diff += s.count('"', ind, end) - s.count('\\"', ind, end)
- if diff % 2 == 0:
- break
- end, ind = ind, s.find(";", end + 1)
- if end < 0:
- end = len(s)
- f = s[start:end]
- yield f.strip()
- start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Split a Content-Type-like header into its value and its parameters.

    Returns the leading value together with a dictionary of the
    ``name=value`` options that follow it. Option names are lower-cased,
    RFC 2231 encoded values are decoded, and one pair of surrounding
    double quotes is removed from each value. Options without ``=`` are
    not included in the dictionary.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    fields = _parseparam(";" + line)
    key = next(fields)
    # decode_params treats first argument special, but we already stripped key
    raw_params = [("Dummy", "value")]
    for field in fields:
        name, sep, value = field.partition("=")
        if sep:
            raw_params.append((name.strip().lower(), native_str(value.strip())))
    # Skip the dummy entry again once decoding is done.
    decoded = email.utils.decode_params(raw_params)[1:]
    pdict = {}
    for name, decoded_value in decoded:
        text = email.utils.collapse_rfc2231_value(decoded_value)
        if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
            text = text[1:-1]
        pdict[name] = text
    return key, pdict
- def _encode_header(key: str, pdict: Dict[str, str]) -> str:
- """Inverse of _parse_header.
- >>> _encode_header('permessage-deflate',
- ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
- 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
- """
- if not pdict:
- return key
- out = [key]
- # Sort the parameters just to make it easy to test.
- for k, v in sorted(pdict.items()):
- if v is None:
- out.append(k)
- else:
- # TODO: quote if necessary.
- out.append(f"{k}={v}")
- return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Join a username and password the way HTTP auth expects.

    The result is a byte string of the form ``username:password``.
    Text (non-bytes) inputs are NFC-normalized before being encoded.

    .. versionadded:: 5.1
    """
    user, secret = (
        unicodedata.normalize("NFC", part) if isinstance(part, unicode_type) else part
        for part in (username, password)
    )
    return utf8(user) + b":" + utf8(secret)
- def doctests():
- # type: () -> unittest.TestSuite
- import doctest
- return doctest.DocTestSuite(optionflags=doctest.ELLIPSIS)
# Matches "<host>:<digits>"; the host part is greedy so only the final
# colon-separated run of digits is treated as the port.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Split ``netloc`` into a ``(host, port)`` tuple.

    ``port`` is ``None`` when ``netloc`` carries no numeric port, in which
    case ``host`` is ``netloc`` unchanged.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match is None:
        return (netloc, None)
    host, port_text = match.groups()
    return (host, int(port_text))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator that flattens a ``parse_qs`` result into name-value pairs.

    Each name is yielded once per value in its list, in dictionary order.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        yield from ((name, value) for value in values)
- _unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
- def _unquote_replace(m: re.Match) -> str:
- if m[1]:
- return chr(int(m[1], 8))
- else:
- return m[2]
- def _unquote_cookie(s: str) -> str:
- """Handle double quotes and escaping in cookie values.
- This method is copied verbatim from the Python 3.13 standard
- library (http.cookies._unquote) so we don't have to depend on
- non-public interfaces.
- """
- # If there aren't any doublequotes,
- # then there can't be any special characters. See RFC 2109.
- if s is None or len(s) < 2:
- return s
- if s[0] != '"' or s[-1] != '"':
- return s
- # We have to assume that we must decode this string.
- # Down to work.
- # Remove the "s
- s = s[1:-1]
- # Check for special sequences. Examples:
- # \012 --> \n
- # \" --> "
- #
- return _unquote_sub(_unquote_replace, s)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Turn a ``Cookie`` HTTP header into a dict of name/value pairs.

    The goal is to behave like browsers do when they parse cookies;
    the cookie-related RFCs are deliberately not followed
    (because browsers don't follow them either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(";"):
        name, sep, value = chunk.partition("=")
        if not sep:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name, value = name.strip(), value.strip()
        if not (name or value):
            continue
        # unquote using Python's algorithm.
        cookiedict[name] = _unquote_cookie(value)
    return cookiedict
|