httputil_test.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. from tornado.httputil import (
  2. url_concat,
  3. parse_multipart_form_data,
  4. HTTPHeaders,
  5. format_timestamp,
  6. HTTPServerRequest,
  7. parse_request_start_line,
  8. parse_cookie,
  9. qs_to_qsl,
  10. HTTPInputError,
  11. HTTPFile,
  12. ParseMultipartConfig,
  13. )
  14. from tornado.escape import utf8, native_str
  15. from tornado.log import gen_log
  16. from tornado.test.util import ignore_deprecation
  17. import copy
  18. import datetime
  19. import logging
  20. import pickle
  21. import time
  22. import urllib.parse
  23. import unittest
  24. from typing import Tuple, Dict, List
  25. def form_data_args() -> Tuple[Dict[str, List[bytes]], Dict[str, List[HTTPFile]]]:
  26. """Return two empty dicts suitable for use with parse_multipart_form_data.
  27. mypy insists on type annotations for dict literals, so this lets us avoid
  28. the verbose types throughout this test.
  29. """
  30. return {}, {}
  31. class TestUrlConcat(unittest.TestCase):
  32. def test_url_concat_no_query_params(self):
  33. url = url_concat("https://localhost/path", [("y", "y"), ("z", "z")])
  34. self.assertEqual(url, "https://localhost/path?y=y&z=z")
  35. def test_url_concat_encode_args(self):
  36. url = url_concat("https://localhost/path", [("y", "/y"), ("z", "z")])
  37. self.assertEqual(url, "https://localhost/path?y=%2Fy&z=z")
  38. def test_url_concat_trailing_q(self):
  39. url = url_concat("https://localhost/path?", [("y", "y"), ("z", "z")])
  40. self.assertEqual(url, "https://localhost/path?y=y&z=z")
  41. def test_url_concat_q_with_no_trailing_amp(self):
  42. url = url_concat("https://localhost/path?x", [("y", "y"), ("z", "z")])
  43. self.assertEqual(url, "https://localhost/path?x=&y=y&z=z")
  44. def test_url_concat_trailing_amp(self):
  45. url = url_concat("https://localhost/path?x&", [("y", "y"), ("z", "z")])
  46. self.assertEqual(url, "https://localhost/path?x=&y=y&z=z")
  47. def test_url_concat_mult_params(self):
  48. url = url_concat("https://localhost/path?a=1&b=2", [("y", "y"), ("z", "z")])
  49. self.assertEqual(url, "https://localhost/path?a=1&b=2&y=y&z=z")
  50. def test_url_concat_no_params(self):
  51. url = url_concat("https://localhost/path?r=1&t=2", [])
  52. self.assertEqual(url, "https://localhost/path?r=1&t=2")
  53. def test_url_concat_none_params(self):
  54. url = url_concat("https://localhost/path?r=1&t=2", None)
  55. self.assertEqual(url, "https://localhost/path?r=1&t=2")
  56. def test_url_concat_with_frag(self):
  57. url = url_concat("https://localhost/path#tab", [("y", "y")])
  58. self.assertEqual(url, "https://localhost/path?y=y#tab")
  59. def test_url_concat_multi_same_params(self):
  60. url = url_concat("https://localhost/path", [("y", "y1"), ("y", "y2")])
  61. self.assertEqual(url, "https://localhost/path?y=y1&y=y2")
  62. def test_url_concat_multi_same_query_params(self):
  63. url = url_concat("https://localhost/path?r=1&r=2", [("y", "y")])
  64. self.assertEqual(url, "https://localhost/path?r=1&r=2&y=y")
  65. def test_url_concat_dict_params(self):
  66. url = url_concat("https://localhost/path", dict(y="y"))
  67. self.assertEqual(url, "https://localhost/path?y=y")
  68. class QsParseTest(unittest.TestCase):
  69. def test_parsing(self):
  70. qsstring = "a=1&b=2&a=3"
  71. qs = urllib.parse.parse_qs(qsstring)
  72. qsl = list(qs_to_qsl(qs))
  73. self.assertIn(("a", "1"), qsl)
  74. self.assertIn(("a", "3"), qsl)
  75. self.assertIn(("b", "2"), qsl)
  76. class MultipartFormDataTest(unittest.TestCase):
  77. def test_file_upload(self):
  78. data = b"""\
  79. --1234
  80. Content-Disposition: form-data; name="files"; filename="ab.txt"
  81. Foo
  82. --1234--""".replace(
  83. b"\n", b"\r\n"
  84. )
  85. args, files = form_data_args()
  86. parse_multipart_form_data(b"1234", data, args, files)
  87. file = files["files"][0]
  88. self.assertEqual(file["filename"], "ab.txt")
  89. self.assertEqual(file["body"], b"Foo")
  90. def test_unquoted_names(self):
  91. # quotes are optional unless special characters are present
  92. data = b"""\
  93. --1234
  94. Content-Disposition: form-data; name=files; filename=ab.txt
  95. Foo
  96. --1234--""".replace(
  97. b"\n", b"\r\n"
  98. )
  99. args, files = form_data_args()
  100. parse_multipart_form_data(b"1234", data, args, files)
  101. file = files["files"][0]
  102. self.assertEqual(file["filename"], "ab.txt")
  103. self.assertEqual(file["body"], b"Foo")
  104. def test_special_filenames(self):
  105. filenames = [
  106. "a;b.txt",
  107. 'a"b.txt',
  108. 'a";b.txt',
  109. 'a;"b.txt',
  110. 'a";";.txt',
  111. 'a\\"b.txt',
  112. "a\\b.txt",
  113. "a b.txt",
  114. "a\tb.txt",
  115. ]
  116. for filename in filenames:
  117. logging.debug("trying filename %r", filename)
  118. str_data = """\
  119. --1234
  120. Content-Disposition: form-data; name="files"; filename="%s"
  121. Foo
  122. --1234--""" % filename.replace(
  123. "\\", "\\\\"
  124. ).replace(
  125. '"', '\\"'
  126. )
  127. data = utf8(str_data.replace("\n", "\r\n"))
  128. args, files = form_data_args()
  129. parse_multipart_form_data(b"1234", data, args, files)
  130. file = files["files"][0]
  131. self.assertEqual(file["filename"], filename)
  132. self.assertEqual(file["body"], b"Foo")
  133. def test_invalid_chars(self):
  134. filenames = [
  135. "a\rb.txt",
  136. "a\0b.txt",
  137. "a\x08b.txt",
  138. ]
  139. for filename in filenames:
  140. str_data = """\
  141. --1234
  142. Content-Disposition: form-data; name="files"; filename="%s"
  143. Foo
  144. --1234--""" % filename.replace(
  145. "\\", "\\\\"
  146. ).replace(
  147. '"', '\\"'
  148. )
  149. data = utf8(str_data.replace("\n", "\r\n"))
  150. args, files = form_data_args()
  151. with self.assertRaises(HTTPInputError) as cm:
  152. parse_multipart_form_data(b"1234", data, args, files)
  153. self.assertIn("Invalid header value", str(cm.exception))
  154. def test_non_ascii_filename_rfc5987(self):
  155. data = b"""\
  156. --1234
  157. Content-Disposition: form-data; name="files"; filename="ab.txt"; filename*=UTF-8''%C3%A1b.txt
  158. Foo
  159. --1234--""".replace(
  160. b"\n", b"\r\n"
  161. )
  162. args, files = form_data_args()
  163. parse_multipart_form_data(b"1234", data, args, files)
  164. file = files["files"][0]
  165. self.assertEqual(file["filename"], "áb.txt")
  166. self.assertEqual(file["body"], b"Foo")
  167. def test_non_ascii_filename_raw(self):
  168. data = """\
  169. --1234
  170. Content-Disposition: form-data; name="files"; filename="测试.txt"
  171. Foo
  172. --1234--""".encode(
  173. "utf-8"
  174. ).replace(
  175. b"\n", b"\r\n"
  176. )
  177. args, files = form_data_args()
  178. parse_multipart_form_data(b"1234", data, args, files)
  179. file = files["files"][0]
  180. self.assertEqual(file["filename"], "测试.txt")
  181. self.assertEqual(file["body"], b"Foo")
  182. def test_boundary_starts_and_ends_with_quotes(self):
  183. data = b"""\
  184. --1234
  185. Content-Disposition: form-data; name="files"; filename="ab.txt"
  186. Foo
  187. --1234--""".replace(
  188. b"\n", b"\r\n"
  189. )
  190. args, files = form_data_args()
  191. parse_multipart_form_data(b'"1234"', data, args, files)
  192. file = files["files"][0]
  193. self.assertEqual(file["filename"], "ab.txt")
  194. self.assertEqual(file["body"], b"Foo")
  195. def test_missing_headers(self):
  196. data = b"""\
  197. --1234
  198. Foo
  199. --1234--""".replace(
  200. b"\n", b"\r\n"
  201. )
  202. args, files = form_data_args()
  203. with self.assertRaises(
  204. HTTPInputError, msg="multipart/form-data missing headers"
  205. ):
  206. parse_multipart_form_data(b"1234", data, args, files)
  207. self.assertEqual(files, {})
  208. def test_invalid_content_disposition(self):
  209. data = b"""\
  210. --1234
  211. Content-Disposition: invalid; name="files"; filename="ab.txt"
  212. Foo
  213. --1234--""".replace(
  214. b"\n", b"\r\n"
  215. )
  216. args, files = form_data_args()
  217. with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
  218. parse_multipart_form_data(b"1234", data, args, files)
  219. self.assertEqual(files, {})
  220. def test_line_does_not_end_with_correct_line_break(self):
  221. data = b"""\
  222. --1234
  223. Content-Disposition: form-data; name="files"; filename="ab.txt"
  224. Foo--1234--""".replace(
  225. b"\n", b"\r\n"
  226. )
  227. args, files = form_data_args()
  228. with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
  229. parse_multipart_form_data(b"1234", data, args, files)
  230. self.assertEqual(files, {})
  231. def test_content_disposition_header_without_name_parameter(self):
  232. data = b"""\
  233. --1234
  234. Content-Disposition: form-data; filename="ab.txt"
  235. Foo
  236. --1234--""".replace(
  237. b"\n", b"\r\n"
  238. )
  239. args, files = form_data_args()
  240. with self.assertRaises(
  241. HTTPInputError, msg="multipart/form-data value missing name"
  242. ):
  243. parse_multipart_form_data(b"1234", data, args, files)
  244. self.assertEqual(files, {})
  245. def test_data_after_final_boundary(self):
  246. # The spec requires that data after the final boundary be ignored.
  247. # http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
  248. # In practice, some libraries include an extra CRLF after the boundary.
  249. data = b"""\
  250. --1234
  251. Content-Disposition: form-data; name="files"; filename="ab.txt"
  252. Foo
  253. --1234--
  254. """.replace(
  255. b"\n", b"\r\n"
  256. )
  257. args, files = form_data_args()
  258. parse_multipart_form_data(b"1234", data, args, files)
  259. file = files["files"][0]
  260. self.assertEqual(file["filename"], "ab.txt")
  261. self.assertEqual(file["body"], b"Foo")
  262. def test_disposition_param_linear_performance(self):
  263. # This is a regression test for performance of parsing parameters
  264. # to the content-disposition header, specifically for semicolons within
  265. # quoted strings.
  266. def f(n):
  267. start = time.perf_counter()
  268. message = (
  269. b"--1234\r\nContent-Disposition: form-data; "
  270. + b'x="'
  271. + b";" * n
  272. + b'"; '
  273. + b'name="files"; filename="a.txt"\r\n\r\nFoo\r\n--1234--\r\n'
  274. )
  275. args: dict[str, list[bytes]] = {}
  276. files: dict[str, list[HTTPFile]] = {}
  277. parse_multipart_form_data(b"1234", message, args, files)
  278. return time.perf_counter() - start
  279. d1 = f(1_000)
  280. # Note that headers larger than this are blocked by the default configuration.
  281. d2 = f(10_000)
  282. if d2 / d1 > 20:
  283. self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")
  284. def test_multipart_config(self):
  285. boundary = b"1234"
  286. body = b"""--1234
  287. Content-Disposition: form-data; name="files"; filename="ab.txt"
  288. --1234--""".replace(
  289. b"\n", b"\r\n"
  290. )
  291. config = ParseMultipartConfig()
  292. args, files = form_data_args()
  293. parse_multipart_form_data(boundary, body, args, files, config=config)
  294. self.assertEqual(files["files"][0]["filename"], "ab.txt")
  295. config_no_parts = ParseMultipartConfig(max_parts=0)
  296. with self.assertRaises(HTTPInputError) as cm:
  297. parse_multipart_form_data(
  298. boundary, body, args, files, config=config_no_parts
  299. )
  300. self.assertIn("too many parts", str(cm.exception))
  301. config_small_headers = ParseMultipartConfig(max_part_header_size=10)
  302. with self.assertRaises(HTTPInputError) as cm:
  303. parse_multipart_form_data(
  304. boundary, body, args, files, config=config_small_headers
  305. )
  306. self.assertIn("header too large", str(cm.exception))
  307. config_disabled = ParseMultipartConfig(enabled=False)
  308. with self.assertRaises(HTTPInputError) as cm:
  309. parse_multipart_form_data(
  310. boundary, body, args, files, config=config_disabled
  311. )
  312. self.assertIn("multipart/form-data parsing is disabled", str(cm.exception))
  313. class HTTPHeadersTest(unittest.TestCase):
  314. def test_multi_line(self):
  315. # Lines beginning with whitespace are appended to the previous line
  316. # with any leading whitespace replaced by a single space.
  317. # Note that while multi-line headers are a part of the HTTP spec,
  318. # their use is strongly discouraged.
  319. data = """\
  320. Foo: bar
  321. baz
  322. Asdf: qwer
  323. \tzxcv
  324. Foo: even
  325. more
  326. lines
  327. """.replace(
  328. "\n", "\r\n"
  329. )
  330. headers = HTTPHeaders.parse(data)
  331. self.assertEqual(headers["asdf"], "qwer zxcv")
  332. self.assertEqual(headers.get_list("asdf"), ["qwer zxcv"])
  333. self.assertEqual(headers["Foo"], "bar baz,even more lines")
  334. self.assertEqual(headers.get_list("foo"), ["bar baz", "even more lines"])
  335. self.assertEqual(
  336. sorted(list(headers.get_all())),
  337. [("Asdf", "qwer zxcv"), ("Foo", "bar baz"), ("Foo", "even more lines")],
  338. )
  339. # Verify case insensitivity in-operator
  340. self.assertTrue("asdf" in headers)
  341. self.assertTrue("Asdf" in headers)
  342. def test_continuation(self):
  343. data = "Foo: bar\r\n\tasdf"
  344. headers = HTTPHeaders.parse(data)
  345. self.assertEqual(headers["Foo"], "bar asdf")
  346. # If the first line starts with whitespace, it's a
  347. # continuation line with nothing to continue, so reject it
  348. # (with a proper error).
  349. data = " Foo: bar"
  350. self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
  351. # \f (formfeed) is whitespace according to str.isspace, but
  352. # not according to the HTTP spec.
  353. data = "Foo: bar\r\n\fasdf"
  354. self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
  355. def test_forbidden_ascii_characters(self):
  356. # Control characters and ASCII whitespace other than space, tab, and CRLF are not allowed in
  357. # headers.
  358. for c in range(0xFF):
  359. data = f"Foo: bar{chr(c)}baz\r\n"
  360. if c == 0x09 or (c >= 0x20 and c != 0x7F):
  361. headers = HTTPHeaders.parse(data)
  362. self.assertEqual(headers["Foo"], f"bar{chr(c)}baz")
  363. else:
  364. self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
  365. def test_unicode_newlines(self):
  366. # Ensure that only \r\n is recognized as a header separator, and not
  367. # the other newline-like unicode characters.
  368. # Characters that are likely to be problematic can be found in
  369. # http://unicode.org/standard/reports/tr13/tr13-5.html
  370. # and cpython's unicodeobject.c (which defines the implementation
  371. # of unicode_type.splitlines(), and uses a different list than TR13).
  372. newlines = [
  373. # The following ascii characters are sometimes treated as newline-like,
  374. # but they're disallowed in HTTP headers. This test covers unicode
  375. # characters that are permitted in headers (under the obs-text rule).
  376. # "\u001b", # VERTICAL TAB
  377. # "\u001c", # FILE SEPARATOR
  378. # "\u001d", # GROUP SEPARATOR
  379. # "\u001e", # RECORD SEPARATOR
  380. "\u0085", # NEXT LINE
  381. "\u2028", # LINE SEPARATOR
  382. "\u2029", # PARAGRAPH SEPARATOR
  383. ]
  384. for newline in newlines:
  385. # Try the utf8 and latin1 representations of each newline
  386. for encoding in ["utf8", "latin1"]:
  387. try:
  388. try:
  389. encoded = newline.encode(encoding)
  390. except UnicodeEncodeError:
  391. # Some chars cannot be represented in latin1
  392. continue
  393. data = b"Cookie: foo=" + encoded + b"bar"
  394. # parse() wants a native_str, so decode through latin1
  395. # in the same way the real parser does.
  396. headers = HTTPHeaders.parse(native_str(data.decode("latin1")))
  397. expected = [
  398. (
  399. "Cookie",
  400. "foo=" + native_str(encoded.decode("latin1")) + "bar",
  401. )
  402. ]
  403. self.assertEqual(expected, list(headers.get_all()))
  404. except Exception:
  405. gen_log.warning("failed while trying %r in %s", newline, encoding)
  406. raise
  407. def test_unicode_whitespace(self):
  408. # Only tabs and spaces are to be stripped according to the HTTP standard.
  409. # Other unicode whitespace is to be left as-is. In the context of headers,
  410. # this specifically means the whitespace characters falling within the
  411. # latin1 charset.
  412. whitespace = [
  413. (" ", True), # SPACE
  414. ("\t", True), # TAB
  415. ("\u00a0", False), # NON-BREAKING SPACE
  416. ("\u0085", False), # NEXT LINE
  417. ]
  418. for c, stripped in whitespace:
  419. headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c)
  420. if stripped:
  421. expected = [("Transfer-Encoding", "chunked")]
  422. else:
  423. expected = [("Transfer-Encoding", "%schunked" % c)]
  424. self.assertEqual(expected, list(headers.get_all()))
  425. def test_optional_cr(self):
  426. # Bare CR is not a valid line separator
  427. with self.assertRaises(HTTPInputError):
  428. HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n")
  429. # Both CRLF and LF should be accepted as separators. CR should not be
  430. # part of the data when followed by LF.
  431. headers = HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nMore: more\r\n")
  432. self.assertEqual(
  433. sorted(headers.get_all()),
  434. [("Crlf", "crlf"), ("Lf", "lf"), ("More", "more")],
  435. )
  436. def test_copy(self):
  437. all_pairs = [("A", "1"), ("A", "2"), ("B", "c")]
  438. h1 = HTTPHeaders()
  439. for k, v in all_pairs:
  440. h1.add(k, v)
  441. h2 = h1.copy()
  442. h3 = copy.copy(h1)
  443. h4 = copy.deepcopy(h1)
  444. for headers in [h1, h2, h3, h4]:
  445. # All the copies are identical, no matter how they were
  446. # constructed.
  447. self.assertEqual(list(sorted(headers.get_all())), all_pairs)
  448. for headers in [h2, h3, h4]:
  449. # Neither the dict or its member lists are reused.
  450. self.assertIsNot(headers, h1)
  451. self.assertIsNot(headers.get_list("A"), h1.get_list("A"))
  452. def test_pickle_roundtrip(self):
  453. headers = HTTPHeaders()
  454. headers.add("Set-Cookie", "a=b")
  455. headers.add("Set-Cookie", "c=d")
  456. headers.add("Content-Type", "text/html")
  457. pickled = pickle.dumps(headers)
  458. unpickled = pickle.loads(pickled)
  459. self.assertEqual(sorted(headers.get_all()), sorted(unpickled.get_all()))
  460. self.assertEqual(sorted(headers.items()), sorted(unpickled.items()))
  461. def test_setdefault(self):
  462. headers = HTTPHeaders()
  463. headers["foo"] = "bar"
  464. # If a value is present, setdefault returns it without changes.
  465. self.assertEqual(headers.setdefault("foo", "baz"), "bar")
  466. self.assertEqual(headers["foo"], "bar")
  467. # If a value is not present, setdefault sets it for future use.
  468. self.assertEqual(headers.setdefault("quux", "xyzzy"), "xyzzy")
  469. self.assertEqual(headers["quux"], "xyzzy")
  470. self.assertEqual(sorted(headers.get_all()), [("Foo", "bar"), ("Quux", "xyzzy")])
  471. def test_string(self):
  472. headers = HTTPHeaders()
  473. headers.add("Foo", "1")
  474. headers.add("Foo", "2")
  475. headers.add("Foo", "3")
  476. headers2 = HTTPHeaders.parse(str(headers))
  477. self.assertEqual(headers, headers2)
  478. def test_invalid_header_names(self):
  479. invalid_names = [
  480. "",
  481. "foo bar",
  482. "foo\tbar",
  483. "foo\nbar",
  484. "foo\x00bar",
  485. "foo ",
  486. " foo",
  487. "é",
  488. ]
  489. for name in invalid_names:
  490. headers = HTTPHeaders()
  491. with self.assertRaises(HTTPInputError):
  492. headers.add(name, "bar")
  493. def test_linear_performance(self):
  494. def f(n):
  495. start = time.perf_counter()
  496. headers = HTTPHeaders()
  497. for i in range(n):
  498. headers.add("X-Foo", "bar")
  499. return time.perf_counter() - start
  500. # This runs under 50ms on my laptop as of 2025-12-09.
  501. d1 = f(10_000)
  502. d2 = f(100_000)
  503. if d2 / d1 > 20:
  504. # d2 should be about 10x d1 but allow a wide margin for variability.
  505. self.fail(f"HTTPHeaders.add() does not scale linearly: {d1=} vs {d2=}")
  506. class FormatTimestampTest(unittest.TestCase):
  507. # Make sure that all the input types are supported.
  508. TIMESTAMP = 1359312200.503611
  509. EXPECTED = "Sun, 27 Jan 2013 18:43:20 GMT"
  510. def check(self, value):
  511. self.assertEqual(format_timestamp(value), self.EXPECTED)
  512. def test_unix_time_float(self):
  513. self.check(self.TIMESTAMP)
  514. def test_unix_time_int(self):
  515. self.check(int(self.TIMESTAMP))
  516. def test_struct_time(self):
  517. self.check(time.gmtime(self.TIMESTAMP))
  518. def test_time_tuple(self):
  519. tup = tuple(time.gmtime(self.TIMESTAMP))
  520. self.assertEqual(9, len(tup))
  521. self.check(tup)
  522. def test_utc_naive_datetime(self):
  523. self.check(
  524. datetime.datetime.fromtimestamp(
  525. self.TIMESTAMP, datetime.timezone.utc
  526. ).replace(tzinfo=None)
  527. )
  528. def test_utc_naive_datetime_deprecated(self):
  529. with ignore_deprecation():
  530. self.check(datetime.datetime.utcfromtimestamp(self.TIMESTAMP))
  531. def test_utc_aware_datetime(self):
  532. self.check(
  533. datetime.datetime.fromtimestamp(self.TIMESTAMP, datetime.timezone.utc)
  534. )
  535. def test_other_aware_datetime(self):
  536. # Other timezones are ignored; the timezone is always printed as GMT
  537. self.check(
  538. datetime.datetime.fromtimestamp(
  539. self.TIMESTAMP, datetime.timezone(datetime.timedelta(hours=-4))
  540. )
  541. )
  542. # HTTPServerRequest is mainly tested incidentally to the server itself,
  543. # but this tests the parts of the class that can be tested in isolation.
  544. class HTTPServerRequestTest(unittest.TestCase):
  545. def test_default_constructor(self):
  546. # All parameters are formally optional, but uri is required
  547. # (and has been for some time). This test ensures that no
  548. # more required parameters slip in.
  549. HTTPServerRequest(uri="/")
  550. def test_body_is_a_byte_string(self):
  551. request = HTTPServerRequest(uri="/")
  552. self.assertIsInstance(request.body, bytes)
  553. def test_repr_does_not_contain_headers(self):
  554. request = HTTPServerRequest(
  555. uri="/", headers=HTTPHeaders({"Canary": ["Coal Mine"]})
  556. )
  557. self.assertNotIn("Canary", repr(request))
  558. class ParseRequestStartLineTest(unittest.TestCase):
  559. METHOD = "GET"
  560. PATH = "/foo"
  561. VERSION = "HTTP/1.1"
  562. def test_parse_request_start_line(self):
  563. start_line = " ".join([self.METHOD, self.PATH, self.VERSION])
  564. parsed_start_line = parse_request_start_line(start_line)
  565. self.assertEqual(parsed_start_line.method, self.METHOD)
  566. self.assertEqual(parsed_start_line.path, self.PATH)
  567. self.assertEqual(parsed_start_line.version, self.VERSION)
  568. class ParseCookieTest(unittest.TestCase):
  569. # These tests copied from Django:
  570. # https://github.com/django/django/pull/6277/commits/da810901ada1cae9fc1f018f879f11a7fb467b28
  571. def test_python_cookies(self):
  572. """
  573. Test cases copied from Python's Lib/test/test_http_cookies.py
  574. """
  575. self.assertEqual(
  576. parse_cookie("chips=ahoy; vienna=finger"),
  577. {"chips": "ahoy", "vienna": "finger"},
  578. )
  579. # Here parse_cookie() differs from Python's cookie parsing in that it
  580. # treats all semicolons as delimiters, even within quotes.
  581. self.assertEqual(
  582. parse_cookie('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'),
  583. {"keebler": '"E=mc2', "L": '\\"Loves\\"', "fudge": "\\012", "": '"'},
  584. )
  585. # Illegal cookies that have an '=' char in an unquoted value.
  586. self.assertEqual(parse_cookie("keebler=E=mc2"), {"keebler": "E=mc2"})
  587. # Cookies with ':' character in their name.
  588. self.assertEqual(
  589. parse_cookie("key:term=value:term"), {"key:term": "value:term"}
  590. )
  591. # Cookies with '[' and ']'.
  592. self.assertEqual(
  593. parse_cookie("a=b; c=[; d=r; f=h"), {"a": "b", "c": "[", "d": "r", "f": "h"}
  594. )
  595. def test_cookie_edgecases(self):
  596. # Cookies that RFC6265 allows.
  597. self.assertEqual(
  598. parse_cookie("a=b; Domain=example.com"), {"a": "b", "Domain": "example.com"}
  599. )
  600. # parse_cookie() has historically kept only the last cookie with the
  601. # same name.
  602. self.assertEqual(parse_cookie("a=b; h=i; a=c"), {"a": "c", "h": "i"})
  603. def test_invalid_cookies(self):
  604. """
  605. Cookie strings that go against RFC6265 but browsers will send if set
  606. via document.cookie.
  607. """
  608. # Chunks without an equals sign appear as unnamed values per
  609. # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
  610. self.assertIn(
  611. "django_language",
  612. parse_cookie("abc=def; unnamed; django_language=en").keys(),
  613. )
  614. # Even a double quote may be an unamed value.
  615. self.assertEqual(parse_cookie('a=b; "; c=d'), {"a": "b", "": '"', "c": "d"})
  616. # Spaces in names and values, and an equals sign in values.
  617. self.assertEqual(
  618. parse_cookie("a b c=d e = f; gh=i"), {"a b c": "d e = f", "gh": "i"}
  619. )
  620. # More characters the spec forbids.
  621. self.assertEqual(
  622. parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'),
  623. {"a b,c<>@:/[]?{}": 'd " =e,f g'},
  624. )
  625. # Unicode characters. The spec only allows ASCII.
  626. self.assertEqual(
  627. parse_cookie("saint=André Bessette"),
  628. {"saint": native_str("André Bessette")},
  629. )
  630. # Browsers don't send extra whitespace or semicolons in Cookie headers,
  631. # but parse_cookie() should parse whitespace the same way
  632. # document.cookie parses whitespace.
  633. self.assertEqual(
  634. parse_cookie(" = b ; ; = ; c = ; "), {"": "b", "c": ""}
  635. )
  636. def test_unquote(self):
  637. # Copied from
  638. # https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L62
  639. cases = [
  640. (r'a="b=\""', 'b="'),
  641. (r'a="b=\\"', "b=\\"),
  642. (r'a="b=\="', "b=="),
  643. (r'a="b=\n"', "b=n"),
  644. (r'a="b=\042"', 'b="'),
  645. (r'a="b=\134"', "b=\\"),
  646. (r'a="b=\377"', "b=\xff"),
  647. (r'a="b=\400"', "b=400"),
  648. (r'a="b=\42"', "b=42"),
  649. (r'a="b=\\042"', "b=\\042"),
  650. (r'a="b=\\134"', "b=\\134"),
  651. (r'a="b=\\\""', 'b=\\"'),
  652. (r'a="b=\\\042"', 'b=\\"'),
  653. (r'a="b=\134\""', 'b=\\"'),
  654. (r'a="b=\134\042"', 'b=\\"'),
  655. ]
  656. for encoded, decoded in cases:
  657. with self.subTest(encoded):
  658. c = parse_cookie(encoded)
  659. self.assertEqual(c["a"], decoded)
  660. def test_unquote_large(self):
  661. # Adapted from
  662. # https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L87
  663. # Modified from that test because we handle semicolons differently from the stdlib.
  664. #
  665. # This is a performance regression test: prior to improvements in Tornado 6.4.2, this test
  666. # would take over a minute with n= 100k. Now it runs in tens of milliseconds.
  667. n = 100000
  668. for encoded in r"\\", r"\134":
  669. with self.subTest(encoded):
  670. start = time.time()
  671. data = 'a="b=' + encoded * n + '"'
  672. value = parse_cookie(data)["a"]
  673. end = time.time()
  674. self.assertEqual(value[:3], "b=\\")
  675. self.assertEqual(value[-3:], "\\\\\\")
  676. self.assertEqual(len(value), n + 2)
  677. # Very loose performance check to avoid false positives
  678. self.assertLess(end - start, 1, "Test took too long")