format.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt
  4. """Python code format's checker.
  5. By default, try to follow Guido's style guide :
  6. https://www.python.org/doc/essays/styleguide/
  7. Some parts of the process_tokens method are based on the Tab Nanny std module.
  8. """
  9. from __future__ import annotations
  10. import tokenize
  11. from functools import reduce
  12. from re import Match
  13. from typing import TYPE_CHECKING, Literal
  14. from astroid import nodes
  15. from pylint.checkers import BaseRawFileChecker, BaseTokenChecker
  16. from pylint.checkers.utils import only_required_for_messages
  17. from pylint.constants import WarningScope
  18. from pylint.interfaces import HIGH
  19. from pylint.typing import MessageDefinitionTuple
  20. from pylint.utils.pragma_parser import OPTION_PO, PragmaParserError, parse_pragma
  21. if TYPE_CHECKING:
  22. from pylint.lint import PyLinter
# Token strings after which ``FormatChecker.process_tokens`` asks
# ``_check_keyword_parentheses`` to look for superfluous parentheses.
_KEYWORD_TOKENS = {
    "assert",
    "del",
    "elif",
    "except",
    "for",
    "if",
    "in",
    "not",
    "raise",
    "return",
    "while",
    "yield",
    "with",
    "=",
    ":=",
}
# Token types for which ``FormatChecker.new_line`` does not record the line's
# source text when they are the first token of a line.
_JUNK_TOKENS = {tokenize.COMMENT, tokenize.NL}
  41. MSGS: dict[str, MessageDefinitionTuple] = {
  42. "C0301": (
  43. "Line too long (%s/%s)",
  44. "line-too-long",
  45. "Used when a line is longer than a given number of characters.",
  46. ),
  47. "C0302": (
  48. "Too many lines in module (%s/%s)", # was W0302
  49. "too-many-lines",
  50. "Used when a module has too many lines, reducing its readability.",
  51. ),
  52. "C0303": (
  53. "Trailing whitespace",
  54. "trailing-whitespace",
  55. "Used when there is whitespace between the end of a line and the newline.",
  56. ),
  57. "C0304": (
  58. "Final newline missing",
  59. "missing-final-newline",
  60. "Used when the last line in a file is missing a newline.",
  61. ),
  62. "C0305": (
  63. "Trailing newlines",
  64. "trailing-newlines",
  65. "Used when there are trailing blank lines in a file.",
  66. ),
  67. "W0311": (
  68. "Bad indentation. Found %s %s, expected %s",
  69. "bad-indentation",
  70. "Used when an unexpected number of indentation's tabulations or "
  71. "spaces has been found.",
  72. ),
  73. "W0301": (
  74. "Unnecessary semicolon", # was W0106
  75. "unnecessary-semicolon",
  76. 'Used when a statement is ended by a semi-colon (";"), which '
  77. "isn't necessary (that's python, not C ;).",
  78. ),
  79. "C0321": (
  80. "More than one statement on a single line",
  81. "multiple-statements",
  82. "Used when more than on statement are found on the same line.",
  83. {"scope": WarningScope.NODE},
  84. ),
  85. "C0325": (
  86. "Unnecessary parens after %r keyword",
  87. "superfluous-parens",
  88. "Used when a single item in parentheses follows an if, for, or "
  89. "other keyword.",
  90. ),
  91. "C0327": (
  92. "Mixed line endings LF and CRLF",
  93. "mixed-line-endings",
  94. "Used when there are mixed (LF and CRLF) newline signs in a file.",
  95. ),
  96. "C0328": (
  97. "Unexpected line ending format. There is '%s' while it should be '%s'.",
  98. "unexpected-line-ending-format",
  99. "Used when there is different newline than expected.",
  100. ),
  101. }
  102. def _last_token_on_line_is(tokens: TokenWrapper, line_end: int, token: str) -> bool:
  103. return (line_end > 0 and tokens.token(line_end - 1) == token) or (
  104. line_end > 1
  105. and tokens.token(line_end - 2) == token
  106. and tokens.type(line_end - 1) == tokenize.COMMENT
  107. )
  108. class TokenWrapper:
  109. """A wrapper for readable access to token information."""
  110. def __init__(self, tokens: list[tokenize.TokenInfo]) -> None:
  111. self._tokens = tokens
  112. def token(self, idx: int) -> str:
  113. return self._tokens[idx][1]
  114. def type(self, idx: int) -> int:
  115. return self._tokens[idx][0]
  116. def start_line(self, idx: int) -> int:
  117. return self._tokens[idx][2][0]
  118. def start_col(self, idx: int) -> int:
  119. return self._tokens[idx][2][1]
  120. def line(self, idx: int) -> str:
  121. return self._tokens[idx][4]
  122. class FormatChecker(BaseTokenChecker, BaseRawFileChecker):
  123. """Formatting checker.
  124. Checks for :
  125. * unauthorized constructions
  126. * strict indentation
  127. * line length
  128. """
  129. # configuration section name
  130. name = "format"
  131. # messages
  132. msgs = MSGS
  133. # configuration options
  134. # for available dict keys/values see the optik parser 'add_option' method
  135. options = (
  136. (
  137. "max-line-length",
  138. {
  139. "default": 100,
  140. "type": "int",
  141. "metavar": "<int>",
  142. "help": (
  143. "Maximum number of characters on a single line. "
  144. "Pylint's default of 100 is based on PEP 8's guidance that teams "
  145. "may choose line lengths up to 99 characters."
  146. ),
  147. },
  148. ),
  149. (
  150. "ignore-long-lines",
  151. {
  152. "type": "regexp",
  153. "metavar": "<regexp>",
  154. "default": r"^\s*(# )?<?https?://\S+>?$",
  155. "help": (
  156. "Regexp for a line that is allowed to be longer than the limit."
  157. ),
  158. },
  159. ),
  160. (
  161. "single-line-if-stmt",
  162. {
  163. "default": False,
  164. "type": "yn",
  165. "metavar": "<y or n>",
  166. "help": (
  167. "Allow the body of an if to be on the same "
  168. "line as the test if there is no else."
  169. ),
  170. },
  171. ),
  172. (
  173. "single-line-class-stmt",
  174. {
  175. "default": False,
  176. "type": "yn",
  177. "metavar": "<y or n>",
  178. "help": (
  179. "Allow the body of a class to be on the same "
  180. "line as the declaration if body contains "
  181. "single statement."
  182. ),
  183. },
  184. ),
  185. (
  186. "max-module-lines",
  187. {
  188. "default": 1000,
  189. "type": "int",
  190. "metavar": "<int>",
  191. "help": "Maximum number of lines in a module.",
  192. },
  193. ),
  194. (
  195. "indent-string",
  196. {
  197. "default": " ",
  198. "type": "non_empty_string",
  199. "metavar": "<string>",
  200. "help": "String used as indentation unit. This is usually "
  201. '" " (4 spaces) or "\\t" (1 tab).',
  202. },
  203. ),
  204. (
  205. "indent-after-paren",
  206. {
  207. "type": "int",
  208. "metavar": "<int>",
  209. "default": 4,
  210. "help": "Number of spaces of indent required inside a hanging "
  211. "or continued line.",
  212. },
  213. ),
  214. (
  215. "expected-line-ending-format",
  216. {
  217. "type": "choice",
  218. "metavar": "<empty or LF or CRLF>",
  219. "default": "",
  220. "choices": ["", "LF", "CRLF"],
  221. "help": (
  222. "Expected format of line ending, "
  223. "e.g. empty (any line ending), LF or CRLF."
  224. ),
  225. },
  226. ),
  227. )
    def __init__(self, linter: PyLinter) -> None:
        """Create the checker and its (initially empty) line bookkeeping."""
        super().__init__(linter)
        # Line number -> first physical line of source text, filled by ``new_line``.
        self._lines: dict[int, str] = {}
        # Line number -> marker: 1 once ``visit_default`` has covered the line,
        # 2 once ``_check_multi_statement_line`` has reported it.
        self._visited_lines: dict[int, Literal[1, 2]] = {}
  232. def new_line(self, tokens: TokenWrapper, line_end: int, line_start: int) -> None:
  233. """A new line has been encountered, process it if necessary."""
  234. if _last_token_on_line_is(tokens, line_end, ";"):
  235. self.add_message("unnecessary-semicolon", line=tokens.start_line(line_end))
  236. line_num = tokens.start_line(line_start)
  237. line = tokens.line(line_start)
  238. if tokens.type(line_start) not in _JUNK_TOKENS:
  239. self._lines[line_num] = line.split("\n")[0]
  240. self.check_lines(tokens, line_start, line, line_num)
  241. def process_module(self, node: nodes.Module) -> None:
  242. pass
    # pylint: disable-next = too-many-return-statements, too-many-branches
    def _check_keyword_parentheses(
        self, tokens: list[tokenize.TokenInfo], start: int
    ) -> None:
        """Check that there are not unnecessary parentheses after a keyword.

        Parens are unnecessary if there is exactly one balanced outer pair on a
        line and contains no commas (i.e. is not a tuple).

        The tokens following the keyword are scanned while tracking the
        parenthesis depth; "superfluous-parens" is emitted when the outermost
        pair closes and none of the exemptions below applies.

        Args:
            tokens: The entire list of Tokens.
            start: The position of the keyword in the token list.
        """
        # If the next token is not a paren, we're fine.
        if tokens[start + 1].string != "(":
            return
        if (
            tokens[start].string == "not"
            and start > 0
            and tokens[start - 1].string == "is"
        ):
            # If this is part of an `is not` expression, we have a binary operator
            # so the parentheses are not necessarily redundant.
            return
        # Scanner state:
        #   found_and_or             - an `and`/`or` was seen at depth 1
        #   contains_walrus_operator - a `:=` was seen, at walrus_operator_depth
        #   contains_double_parens   - a "((" was seen and not yet fully closed
        #   depth                    - current parenthesis nesting level
        found_and_or = False
        contains_walrus_operator = False
        walrus_operator_depth = 0
        contains_double_parens = 0
        depth = 0
        keyword_token = str(tokens[start].string)
        line_num = tokens[start].start[0]
        # The last token is excluded so that tokens[i + 1] is always valid.
        for i in range(start, len(tokens) - 1):
            token = tokens[i]

            # If we hit a newline, then assume any parens were for continuation.
            if token.type == tokenize.NL:
                return
            # Since the walrus operator doesn't exist below python3.8, the tokenizer
            # generates independent tokens
            if (
                token.string == ":="  # <-- python3.8+ path
                or token.string + tokens[i + 1].string == ":="
            ):
                contains_walrus_operator = True
                walrus_operator_depth = depth
            if token.string == "(":
                depth += 1
                if tokens[i + 1].string == "(":
                    contains_double_parens = 1
            elif token.string == ")":
                depth -= 1
                if depth:
                    # Still inside the outer pair: only the double-paren
                    # bookkeeping is updated here.
                    if contains_double_parens and tokens[i + 1].string == ")":
                        # For walrus operators in `if (not)` conditions and comprehensions
                        if keyword_token in {"in", "if", "not"}:
                            continue
                        return
                    contains_double_parens -= 1
                    continue
                # The outermost pair just closed; decide from what follows it.
                # ')' can't happen after if (foo), since it would be a syntax error.
                if tokens[i + 1].string in {":", ")", "]", "}", "in"} or tokens[
                    i + 1
                ].type in {tokenize.NEWLINE, tokenize.ENDMARKER, tokenize.COMMENT}:
                    if contains_walrus_operator and walrus_operator_depth - 1 == depth:
                        return
                    # The empty tuple () is always accepted.
                    if i == start + 2:
                        return
                    if found_and_or:
                        return
                    if keyword_token == "in":
                        # This special case was added in https://github.com/pylint-dev/pylint/pull/4948
                        # but it could be removed in the future. Avoid churn for now.
                        return
                    self.add_message(
                        "superfluous-parens", line=line_num, args=keyword_token
                    )
                return
            elif depth == 1:
                match token[1]:
                    case ",":
                        # This is a tuple, which is always acceptable.
                        return
                    case "and" | "or":
                        # 'and' and 'or' are the only boolean operators with lower precedence
                        # than 'not', so parens are only required when they are found.
                        found_and_or = True
                    case "yield":
                        # A yield inside an expression must always be in parentheses,
                        # quit early without error.
                        return
                    case "for":
                        # A generator expression always has a 'for' token in it, and
                        # the 'for' token is only legal inside parens when it is in a
                        # generator expression. The parens are necessary here, so bail
                        # without an error.
                        return
                    case "else":
                        # A generator expression can have an 'else' token in it.
                        # We check the rest of the tokens to see if any problems occur after
                        # the 'else'.
                        if "(" in (i.string for i in tokens[i:]):
                            self._check_keyword_parentheses(tokens[i:], 0)
                        return
    def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
        """Run the token-based format checks over a module's token stream.

        Resets the per-module state, then walks the tokens once to:

        - hand every newly started line to ``new_line``;
        - check line endings on each NEWLINE token;
        - check indentation on INDENT tokens and on the first token of a
          statement that follows a NEWLINE or a run of DEDENTs;
        - check for superfluous parentheses after keyword tokens.

        After the walk, "too-many-lines" and "trailing-newlines" are reported
        from the final line count.

        Args:
            tokens: The tokens of the module, in source order.
        """
        # Stack of expected indentation levels; the bottom entry is level 0.
        indents = [0]
        check_equal = False
        line_num = 0
        self._lines = {}
        self._visited_lines = {}
        self._last_line_ending: str | None = None
        last_blank_line_num = 0
        for idx, (tok_type, string, start, _, line) in enumerate(tokens):
            if start[0] != line_num:
                line_num = start[0]
                # A tokenizer oddity: if an indented line contains a multi-line
                # docstring, the line member of the INDENT token does not contain
                # the full line; therefore we check the next token on the line.
                if tok_type == tokenize.INDENT:
                    self.new_line(TokenWrapper(tokens), idx - 1, idx + 1)
                else:
                    self.new_line(TokenWrapper(tokens), idx - 1, idx)

            match tok_type:
                case tokenize.NEWLINE:
                    # a program statement, or ENDMARKER, will eventually follow,
                    # after some (possibly empty) run of tokens of the form
                    # (NL | COMMENT)* (INDENT | DEDENT+)?
                    # If an INDENT appears, setting check_equal is wrong, and will
                    # be undone when we see the INDENT.
                    check_equal = True
                    self._check_line_ending(string, line_num)
                case tokenize.INDENT:
                    check_equal = False
                    self.check_indent_level(string, indents[-1] + 1, line_num)
                    indents.append(indents[-1] + 1)
                case tokenize.DEDENT:
                    # there's nothing we need to check here! what's important is
                    # that when the run of DEDENTs ends, the indentation of the
                    # program statement (or ENDMARKER) that triggered the run is
                    # equal to what's left at the top of the indents stack
                    check_equal = True
                    if len(indents) > 1:
                        del indents[-1]
                case tokenize.NL:
                    # Remember the most recent line that holds nothing but its
                    # line terminator, for the trailing-newlines check below.
                    if not line.strip("\r\n"):
                        last_blank_line_num = line_num
                case tokenize.COMMENT | tokenize.ENCODING:
                    pass
                case _:
                    # This is the first concrete token following a NEWLINE, so it
                    # must be the first token of the next program statement, or an
                    # ENDMARKER; the "line" argument exposes the leading white-space
                    # for this statement; in the case of ENDMARKER, line is an empty
                    # string, so will properly match the empty string with which the
                    # "indents" stack was seeded
                    if check_equal:
                        check_equal = False
                        self.check_indent_level(line, indents[-1], line_num)

            # NOTE(review): "lowercase-l-suffix" is not among the messages defined
            # in this file's MSGS - confirm it is registered elsewhere, or that
            # this branch can no longer be reached.
            if tok_type == tokenize.NUMBER and string.endswith("l"):
                self.add_message("lowercase-l-suffix", line=line_num)

            if string in _KEYWORD_TOKENS:
                self._check_keyword_parentheses(tokens, idx)

        line_num -= 1  # to be ok with "wc -l"
        if line_num > self.linter.config.max_module_lines:
            # Get the line where the too-many-lines (or its message id)
            # was disabled or default to 1.
            message_definition = self.linter.msgs_store.get_message_definitions(
                "too-many-lines"
            )[0]
            names = (message_definition.msgid, "too-many-lines")
            lineno = next(
                filter(None, (self.linter._pragma_lineno.get(name) for name in names)),
                1,
            )
            self.add_message(
                "too-many-lines",
                args=(line_num, self.linter.config.max_module_lines),
                line=lineno,
            )

        # See if there are any trailing lines. Do not complain about empty
        # files like __init__.py markers.
        if line_num == last_blank_line_num and line_num > 0:
            self.add_message("trailing-newlines", line=line_num)
  428. def _check_line_ending(self, line_ending: str, line_num: int) -> None:
  429. # check if line endings are mixed
  430. if self._last_line_ending is not None:
  431. # line_ending == "" indicates a synthetic newline added at
  432. # the end of a file that does not, in fact, end with a
  433. # newline.
  434. if line_ending and line_ending != self._last_line_ending:
  435. self.add_message("mixed-line-endings", line=line_num)
  436. self._last_line_ending = line_ending
  437. # check if line ending is as expected
  438. expected = self.linter.config.expected_line_ending_format
  439. if expected:
  440. # reduce multiple \n\n\n\n to one \n
  441. line_ending = reduce(lambda x, y: x + y if x != y else x, line_ending, "")
  442. line_ending = "LF" if line_ending == "\n" else "CRLF"
  443. if line_ending != expected:
  444. self.add_message(
  445. "unexpected-line-ending-format",
  446. args=(line_ending, expected),
  447. line=line_num,
  448. )
  449. @only_required_for_messages("multiple-statements")
  450. def visit_default(self, node: nodes.NodeNG) -> None:
  451. """Check the node line number and check it if not yet done."""
  452. if not node.is_statement:
  453. return
  454. if not node.root().pure_python:
  455. return
  456. prev_sibl = node.previous_sibling()
  457. if prev_sibl is not None:
  458. prev_line = prev_sibl.fromlineno
  459. elif isinstance(
  460. node.parent, nodes.Try
  461. ) and self._is_first_node_in_else_finally_body(node, node.parent):
  462. prev_line = self._infer_else_finally_line_number(node, node.parent)
  463. elif isinstance(node.parent, nodes.Module):
  464. prev_line = 0
  465. else:
  466. prev_line = node.parent.statement().fromlineno
  467. line = node.fromlineno
  468. assert line, node
  469. if prev_line == line and self._visited_lines.get(line) != 2:
  470. self._check_multi_statement_line(node, line)
  471. return
  472. if line in self._visited_lines:
  473. return
  474. try:
  475. tolineno = node.blockstart_tolineno
  476. except AttributeError:
  477. tolineno = node.tolineno
  478. assert tolineno, node
  479. lines: list[str] = []
  480. for line in range(line, tolineno + 1): # noqa: B020
  481. self._visited_lines[line] = 1
  482. try:
  483. lines.append(self._lines[line].rstrip())
  484. except KeyError:
  485. lines.append("")
  486. def _is_first_node_in_else_finally_body(
  487. self, node: nodes.NodeNG, parent: nodes.Try
  488. ) -> bool:
  489. if parent.orelse and node == parent.orelse[0]:
  490. return True
  491. if parent.finalbody and node == parent.finalbody[0]:
  492. return True
  493. return False
  494. def _infer_else_finally_line_number(
  495. self, node: nodes.NodeNG, parent: nodes.Try
  496. ) -> int:
  497. last_line_of_prev_block = 0
  498. if node in parent.finalbody and parent.orelse:
  499. last_line_of_prev_block = parent.orelse[-1].tolineno
  500. elif parent.handlers and parent.handlers[-1].body:
  501. last_line_of_prev_block = parent.handlers[-1].body[-1].tolineno
  502. elif parent.body:
  503. last_line_of_prev_block = parent.body[-1].tolineno
  504. return last_line_of_prev_block + 1 if last_line_of_prev_block else 0
  505. def _check_multi_statement_line(self, node: nodes.NodeNG, line: int) -> None:
  506. """Check for lines containing multiple statements."""
  507. match node:
  508. case nodes.With():
  509. # Do not warn about multiple nested context managers in with statements.
  510. return
  511. case nodes.NodeNG(
  512. parent=nodes.If(orelse=[])
  513. ) if self.linter.config.single_line_if_stmt:
  514. return
  515. case nodes.NodeNG(
  516. parent=nodes.ClassDef(body=[_])
  517. ) if self.linter.config.single_line_class_stmt:
  518. return
  519. case nodes.Expr(
  520. parent=nodes.FunctionDef() | nodes.ClassDef(),
  521. value=nodes.Const(value=value),
  522. ) if (
  523. value is Ellipsis
  524. ):
  525. # Functions stubs and class with ``Ellipsis`` as body are exempted.
  526. return
  527. self.add_message("multiple-statements", node=node, confidence=HIGH)
  528. self._visited_lines[line] = 2
  529. def check_trailing_whitespace_ending(self, line: str, i: int) -> None:
  530. """Check that there is no trailing white-space."""
  531. # exclude \f (formfeed) from the rstrip
  532. stripped_line = line.rstrip("\t\n\r\v ")
  533. if line[len(stripped_line) :] not in ("\n", "\r\n"):
  534. self.add_message(
  535. "trailing-whitespace",
  536. line=i,
  537. col_offset=len(stripped_line),
  538. confidence=HIGH,
  539. )
  540. def check_line_length(self, line: str, i: int, checker_off: bool) -> None:
  541. """Check that the line length is less than the authorized value."""
  542. max_chars = self.linter.config.max_line_length
  543. ignore_long_line = self.linter.config.ignore_long_lines
  544. line = line.rstrip()
  545. if len(line) > max_chars and not ignore_long_line.search(line):
  546. if checker_off:
  547. self.linter.add_ignored_message("line-too-long", i)
  548. else:
  549. self.add_message("line-too-long", line=i, args=(len(line), max_chars))
  550. @staticmethod
  551. def remove_pylint_option_from_lines(options_pattern_obj: Match[str]) -> str:
  552. """Remove the `# pylint ...` pattern from lines."""
  553. lines = options_pattern_obj.string
  554. purged_lines = (
  555. lines[: options_pattern_obj.start(1)].rstrip()
  556. + lines[options_pattern_obj.end(1) :]
  557. )
  558. return purged_lines
  559. @staticmethod
  560. def is_line_length_check_activated(pylint_pattern_match_object: Match[str]) -> bool:
  561. """Return True if the line length check is activated."""
  562. try:
  563. for pragma in parse_pragma(pylint_pattern_match_object.group(2)):
  564. if pragma.action == "disable" and "line-too-long" in pragma.messages:
  565. return False
  566. except PragmaParserError:
  567. # Printing useful information dealing with this error is done in the lint package
  568. pass
  569. return True
  570. @staticmethod
  571. def specific_splitlines(lines: str) -> list[str]:
  572. """Split lines according to universal newlines except those in a specific
  573. sets.
  574. """
  575. unsplit_ends = {
  576. "\x0b", # synonym of \v
  577. "\x0c", # synonym of \f
  578. "\x1c",
  579. "\x1d",
  580. "\x1e",
  581. "\x85",
  582. "\u2028",
  583. "\u2029",
  584. }
  585. res: list[str] = []
  586. buffer = ""
  587. for atomic_line in lines.splitlines(True):
  588. if atomic_line[-1] not in unsplit_ends:
  589. res.append(buffer + atomic_line)
  590. buffer = ""
  591. else:
  592. buffer += atomic_line
  593. return res
  594. def check_lines(
  595. self, tokens: TokenWrapper, line_start: int, lines: str, lineno: int
  596. ) -> None:
  597. """Check given lines for potential messages.
  598. Check if lines have:
  599. - a final newline
  600. - no trailing white-space
  601. - less than a maximum number of characters
  602. """
  603. # we're first going to do a rough check whether any lines in this set
  604. # go over the line limit. If none of them do, then we don't need to
  605. # parse out the pylint options later on and can just assume that these
  606. # lines are clean
  607. # we'll also handle the line ending check here to avoid double-iteration
  608. # unless the line lengths are suspect
  609. max_chars = self.linter.config.max_line_length
  610. split_lines = self.specific_splitlines(lines)
  611. for offset, line in enumerate(split_lines):
  612. if not line.endswith("\n"):
  613. self.add_message("missing-final-newline", line=lineno + offset)
  614. continue
  615. # We don't test for trailing whitespaces in strings
  616. # See https://github.com/pylint-dev/pylint/issues/6936
  617. # and https://github.com/pylint-dev/pylint/issues/3822
  618. if tokens.type(line_start) != tokenize.STRING:
  619. self.check_trailing_whitespace_ending(line, lineno + offset)
  620. # This check is purposefully simple and doesn't rstrip since this is running
  621. # on every line you're checking it's advantageous to avoid doing a lot of work
  622. potential_line_length_warning = any(
  623. len(line) > max_chars for line in split_lines
  624. )
  625. # if there were no lines passing the max_chars config, we don't bother
  626. # running the full line check (as we've met an even more strict condition)
  627. if not potential_line_length_warning:
  628. return
  629. # Line length check may be deactivated through `pylint: disable` comment
  630. mobj = OPTION_PO.search(lines)
  631. checker_off = False
  632. if mobj:
  633. if not self.is_line_length_check_activated(mobj):
  634. checker_off = True
  635. # The 'pylint: disable whatever' should not be taken into account for line length count
  636. lines = self.remove_pylint_option_from_lines(mobj)
  637. # here we re-run specific_splitlines since we have filtered out pylint options above
  638. for offset, line in enumerate(self.specific_splitlines(lines)):
  639. self.check_line_length(line, lineno + offset, checker_off)
  640. def check_indent_level(self, string: str, expected: int, line_num: int) -> None:
  641. """Return the indent level of the string."""
  642. indent = self.linter.config.indent_string
  643. if indent == "\\t": # \t is not interpreted in the configuration file
  644. indent = "\t"
  645. level = 0
  646. unit_size = len(indent)
  647. while string[:unit_size] == indent:
  648. string = string[unit_size:]
  649. level += 1
  650. suppl = ""
  651. while string and string[0] in " \t":
  652. suppl += string[0]
  653. string = string[1:]
  654. if level != expected or suppl:
  655. i_type = "spaces"
  656. if indent[0] == "\t":
  657. i_type = "tabs"
  658. self.add_message(
  659. "bad-indentation",
  660. line=line_num,
  661. args=(level * unit_size + len(suppl), i_type, expected * unit_size),
  662. )
def register(linter: PyLinter) -> None:
    """Create a ``FormatChecker`` for ``linter`` and register it with it."""
    linter.register_checker(FormatChecker(linter))