misc.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt
  4. """Check source code is ascii only or has an encoding declaration (PEP 263)."""
  5. from __future__ import annotations
  6. import re
  7. import tokenize
  8. from typing import TYPE_CHECKING
  9. from astroid import nodes
  10. from pylint.checkers import BaseRawFileChecker, BaseTokenChecker
  11. from pylint.typing import ManagedMessage
  12. if TYPE_CHECKING:
  13. from pylint.lint import PyLinter
  14. class ByIdManagedMessagesChecker(BaseRawFileChecker):
  15. """Checks for messages that are enabled or disabled by id instead of symbol."""
  16. name = "miscellaneous"
  17. msgs = {
  18. "I0023": (
  19. "%s",
  20. "use-symbolic-message-instead",
  21. "Used when a message is enabled or disabled by id.",
  22. {"default_enabled": False},
  23. )
  24. }
  25. options = ()
  26. def _clear_by_id_managed_msgs(self) -> None:
  27. self.linter._by_id_managed_msgs.clear()
  28. def _get_by_id_managed_msgs(self) -> list[ManagedMessage]:
  29. return self.linter._by_id_managed_msgs
  30. def process_module(self, node: nodes.Module) -> None:
  31. """Inspect the source file to find messages activated or deactivated by id."""
  32. managed_msgs = self._get_by_id_managed_msgs()
  33. for mod_name, msgid, symbol, lineno, is_disabled in managed_msgs:
  34. if mod_name == node.name:
  35. verb = "disable" if is_disabled else "enable"
  36. txt = f"'{msgid}' is cryptic: use '# pylint: {verb}={symbol}' instead"
  37. self.add_message("use-symbolic-message-instead", line=lineno, args=txt)
  38. self._clear_by_id_managed_msgs()
  39. class EncodingChecker(BaseTokenChecker, BaseRawFileChecker):
  40. """BaseChecker for encoding issues and fixme notes.
  41. Checks for:
  42. * warning notes in the code like FIXME, XXX
  43. * encoding issues.
  44. """
  45. # configuration section name
  46. name = "miscellaneous"
  47. msgs = {
  48. "W0511": (
  49. "%s",
  50. "fixme",
  51. "Used when a warning note as FIXME or XXX is detected.",
  52. )
  53. }
  54. options = (
  55. (
  56. "notes",
  57. {
  58. "type": "csv",
  59. "metavar": "<comma separated values>",
  60. "default": ("FIXME", "XXX", "TODO"),
  61. "help": (
  62. "List of note tags to take in consideration, "
  63. "separated by a comma."
  64. ),
  65. },
  66. ),
  67. (
  68. "notes-rgx",
  69. {
  70. "type": "string",
  71. "metavar": "<regexp>",
  72. "help": "Regular expression of note tags to take in consideration.",
  73. "default": "",
  74. },
  75. ),
  76. (
  77. "check-fixme-in-docstring",
  78. {
  79. "type": "yn",
  80. "metavar": "<y or n>",
  81. "default": False,
  82. "help": "Whether or not to search for fixme's in docstrings.",
  83. },
  84. ),
  85. )
  86. def open(self) -> None:
  87. super().open()
  88. notes = "|".join(re.escape(note) for note in self.linter.config.notes)
  89. if self.linter.config.notes_rgx:
  90. notes += f"|{self.linter.config.notes_rgx}"
  91. comment_regex = rf"#\s*(?P<msg>({notes})(?=(:|\s|\Z)).*?$)"
  92. self._comment_fixme_pattern = re.compile(comment_regex, re.I)
  93. # single line docstring like '''this''' or """this"""
  94. docstring_regex = rf"((\"\"\")|(\'\'\'))\s*(?P<msg>({notes})(?=(:|\s|\Z)).*?)((\"\"\")|(\'\'\'))"
  95. self._docstring_fixme_pattern = re.compile(docstring_regex, re.I)
  96. # multiline docstrings which will be split into newlines
  97. # so we do not need to look for quotes/double-quotes
  98. multiline_docstring_regex = rf"^\s*(?P<msg>({notes})(?=(:|\s|\Z)).*$)"
  99. self._multiline_docstring_fixme_pattern = re.compile(
  100. multiline_docstring_regex, re.I
  101. )
  102. def _check_encoding(
  103. self, lineno: int, line: bytes, file_encoding: str
  104. ) -> str | None:
  105. try:
  106. return line.decode(file_encoding)
  107. except UnicodeDecodeError:
  108. pass
  109. except LookupError:
  110. if (
  111. line.startswith(b"#")
  112. and "coding" in str(line)
  113. and file_encoding in str(line)
  114. ):
  115. msg = f"Cannot decode using encoding '{file_encoding}', bad encoding"
  116. self.add_message("syntax-error", line=lineno, args=msg)
  117. return None
  118. def process_module(self, node: nodes.Module) -> None:
  119. """Inspect the source file to find encoding problem."""
  120. encoding = node.file_encoding if node.file_encoding else "ascii"
  121. with node.stream() as stream:
  122. for lineno, line in enumerate(stream):
  123. self._check_encoding(lineno + 1, line, encoding)
  124. def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
  125. """Inspect the source to find fixme problems."""
  126. if not self.linter.config.notes:
  127. return
  128. for token_info in tokens:
  129. if token_info.type == tokenize.COMMENT:
  130. if match := self._comment_fixme_pattern.match(token_info.string):
  131. self.add_message(
  132. "fixme",
  133. col_offset=token_info.start[1] + 1,
  134. args=match.group("msg"),
  135. line=token_info.start[0],
  136. )
  137. elif self.linter.config.check_fixme_in_docstring:
  138. if self._is_multiline_docstring(token_info):
  139. docstring_lines = token_info.string.split("\n")
  140. for line_no, line in enumerate(docstring_lines):
  141. if match := self._multiline_docstring_fixme_pattern.match(line):
  142. self.add_message(
  143. "fixme",
  144. col_offset=token_info.start[1] + 1,
  145. args=match.group("msg"),
  146. line=token_info.start[0] + line_no,
  147. )
  148. elif match := self._docstring_fixme_pattern.match(token_info.string):
  149. self.add_message(
  150. "fixme",
  151. col_offset=token_info.start[1] + 1,
  152. args=match.group("msg"),
  153. line=token_info.start[0],
  154. )
  155. def _is_multiline_docstring(self, token_info: tokenize.TokenInfo) -> bool:
  156. return (
  157. token_info.type == tokenize.STRING
  158. and (token_info.line.lstrip().startswith(('"""', "'''")))
  159. and "\n" in token_info.line.rstrip()
  160. )
  161. def register(linter: PyLinter) -> None:
  162. linter.register_checker(EncodingChecker(linter))
  163. linter.register_checker(ByIdManagedMessagesChecker(linter))