recommendation_checker.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt
  4. from __future__ import annotations
  5. import astroid
  6. from astroid import nodes
  7. from pylint import checkers
  8. from pylint.checkers import utils
  9. from pylint.interfaces import HIGH, INFERENCE
  10. class RecommendationChecker(checkers.BaseChecker):
  11. name = "refactoring"
  12. msgs = {
  13. "C0200": (
  14. "Consider using enumerate instead of iterating with range and len",
  15. "consider-using-enumerate",
  16. "Emitted when code that iterates with range and len is "
  17. "encountered. Such code can be simplified by using the "
  18. "enumerate builtin.",
  19. ),
  20. "C0201": (
  21. "Consider iterating the dictionary directly instead of calling .keys()",
  22. "consider-iterating-dictionary",
  23. "Emitted when the keys of a dictionary are iterated through the ``.keys()`` "
  24. "method or when ``.keys()`` is used for a membership check. "
  25. "It is enough to iterate through the dictionary itself, "
  26. "``for key in dictionary``. For membership checks, "
  27. "``if key in dictionary`` is faster.",
  28. ),
  29. "C0206": (
  30. "Consider iterating with .items()",
  31. "consider-using-dict-items",
  32. "Emitted when iterating over the keys of a dictionary and accessing the "
  33. "value by index lookup. "
  34. "Both the key and value can be accessed by iterating using the .items() "
  35. "method of the dictionary instead.",
  36. ),
  37. "C0207": (
  38. "Use %s instead",
  39. "use-maxsplit-arg",
  40. "Emitted when accessing only the first or last element of str.split(). "
  41. "The first and last element can be accessed by using "
  42. "str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] "
  43. "instead.",
  44. ),
  45. "C0208": (
  46. "Use a sequence type when iterating over values",
  47. "use-sequence-for-iteration",
  48. "When iterating over values, sequence types (e.g., ``lists``, ``tuples``, ``ranges``) "
  49. "are more efficient than ``sets``.",
  50. ),
  51. "C0209": (
  52. "Formatting a regular string which could be an f-string",
  53. "consider-using-f-string",
  54. "Used when we detect a string that is being formatted with format() or % "
  55. "which could potentially be an f-string. The use of f-strings is preferred. "
  56. "Requires Python 3.6 and ``py-version >= 3.6``.",
  57. ),
  58. }
  59. def open(self) -> None:
  60. py_version = self.linter.config.py_version
  61. self._py36_plus = py_version >= (3, 6)
  62. @staticmethod
  63. def _is_builtin(node: nodes.NodeNG, function: str) -> bool:
  64. inferred = utils.safe_infer(node)
  65. if not inferred:
  66. return False
  67. return utils.is_builtin_object(inferred) and inferred.name == function
  68. @utils.only_required_for_messages(
  69. "consider-iterating-dictionary", "use-maxsplit-arg"
  70. )
  71. def visit_call(self, node: nodes.Call) -> None:
  72. self._check_consider_iterating_dictionary(node)
  73. self._check_use_maxsplit_arg(node)
  74. def _check_consider_iterating_dictionary(self, node: nodes.Call) -> None:
  75. if not isinstance(node.func, nodes.Attribute):
  76. return
  77. if node.func.attrname != "keys":
  78. return
  79. if isinstance(node.parent, nodes.BinOp) and node.parent.op in {"&", "|", "^"}:
  80. return
  81. comp_ancestor = utils.get_node_first_ancestor_of_type(node, nodes.Compare)
  82. if isinstance(node.parent, (nodes.For, nodes.Comprehension)) or (
  83. comp_ancestor
  84. and any(
  85. op
  86. for op, comparator in comp_ancestor.ops
  87. if op in {"in", "not in"}
  88. and (comparator in node.node_ancestors() or comparator is node)
  89. )
  90. ):
  91. match utils.safe_infer(node.func):
  92. case astroid.BoundMethod(bound=nodes.Dict()):
  93. self.add_message(
  94. "consider-iterating-dictionary", node=node, confidence=INFERENCE
  95. )
  96. def _check_use_maxsplit_arg(self, node: nodes.Call) -> None:
  97. """Add message when accessing first or last elements of a str.split() or
  98. str.rsplit().
  99. """
  100. # Check if call is split() or rsplit()
  101. if not (
  102. isinstance(node.func, nodes.Attribute)
  103. and node.func.attrname in {"split", "rsplit"}
  104. and isinstance(utils.safe_infer(node.func), astroid.BoundMethod)
  105. ):
  106. return
  107. inferred_expr = utils.safe_infer(node.func.expr)
  108. if isinstance(inferred_expr, astroid.Instance) and any(
  109. inferred_expr.nodes_of_class(nodes.ClassDef)
  110. ):
  111. return
  112. confidence = HIGH
  113. try:
  114. sep = utils.get_argument_from_call(node, 0, "sep")
  115. except utils.NoSuchArgumentError:
  116. sep = utils.infer_kwarg_from_call(node, keyword="sep")
  117. confidence = INFERENCE
  118. if not sep:
  119. return
  120. try:
  121. # Ignore if maxsplit arg has been set
  122. utils.get_argument_from_call(node, 1, "maxsplit")
  123. return
  124. except utils.NoSuchArgumentError:
  125. if utils.infer_kwarg_from_call(node, keyword="maxsplit"):
  126. return
  127. if isinstance(node.parent, nodes.Subscript):
  128. try:
  129. subscript_value = utils.get_subscript_const_value(node.parent).value
  130. except utils.InferredTypeError:
  131. return
  132. # Check for cases where variable (Name) subscripts may be mutated within a loop
  133. if isinstance(node.parent.slice, nodes.Name):
  134. # Check if loop present within the scope of the node
  135. scope = node.scope()
  136. for loop_node in scope.nodes_of_class((nodes.For, nodes.While)):
  137. if not loop_node.parent_of(node):
  138. continue
  139. # Check if var is mutated within loop (Assign/AugAssign)
  140. for assignment_node in loop_node.nodes_of_class(nodes.AugAssign):
  141. if node.parent.slice.name == assignment_node.target.name:
  142. return
  143. for assignment_node in loop_node.nodes_of_class(nodes.Assign):
  144. if node.parent.slice.name in [
  145. n.name for n in assignment_node.targets
  146. ]:
  147. return
  148. if subscript_value in (-1, 0):
  149. fn_name = node.func.attrname
  150. new_fn = "rsplit" if subscript_value == -1 else "split"
  151. new_name = (
  152. node.func.as_string().rsplit(fn_name, maxsplit=1)[0]
  153. + new_fn
  154. + f"({sep.as_string()}, maxsplit=1)[{subscript_value}]"
  155. )
  156. self.add_message(
  157. "use-maxsplit-arg",
  158. node=node,
  159. args=(new_name,),
  160. confidence=confidence,
  161. )
  162. @utils.only_required_for_messages(
  163. "consider-using-enumerate",
  164. "consider-using-dict-items",
  165. "use-sequence-for-iteration",
  166. )
  167. def visit_for(self, node: nodes.For) -> None:
  168. self._check_consider_using_enumerate(node)
  169. self._check_consider_using_dict_items(node)
  170. self._check_use_sequence_for_iteration(node)
  171. def _check_consider_using_enumerate(self, node: nodes.For) -> None:
  172. """Emit a convention whenever range and len are used for indexing."""
  173. # Verify that we have a `range([start], len(...), [stop])` call and
  174. # that the object which is iterated is used as a subscript in the
  175. # body of the for.
  176. # Is it a proper range call?
  177. if not isinstance(node.iter, nodes.Call):
  178. return
  179. if not self._is_builtin(node.iter.func, "range"):
  180. return
  181. if not node.iter.args:
  182. return
  183. is_constant_zero = (
  184. isinstance(node.iter.args[0], nodes.Const) and node.iter.args[0].value == 0
  185. )
  186. if len(node.iter.args) == 2 and not is_constant_zero:
  187. return
  188. if len(node.iter.args) > 2:
  189. return
  190. # Is it a proper len call?
  191. match node.iter.args:
  192. case [
  193. *_,
  194. nodes.Call(func=second_func, args=[iterating_object]),
  195. ] if self._is_builtin(second_func, "len"):
  196. pass
  197. case _:
  198. return
  199. if isinstance(iterating_object, nodes.Name):
  200. expected_subscript_val_type = nodes.Name
  201. elif isinstance(iterating_object, nodes.Attribute):
  202. expected_subscript_val_type = nodes.Attribute
  203. else:
  204. return
  205. # If we're defining __iter__ on self, enumerate won't work
  206. scope = node.scope()
  207. match iterating_object:
  208. case nodes.Name(name="self") if scope.name == "__iter__":
  209. return
  210. # Verify that the body of the for loop uses a subscript
  211. # with the object that was iterated. This uses some heuristics
  212. # in order to make sure that the same object is used in the
  213. # for body.
  214. for child in node.body:
  215. for subscript in child.nodes_of_class(nodes.Subscript):
  216. if not isinstance(subscript.value, expected_subscript_val_type):
  217. continue
  218. value = subscript.slice
  219. if not isinstance(value, nodes.Name):
  220. continue
  221. if subscript.value.scope() != node.scope():
  222. # Ignore this subscript if it's not in the same
  223. # scope. This means that in the body of the for
  224. # loop, another scope was created, where the same
  225. # name for the iterating object was used.
  226. continue
  227. if value.name == node.target.name and (
  228. (
  229. isinstance(subscript.value, nodes.Name)
  230. and iterating_object.name == subscript.value.name
  231. )
  232. or (
  233. isinstance(subscript.value, nodes.Attribute)
  234. and iterating_object.attrname == subscript.value.attrname
  235. )
  236. ):
  237. self.add_message("consider-using-enumerate", node=node)
  238. return
  239. def _check_consider_using_dict_items(self, node: nodes.For) -> None:
  240. """Add message when accessing dict values by index lookup."""
  241. # Verify that we have a .keys() call and
  242. # that the object which is iterated is used as a subscript in the
  243. # body of the for.
  244. iterating_object_name = utils.get_iterating_dictionary_name(node)
  245. if iterating_object_name is None:
  246. return
  247. # Verify that the body of the for loop uses a subscript
  248. # with the object that was iterated. This uses some heuristics
  249. # in order to make sure that the same object is used in the
  250. # for body.
  251. for child in node.body:
  252. for subscript in child.nodes_of_class(nodes.Subscript):
  253. if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)):
  254. continue
  255. value = subscript.slice
  256. if not (
  257. isinstance(value, nodes.Name)
  258. and value.name == node.target.name
  259. and iterating_object_name == subscript.value.as_string()
  260. ):
  261. continue
  262. last_definition_lineno = value.lookup(value.name)[1][-1].lineno
  263. if last_definition_lineno > node.lineno:
  264. # Ignore this subscript if it has been redefined after
  265. # the for loop. This checks for the line number using .lookup()
  266. # to get the line number where the iterating object was last
  267. # defined and compare that to the for loop's line number
  268. continue
  269. if (
  270. isinstance(subscript.parent, nodes.Assign)
  271. and subscript in subscript.parent.targets
  272. ) or (
  273. isinstance(subscript.parent, nodes.AugAssign)
  274. and subscript == subscript.parent.target
  275. ):
  276. # Ignore this subscript if it is the target of an assignment
  277. # Early termination as dict index lookup is necessary
  278. return
  279. if isinstance(subscript.parent, nodes.Delete):
  280. # Ignore this subscript if the index is used to delete a
  281. # dictionary item.
  282. return
  283. self.add_message("consider-using-dict-items", node=node)
  284. return
  285. @utils.only_required_for_messages(
  286. "consider-using-dict-items",
  287. "use-sequence-for-iteration",
  288. )
  289. def visit_comprehension(self, node: nodes.Comprehension) -> None:
  290. self._check_consider_using_dict_items_comprehension(node)
  291. self._check_use_sequence_for_iteration(node)
  292. def _check_consider_using_dict_items_comprehension(
  293. self, node: nodes.Comprehension
  294. ) -> None:
  295. """Add message when accessing dict values by index lookup."""
  296. iterating_object_name = utils.get_iterating_dictionary_name(node)
  297. if iterating_object_name is None:
  298. return
  299. for child in node.parent.get_children():
  300. for subscript in child.nodes_of_class(nodes.Subscript):
  301. if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)):
  302. continue
  303. value = subscript.slice
  304. if not (
  305. isinstance(value, nodes.Name)
  306. and value.name == node.target.name
  307. and iterating_object_name == subscript.value.as_string()
  308. ):
  309. continue
  310. self.add_message("consider-using-dict-items", node=node)
  311. return
  312. def _check_use_sequence_for_iteration(
  313. self, node: nodes.For | nodes.Comprehension
  314. ) -> None:
  315. """Check if code iterates over an in-place defined set.
  316. Sets using `*` are not considered in-place.
  317. """
  318. if isinstance(node.iter, nodes.Set) and not any(
  319. utils.has_starred_node_recursive(node)
  320. ):
  321. self.add_message(
  322. "use-sequence-for-iteration", node=node.iter, confidence=HIGH
  323. )
  324. @utils.only_required_for_messages("consider-using-f-string")
  325. def visit_const(self, node: nodes.Const) -> None:
  326. if self._py36_plus:
  327. # f-strings require Python 3.6
  328. if node.pytype() == "builtins.str" and not isinstance(
  329. node.parent, nodes.JoinedStr
  330. ):
  331. self._detect_replacable_format_call(node)
  332. def _detect_replacable_format_call(self, node: nodes.Const) -> None:
  333. """Check whether a string is used in a call to format() or '%' and whether it
  334. can be replaced by an f-string.
  335. """
  336. if (
  337. isinstance(node.parent, nodes.Attribute)
  338. and node.parent.attrname == "format"
  339. ):
  340. # Don't warn on referencing / assigning .format without calling it
  341. if not isinstance(node.parent.parent, nodes.Call):
  342. return
  343. # Don't raise message on bad format string
  344. try:
  345. keyword_args = [
  346. i[0] for i in utils.parse_format_method_string(node.value)[0]
  347. ]
  348. except utils.IncompleteFormatString:
  349. return
  350. if node.parent.parent.args:
  351. for arg in node.parent.parent.args:
  352. # If star expressions with more than 1 element are being used
  353. if isinstance(arg, nodes.Starred):
  354. inferred = utils.safe_infer(arg.value)
  355. if isinstance(inferred, nodes.List) and len(inferred.elts) > 1:
  356. return
  357. # Backslashes can't be in f-string expressions
  358. if "\\" in arg.as_string():
  359. return
  360. elif node.parent.parent.keywords:
  361. for keyword in node.parent.parent.keywords:
  362. # If keyword is used multiple times
  363. if keyword_args.count(keyword.arg) > 1:
  364. return
  365. keyword = utils.safe_infer(keyword.value)
  366. # If lists of more than one element are being unpacked
  367. if (
  368. isinstance(keyword, nodes.Dict)
  369. and len(keyword.items) > 1
  370. and len(keyword_args) > 1
  371. ):
  372. return
  373. # If all tests pass, then raise message
  374. self.add_message(
  375. "consider-using-f-string",
  376. node=node,
  377. line=node.lineno,
  378. col_offset=node.col_offset,
  379. )
  380. elif isinstance(node.parent, nodes.BinOp) and node.parent.op == "%":
  381. # Backslashes can't be in f-string expressions
  382. if "\\" in node.parent.right.as_string():
  383. return
  384. # If % applied to another type than str, it's modulo and can't be replaced by formatting
  385. if not (
  386. hasattr(node.parent.left, "value")
  387. and isinstance(node.parent.left.value, str)
  388. ):
  389. return
  390. # Brackets can be inconvenient in f-string expressions
  391. if "{" in node.parent.left.value or "}" in node.parent.left.value:
  392. return
  393. # If dicts or lists of length > 1 are used
  394. match utils.safe_infer(node.parent.right):
  395. case nodes.Dict(items=items) | nodes.List(elts=items) if len(items) > 1:
  396. return
  397. # If all tests pass, then raise message
  398. self.add_message(
  399. "consider-using-f-string",
  400. node=node,
  401. line=node.lineno,
  402. col_offset=node.col_offset,
  403. )