# parse.py (25 KB)
"""Defines parsing functions used by isort for parsing import definitions"""

import re
from collections import OrderedDict, defaultdict
from functools import partial
from itertools import chain
from typing import TYPE_CHECKING, Any, NamedTuple, TypedDict
from warnings import warn

from . import place
from .comments import parse as parse_comments
from .exceptions import MissingSection
from .settings import DEFAULT_CONFIG, Config
  12. if TYPE_CHECKING:
  13. CommentsAboveDict = TypedDict(
  14. "CommentsAboveDict", {"straight": dict[str, Any], "from": dict[str, Any]}
  15. )
  16. CommentsDict = TypedDict(
  17. "CommentsDict",
  18. {
  19. "from": dict[str, Any],
  20. "straight": dict[str, Any],
  21. "nested": dict[str, Any],
  22. "above": CommentsAboveDict,
  23. },
  24. )
  25. def _infer_line_separator(contents: str) -> str:
  26. if "\r\n" in contents:
  27. return "\r\n"
  28. if "\r" in contents:
  29. return "\r"
  30. return "\n"
  31. def normalize_line(raw_line: str) -> tuple[str, str]:
  32. """Normalizes import related statements in the provided line.
  33. Returns (normalized_line: str, raw_line: str)
  34. """
  35. line = re.sub(r"from(\.+)cimport ", r"from \g<1> cimport ", raw_line)
  36. line = re.sub(r"from(\.+)import ", r"from \g<1> import ", line)
  37. line = line.replace("import*", "import *")
  38. line = re.sub(r" (\.+)import ", r" \g<1> import ", line)
  39. line = re.sub(r" (\.+)cimport ", r" \g<1> cimport ", line)
  40. line = line.replace("\t", " ")
  41. return line, raw_line
  42. def import_type(line: str, config: Config = DEFAULT_CONFIG) -> str | None:
  43. """If the current line is an import line it will return its type (from or straight)"""
  44. if config.honor_noqa and line.lower().rstrip().endswith("noqa"):
  45. return None
  46. if "isort:skip" in line or "isort: skip" in line or "isort: split" in line:
  47. return None
  48. if line.startswith(("import ", "cimport ")):
  49. return "straight"
  50. if line.startswith("from "):
  51. return "from"
  52. return None
  53. def strip_syntax(import_string: str) -> str:
  54. import_string = import_string.replace("_import", "[[i]]")
  55. import_string = import_string.replace("_cimport", "[[ci]]")
  56. for remove_syntax in ["\\", "(", ")", ","]:
  57. import_string = import_string.replace(remove_syntax, " ")
  58. import_list = import_string.split()
  59. for key in ("from", "import", "cimport"):
  60. if key in import_list:
  61. import_list.remove(key)
  62. import_string = " ".join(import_list)
  63. import_string = import_string.replace("[[i]]", "_import")
  64. import_string = import_string.replace("[[ci]]", "_cimport")
  65. return import_string.replace("{ ", "{|").replace(" }", "|}")
  66. def skip_line(
  67. line: str,
  68. in_quote: str,
  69. index: int,
  70. section_comments: tuple[str, ...],
  71. needs_import: bool = True,
  72. ) -> tuple[bool, str]:
  73. """Determine if a given line should be skipped.
  74. Returns back a tuple containing:
  75. (skip_line: bool,
  76. in_quote: str,)
  77. """
  78. should_skip = bool(in_quote)
  79. if '"' in line or "'" in line:
  80. char_index = 0
  81. while char_index < len(line):
  82. if line[char_index] == "\\":
  83. char_index += 1
  84. elif in_quote:
  85. if line[char_index : char_index + len(in_quote)] == in_quote:
  86. in_quote = ""
  87. elif line[char_index] in ("'", '"'):
  88. long_quote = line[char_index : char_index + 3]
  89. if long_quote in ('"""', "'''"):
  90. in_quote = long_quote
  91. char_index += 2
  92. else:
  93. in_quote = line[char_index]
  94. elif line[char_index] == "#":
  95. break
  96. char_index += 1
  97. if ";" in line.split("#")[0] and needs_import:
  98. for part in (part.strip() for part in line.split(";")):
  99. if (
  100. part
  101. and not part.startswith("from ")
  102. and not part.startswith(("import ", "cimport "))
  103. ):
  104. should_skip = True
  105. return (bool(should_skip or in_quote), in_quote)
  106. class ParsedContent(NamedTuple):
  107. in_lines: list[str]
  108. lines_without_imports: list[str]
  109. import_index: int
  110. place_imports: dict[str, list[str]]
  111. import_placements: dict[str, str]
  112. as_map: dict[str, dict[str, list[str]]]
  113. imports: dict[str, dict[str, Any]]
  114. categorized_comments: "CommentsDict"
  115. change_count: int
  116. original_line_count: int
  117. line_separator: str
  118. sections: Any
  119. verbose_output: list[str]
  120. trailing_commas: set[str]
  121. # Ignore DeepSource cyclomatic complexity check for this function. It is one
  122. # the main entrypoints so sort of expected to be complex.
  123. # skipcq: PY-R1000
  124. def file_contents(contents: str, config: Config = DEFAULT_CONFIG) -> ParsedContent:
  125. """Parses a python file taking out and categorizing imports."""
  126. line_separator: str = config.line_ending or _infer_line_separator(contents)
  127. in_lines = contents.splitlines()
  128. if contents and contents[-1] in ("\n", "\r"):
  129. in_lines.append("")
  130. out_lines = []
  131. original_line_count = len(in_lines)
  132. finder = partial(place.module, config=config)
  133. line_count = len(in_lines)
  134. place_imports: dict[str, list[str]] = {}
  135. import_placements: dict[str, str] = {}
  136. as_map: dict[str, dict[str, list[str]]] = {
  137. "straight": defaultdict(list),
  138. "from": defaultdict(list),
  139. }
  140. imports: OrderedDict[str, dict[str, Any]] = OrderedDict()
  141. verbose_output: list[str] = []
  142. for section in chain(config.sections, config.forced_separate):
  143. imports[section] = {"straight": OrderedDict(), "from": OrderedDict()}
  144. categorized_comments: CommentsDict = {
  145. "from": {},
  146. "straight": {},
  147. "nested": {},
  148. "above": {"straight": {}, "from": {}},
  149. }
  150. trailing_commas: set[str] = set()
  151. index = 0
  152. import_index = -1
  153. in_quote = ""
  154. while index < line_count:
  155. line = in_lines[index]
  156. index += 1
  157. statement_index = index
  158. (skipping_line, in_quote) = skip_line(
  159. line, in_quote=in_quote, index=index, section_comments=config.section_comments
  160. )
  161. if (
  162. line in config.section_comments or line in config.section_comments_end
  163. ) and not skipping_line:
  164. if import_index == -1: # pragma: no branch
  165. import_index = index - 1
  166. continue
  167. if "isort:imports-" in line and line.startswith("#"):
  168. section = line.split("isort:imports-")[-1].split()[0].upper()
  169. place_imports[section] = []
  170. import_placements[line] = section
  171. elif "isort: imports-" in line and line.startswith("#"):
  172. section = line.split("isort: imports-")[-1].split()[0].upper()
  173. place_imports[section] = []
  174. import_placements[line] = section
  175. if skipping_line:
  176. out_lines.append(line)
  177. continue
  178. lstripped_line = line.lstrip()
  179. if (
  180. config.float_to_top
  181. and import_index == -1
  182. and line
  183. and not in_quote
  184. and not lstripped_line.startswith("#")
  185. and not lstripped_line.startswith("'''")
  186. and not lstripped_line.startswith('"""')
  187. ):
  188. if not lstripped_line.startswith("import") and not lstripped_line.startswith("from"):
  189. import_index = index - 1
  190. while import_index and not in_lines[import_index - 1]:
  191. import_index -= 1
  192. else:
  193. commentless = line.split("#", 1)[0].strip()
  194. if (
  195. ("isort:skip" in line or "isort: skip" in line)
  196. and "(" in commentless
  197. and ")" not in commentless
  198. ):
  199. import_index = index
  200. starting_line = line
  201. while "isort:skip" in starting_line or "isort: skip" in starting_line:
  202. commentless = starting_line.split("#", 1)[0]
  203. if (
  204. "(" in commentless
  205. and not commentless.rstrip().endswith(")")
  206. and import_index < line_count
  207. ):
  208. while import_index < line_count and not commentless.rstrip().endswith(
  209. ")"
  210. ):
  211. commentless = in_lines[import_index].split("#", 1)[0]
  212. import_index += 1
  213. else:
  214. import_index += 1
  215. if import_index >= line_count:
  216. break
  217. starting_line = in_lines[import_index]
  218. line, *end_of_line_comment = line.split("#", 1)
  219. if ";" in line:
  220. statements = [line.strip() for line in line.split(";")]
  221. else:
  222. statements = [line]
  223. if end_of_line_comment:
  224. statements[-1] = f"{statements[-1]}#{end_of_line_comment[0]}"
  225. for statement in statements:
  226. line, raw_line = normalize_line(statement)
  227. type_of_import = import_type(line, config) or ""
  228. raw_lines = [raw_line]
  229. if not type_of_import:
  230. out_lines.append(raw_line)
  231. continue
  232. if import_index == -1:
  233. import_index = index - 1
  234. nested_comments = {}
  235. import_string, comment = parse_comments(line)
  236. comments = [comment] if comment else []
  237. line_parts = [part for part in strip_syntax(import_string).strip().split(" ") if part]
  238. if type_of_import == "from" and len(line_parts) == 2 and comments:
  239. nested_comments[line_parts[-1]] = comments[0]
  240. if "(" in line.split("#", 1)[0] and index < line_count:
  241. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  242. line, new_comment = parse_comments(in_lines[index])
  243. index += 1
  244. if new_comment:
  245. comments.append(new_comment)
  246. stripped_line = strip_syntax(line).strip()
  247. if (
  248. type_of_import == "from"
  249. and stripped_line
  250. and " " not in stripped_line.replace(" as ", "")
  251. and new_comment
  252. ):
  253. nested_comments[stripped_line] = comments[-1]
  254. import_string += line_separator + line
  255. raw_lines.append(line)
  256. else:
  257. while line.strip().endswith("\\"):
  258. line, new_comment = parse_comments(in_lines[index])
  259. line = line.lstrip()
  260. index += 1
  261. if new_comment:
  262. comments.append(new_comment)
  263. # Still need to check for parentheses after an escaped line
  264. if (
  265. "(" in line.split("#")[0]
  266. and ")" not in line.split("#")[0]
  267. and index < line_count
  268. ):
  269. stripped_line = strip_syntax(line).strip()
  270. if (
  271. type_of_import == "from"
  272. and stripped_line
  273. and " " not in stripped_line.replace(" as ", "")
  274. and new_comment
  275. ):
  276. nested_comments[stripped_line] = comments[-1]
  277. import_string += line_separator + line
  278. raw_lines.append(line)
  279. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  280. line, new_comment = parse_comments(in_lines[index])
  281. index += 1
  282. if new_comment:
  283. comments.append(new_comment)
  284. stripped_line = strip_syntax(line).strip()
  285. if (
  286. type_of_import == "from"
  287. and stripped_line
  288. and " " not in stripped_line.replace(" as ", "")
  289. and new_comment
  290. ):
  291. nested_comments[stripped_line] = comments[-1]
  292. import_string += line_separator + line
  293. raw_lines.append(line)
  294. stripped_line = strip_syntax(line).strip()
  295. if (
  296. type_of_import == "from"
  297. and stripped_line
  298. and " " not in stripped_line.replace(" as ", "")
  299. and new_comment
  300. ):
  301. nested_comments[stripped_line] = comments[-1]
  302. if import_string.strip().endswith(
  303. (" import", " cimport")
  304. ) or line.strip().startswith(("import ", "cimport ")):
  305. import_string += line_separator + line
  306. else:
  307. import_string = import_string.rstrip().rstrip("\\") + " " + line.lstrip()
  308. if type_of_import == "from":
  309. cimports: bool
  310. import_string = (
  311. import_string.replace("import(", "import (")
  312. .replace("\\", " ")
  313. .replace("\n", " ")
  314. )
  315. if "import " not in import_string:
  316. out_lines.extend(raw_lines)
  317. continue
  318. if " cimport " in import_string:
  319. parts = import_string.split(" cimport ")
  320. cimports = True
  321. else:
  322. parts = import_string.split(" import ")
  323. cimports = False
  324. from_import = parts[0].split(" ")
  325. import_string = (" cimport " if cimports else " import ").join(
  326. [from_import[0] + " " + "".join(from_import[1:]), *parts[1:]]
  327. )
  328. just_imports = [
  329. item.replace("{|", "{ ").replace("|}", " }")
  330. for item in strip_syntax(import_string).split()
  331. ]
  332. attach_comments_to: list[Any] | None = None
  333. direct_imports = just_imports[1:]
  334. straight_import = True
  335. top_level_module = ""
  336. if "as" in just_imports and (just_imports.index("as") + 1) < len(just_imports):
  337. straight_import = False
  338. while "as" in just_imports:
  339. nested_module = None
  340. as_index = just_imports.index("as")
  341. if type_of_import == "from":
  342. nested_module = just_imports[as_index - 1]
  343. top_level_module = just_imports[0]
  344. module = top_level_module + "." + nested_module
  345. as_name = just_imports[as_index + 1]
  346. direct_imports.remove(nested_module)
  347. direct_imports.remove(as_name)
  348. direct_imports.remove("as")
  349. if nested_module == as_name and config.remove_redundant_aliases:
  350. pass
  351. elif as_name not in as_map["from"][module]: # pragma: no branch
  352. as_map["from"][module].append(as_name)
  353. full_name = f"{nested_module} as {as_name}"
  354. associated_comment = nested_comments.get(full_name)
  355. if associated_comment:
  356. categorized_comments["nested"].setdefault(top_level_module, {})[
  357. full_name
  358. ] = associated_comment
  359. if associated_comment in comments: # pragma: no branch
  360. comments.pop(comments.index(associated_comment))
  361. else:
  362. module = just_imports[as_index - 1]
  363. as_name = just_imports[as_index + 1]
  364. if module == as_name and config.remove_redundant_aliases:
  365. pass
  366. elif as_name not in as_map["straight"][module]:
  367. as_map["straight"][module].append(as_name)
  368. if comments and attach_comments_to is None:
  369. if nested_module and config.combine_as_imports:
  370. attach_comments_to = categorized_comments["from"].setdefault(
  371. f"{top_level_module}.__combined_as__", []
  372. )
  373. else:
  374. if type_of_import == "from" or (
  375. config.remove_redundant_aliases and as_name == module.split(".")[-1]
  376. ):
  377. attach_comments_to = categorized_comments["straight"].setdefault(
  378. module, []
  379. )
  380. else:
  381. attach_comments_to = categorized_comments["straight"].setdefault(
  382. f"{module} as {as_name}", []
  383. )
  384. del just_imports[as_index : as_index + 2]
  385. if type_of_import == "from":
  386. import_from = just_imports.pop(0)
  387. placed_module = finder(import_from)
  388. if config.verbose and not config.only_modified:
  389. print(f"from-type place_module for {import_from} returned {placed_module}")
  390. elif config.verbose:
  391. verbose_output.append(
  392. f"from-type place_module for {import_from} returned {placed_module}"
  393. )
  394. if placed_module == "":
  395. warn(
  396. f"could not place module {import_from} of line {line} --"
  397. " Do you need to define a default section?",
  398. stacklevel=2,
  399. )
  400. if placed_module and placed_module not in imports:
  401. raise MissingSection(import_module=import_from, section=placed_module)
  402. root = imports[placed_module][type_of_import]
  403. for import_name in just_imports:
  404. associated_comment = nested_comments.get(import_name)
  405. if associated_comment:
  406. categorized_comments["nested"].setdefault(import_from, {})[import_name] = (
  407. associated_comment
  408. )
  409. if associated_comment in comments: # pragma: no branch
  410. comments.pop(comments.index(associated_comment))
  411. if (
  412. config.force_single_line
  413. and comments
  414. and attach_comments_to is None
  415. and len(just_imports) == 1
  416. ):
  417. nested_from_comments = categorized_comments["nested"].setdefault(
  418. import_from, {}
  419. )
  420. existing_comment = nested_from_comments.get(just_imports[0], "")
  421. nested_from_comments[just_imports[0]] = (
  422. f"{existing_comment}{'; ' if existing_comment else ''}{'; '.join(comments)}"
  423. )
  424. comments = []
  425. if comments and attach_comments_to is None:
  426. attach_comments_to = categorized_comments["from"].setdefault(import_from, [])
  427. if len(out_lines) > max(import_index, 1) - 1:
  428. last = out_lines[-1].rstrip() if out_lines else ""
  429. while (
  430. last.startswith("#")
  431. and not last.endswith('"""')
  432. and not last.endswith("'''")
  433. and "isort:imports-" not in last
  434. and "isort: imports-" not in last
  435. and not config.treat_all_comments_as_code
  436. and last.strip() not in config.treat_comments_as_code
  437. ):
  438. categorized_comments["above"]["from"].setdefault(import_from, []).insert(
  439. 0, out_lines.pop(-1)
  440. )
  441. if out_lines:
  442. last = out_lines[-1].rstrip()
  443. else:
  444. last = ""
  445. if statement_index - 1 == import_index: # pragma: no cover
  446. import_index -= len(
  447. categorized_comments["above"]["from"].get(import_from, [])
  448. )
  449. if import_from not in root:
  450. root[import_from] = OrderedDict(
  451. (module, module in direct_imports) for module in just_imports
  452. )
  453. else:
  454. root[import_from].update(
  455. (module, root[import_from].get(module, False) or module in direct_imports)
  456. for module in just_imports
  457. )
  458. if comments and attach_comments_to is not None:
  459. attach_comments_to.extend(comments)
  460. if (
  461. just_imports
  462. and just_imports[-1]
  463. and "," in import_string.split(just_imports[-1])[-1]
  464. ):
  465. trailing_commas.add(import_from)
  466. else:
  467. if comments and attach_comments_to is not None:
  468. attach_comments_to.extend(comments)
  469. comments = []
  470. for module in just_imports:
  471. if comments:
  472. categorized_comments["straight"][module] = comments
  473. comments = []
  474. if len(out_lines) > max(import_index, +1, 1) - 1:
  475. last = out_lines[-1].rstrip() if out_lines else ""
  476. while (
  477. last.startswith("#")
  478. and not last.endswith('"""')
  479. and not last.endswith("'''")
  480. and "isort:imports-" not in last
  481. and "isort: imports-" not in last
  482. and not config.treat_all_comments_as_code
  483. and last.strip() not in config.treat_comments_as_code
  484. ):
  485. categorized_comments["above"]["straight"].setdefault(module, []).insert(
  486. 0, out_lines.pop(-1)
  487. )
  488. if out_lines:
  489. last = out_lines[-1].rstrip()
  490. else:
  491. last = ""
  492. if index - 1 == import_index:
  493. import_index -= len(
  494. categorized_comments["above"]["straight"].get(module, [])
  495. )
  496. placed_module = finder(module)
  497. if config.verbose and not config.only_modified:
  498. print(f"else-type place_module for {module} returned {placed_module}")
  499. elif config.verbose:
  500. verbose_output.append(
  501. f"else-type place_module for {module} returned {placed_module}"
  502. )
  503. if placed_module == "":
  504. warn(
  505. f"could not place module {module} of line {line} --"
  506. " Do you need to define a default section?",
  507. stacklevel=2,
  508. )
  509. imports.setdefault("", {"straight": OrderedDict(), "from": OrderedDict()})
  510. if placed_module and placed_module not in imports:
  511. raise MissingSection(import_module=module, section=placed_module)
  512. straight_import |= imports[placed_module][type_of_import].get(module, False)
  513. imports[placed_module][type_of_import][module] = straight_import
  514. change_count = len(out_lines) - original_line_count
  515. return ParsedContent(
  516. in_lines=in_lines,
  517. lines_without_imports=out_lines,
  518. import_index=import_index,
  519. place_imports=place_imports,
  520. import_placements=import_placements,
  521. as_map=as_map,
  522. imports=imports,
  523. categorized_comments=categorized_comments,
  524. change_count=change_count,
  525. original_line_count=original_line_count,
  526. line_separator=line_separator,
  527. sections=config.sections,
  528. verbose_output=verbose_output,
  529. trailing_commas=trailing_commas,
  530. )