_docs_extraction.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. """Utilities related to attribute docstring extraction."""
  2. from __future__ import annotations
  3. import ast
  4. import inspect
  5. import sys
  6. import textwrap
  7. from typing import Any
  8. class DocstringVisitor(ast.NodeVisitor):
  9. def __init__(self) -> None:
  10. super().__init__()
  11. self.target: str | None = None
  12. self.attrs: dict[str, str] = {}
  13. self.previous_node_type: type[ast.AST] | None = None
  14. def visit(self, node: ast.AST) -> Any:
  15. node_result = super().visit(node)
  16. self.previous_node_type = type(node)
  17. return node_result
  18. def visit_AnnAssign(self, node: ast.AnnAssign) -> Any:
  19. if isinstance(node.target, ast.Name):
  20. self.target = node.target.id
  21. def visit_Expr(self, node: ast.Expr) -> Any:
  22. if (
  23. isinstance(node.value, ast.Constant)
  24. and isinstance(node.value.value, str)
  25. and self.previous_node_type is ast.AnnAssign
  26. ):
  27. docstring = inspect.cleandoc(node.value.value)
  28. if self.target:
  29. self.attrs[self.target] = docstring
  30. self.target = None
  31. def _dedent_source_lines(source: list[str]) -> str:
  32. # Required for nested class definitions, e.g. in a function block
  33. dedent_source = textwrap.dedent(''.join(source))
  34. if dedent_source.startswith((' ', '\t')):
  35. # We are in the case where there's a dedented (usually multiline) string
  36. # at a lower indentation level than the class itself. We wrap our class
  37. # in a function as a workaround.
  38. dedent_source = f'def dedent_workaround():\n{dedent_source}'
  39. return dedent_source
  40. def _extract_source_from_frame(cls: type[Any]) -> list[str] | None:
  41. frame = inspect.currentframe()
  42. while frame:
  43. if inspect.getmodule(frame) is inspect.getmodule(cls):
  44. lnum = frame.f_lineno
  45. try:
  46. lines, _ = inspect.findsource(frame)
  47. except OSError: # pragma: no cover
  48. # Source can't be retrieved (maybe because running in an interactive terminal),
  49. # we don't want to error here.
  50. pass
  51. else:
  52. block_lines = inspect.getblock(lines[lnum - 1 :])
  53. dedent_source = _dedent_source_lines(block_lines)
  54. try:
  55. block_tree = ast.parse(dedent_source)
  56. except SyntaxError:
  57. pass
  58. else:
  59. stmt = block_tree.body[0]
  60. if isinstance(stmt, ast.FunctionDef) and stmt.name == 'dedent_workaround':
  61. # `_dedent_source_lines` wrapped the class around the workaround function
  62. stmt = stmt.body[0]
  63. if isinstance(stmt, ast.ClassDef) and stmt.name == cls.__name__:
  64. return block_lines
  65. frame = frame.f_back
  66. def extract_docstrings_from_cls(cls: type[Any], use_inspect: bool = False) -> dict[str, str]:
  67. """Map model attributes and their corresponding docstring.
  68. Args:
  69. cls: The class of the Pydantic model to inspect.
  70. use_inspect: Whether to skip usage of frames to find the object and use
  71. the `inspect` module instead.
  72. Returns:
  73. A mapping containing attribute names and their corresponding docstring.
  74. """
  75. if use_inspect or sys.version_info >= (3, 13):
  76. # On Python < 3.13, `inspect.getsourcelines()` might not work as expected
  77. # if two classes have the same name in the same source file.
  78. # On Python 3.13+, it will use the new `__firstlineno__` class attribute,
  79. # making it way more robust.
  80. try:
  81. source, _ = inspect.getsourcelines(cls)
  82. except OSError: # pragma: no cover
  83. return {}
  84. else:
  85. # TODO remove this implementation when we drop support for Python 3.12:
  86. source = _extract_source_from_frame(cls)
  87. if not source:
  88. return {}
  89. dedent_source = _dedent_source_lines(source)
  90. visitor = DocstringVisitor()
  91. visitor.visit(ast.parse(dedent_source))
  92. return visitor.attrs