exc.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877
  1. from __future__ import annotations
  2. """Exception handling and error reporting for TorchDynamo.
  3. This module provides a comprehensive set of exception classes and utilities for error
  4. handling in TorchDynamo. It includes:
  5. Base Exceptions:
  6. - TorchDynamoException: Base class for all TorchDynamo-specific exceptions
  7. - Various specialized subclasses for different error scenarios
  8. User Error Handling:
  9. - UserError: Exceptions for user-facing errors in TorchDynamo usage
  10. - UserErrorType: Enumeration of different categories of user errors
  11. - Formatted error messages with debugging information
  12. Observed Exceptions:
  13. - Classes for handling exceptions observed during tracing
  14. - Special handling for StopIteration, LookupError, etc.
  15. - Exception state management during compilation
  16. Error Formatting:
  17. - Stack trace filtering and formatting
  18. - Error message augmentation
  19. - Debugging utilities for error reporting
  20. """
  21. import json
  22. import logging
  23. import re
  24. import textwrap
  25. import typing
  26. from enum import auto, Enum
  27. from functools import lru_cache
  28. from pathlib import Path
  29. from traceback import extract_stack, format_exc, format_list, FrameSummary, StackSummary
  30. from typing import Any, NoReturn, Optional, TYPE_CHECKING
  31. import torch._guards
  32. from torch._utils_internal import get_file_path_2
  33. from . import config
  34. from .utils import counters
  35. if TYPE_CHECKING:
  36. import types
  37. from torch._guards import CompileId
  38. from .output_graph import DynamoTracerOutput
  39. from .symbolic_convert import InstructionTranslatorBase
  40. from .types import DynamoFrameType, FrameExecStrategy
  def exportdb_error_message(case_name: str) -> str:
      """Build the "see exportdb" pointer sentence for an export case.

      Args:
          case_name: exportdb case name; every underscore is replaced by a
              hyphen to form the URL fragment.

      Returns:
          A single-line message ending in the exportdb URL for ``case_name``.
      """
      anchor = case_name.replace("_", "-")
      pieces = (
          "For more information about this error, see: ",
          "https://pytorch.org/docs/main/generated/exportdb/index.html#",
          anchor,
      )
      return "".join(pieces)
  47. log = logging.getLogger(__name__)
  48. graph_breaks_log = torch._logging.getArtifactLogger(__name__, "graph_breaks")
  class TorchDynamoException(RuntimeError):
      """Root of the TorchDynamo exception hierarchy.

      Attributes:
          _torch_dynamo_tracer_output: Optional tracer output attached to the
              exception; starts out as ``None``.
          frame_exec_strategy: Optional frame execution strategy to control how
              convert_frame should handle this exception. When set,
              convert_frame will use this strategy instead of the default
              behavior. This allows exceptions to signal specific execution
              strategies (e.g., SKIP, RUN_ONLY) without requiring separate
              exception types for control flow. Starts out as ``None``.
      """

      def __init__(self, *args: Any, **kwargs: Any) -> None:
          """Forward all arguments to ``RuntimeError`` and reset both attributes."""
          super().__init__(*args, **kwargs)
          self.frame_exec_strategy: FrameExecStrategy | None = None
          self._torch_dynamo_tracer_output: Optional[DynamoTracerOutput] = None
  class InternalTorchDynamoError(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class ResumePrologueTracingError(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class RestartAnalysis(TorchDynamoException):
      """Base class of the ``*RestartAnalysis`` exceptions in this module.

      Attributes:
          restart_reason: Optional free-form reason supplied by the raiser.
      """

      restart_reason: Optional[str]

      def __init__(self, *args: Any, restart_reason: Optional[str] = None) -> None:
          """Store ``restart_reason`` and pass positional args to the base class."""
          super().__init__(*args)
          self.restart_reason = restart_reason
  class SpeculationRestartAnalysis(RestartAnalysis):
      """RestartAnalysis subclass that adds no state or behavior of its own."""
  class AutogradGradRestartAnalysis(RestartAnalysis):
      """Raised when autograd.grad consumed grad_fns that are returned.

      On restart, autograd.grad will graph break instead of being traced.
      """
  class UnspecializeRestartAnalysis(RestartAnalysis):
      """RestartAnalysis subclass that adds no state or behavior of its own."""
  class CompileCollectiveRestartAnalysis(RestartAnalysis):
      """RestartAnalysis subclass that adds no state or behavior of its own."""
  class TensorifyScalarRestartAnalysis(RestartAnalysis):
      """RestartAnalysis subclass that adds no state or behavior of its own."""
  # Used (primarily for backends) to skip tracing the current frame
  # and all future invocations of it.
  # NOTE: this does NOT cause a graph break, and thus no graph break messages
  # will be issued!
  class SkipFrame(TorchDynamoException):
      """Request that the current frame be skipped; carries no extra state."""
  class TorchRuntimeError(TorchDynamoException):
      """Dynamo exception that records a message and the associated user stack.

      Attributes:
          msg: The message passed to the constructor.
          real_stack: The supplied stack, or the result of
              ``torch._guards.TracingContext.extract_stack()`` when none is given.
      """

      def __init__(self, msg: str, real_stack: StackSummary | None = None) -> None:
          super().__init__(msg)
          self.msg = msg
          # Only an explicit None triggers extraction; an empty stack is kept.
          if real_stack is None:
              real_stack = torch._guards.TracingContext.extract_stack()
          self.real_stack = real_stack
  class InvalidBackend(TorchDynamoException):
      """Raised with a fixed message naming the backend that was not recognized."""

      def __init__(self, name: str) -> None:
          """Format the error message around ``repr(name)``."""
          message = f"Invalid backend: {name!r}, see `torch._dynamo.list_backends()` for available backends."
          super().__init__(message)
  class ResetRequired(TorchDynamoException):
      """Raised with a fixed message asking the user to call `torch._dynamo.reset()`."""

      def __init__(self) -> None:
          # textwrap.dedent strips the common indentation, so the message text
          # does not depend on how deeply this literal is nested in the source.
          message = textwrap.dedent(
              """
              Must call `torch._dynamo.reset()` before changing backends. Detected two calls to
              `torch.compile()` with a different backend compiler arguments.
              """
          )
          super().__init__(message)
  class ShortenTraceback(TorchDynamoException):
      """Exception that can trim its own traceback down to a chosen frame.

      Attributes:
          first_useful_frame: Frame at which the trimmed traceback should
              start, or ``None`` to leave the traceback alone.
      """

      def __init__(
          self, *args: Any, first_useful_frame: Optional[types.FrameType], **kwargs: Any
      ) -> None:
          super().__init__(*args, **kwargs)
          self.first_useful_frame = first_useful_frame

      def remove_dynamo_frames(self) -> typing.Self:
          """Return this exception with leading traceback entries dropped.

          The traceback is left untouched when there is no target frame, no
          traceback, or ``config.verbose`` is set. Otherwise entries are skipped
          until the one whose frame is ``first_useful_frame``.
          """
          target = self.first_useful_frame
          cursor = self.__traceback__
          if target is None or cursor is None or config.verbose:
              return self
          # Walk forward through the chain; running off the end means the target
          # frame was never part of this traceback.
          while cursor.tb_frame is not target:
              cursor = cursor.tb_next
              assert cursor is not None, "internal error, please report a bug"
          return self.with_traceback(cursor)
  class BackendCompilerFailed(ShortenTraceback):
      """Wraps an exception raised by a backend compiler function.

      Attributes:
          backend_name: ``backend_fn.__name__`` or ``"?"`` when it has none.
          inner_exception: The exception raised by the backend.
      """

      def __init__(
          self,
          backend_fn: Any,
          inner_exception: Exception,
          first_useful_frame: Optional[types.FrameType],
      ) -> None:
          self.inner_exception = inner_exception
          self.backend_name = getattr(backend_fn, "__name__", "?")
          inner_kind = type(inner_exception).__name__
          msg = f"backend={self.backend_name!r} raised:\n{inner_kind}: {inner_exception}"
          super().__init__(msg, first_useful_frame=first_useful_frame)
  # NOTE: important invariant! Almost any exception handler that handles Unsupported
  # should NOT suppress the exception if skip_frame is set!
  # skip_frame is used by symbolic_convert.py to bubble up Unsupported exceptions to convert_frame to cause
  # a frame skip. Once the Unsupported exn is in convert_frame, we will always skip, so skip_frame
  # won't be checked
  class Unsupported(TorchDynamoException):
      """Graph-break exception; each instance is tallied in ``counters``.

      Attributes:
          msg: Message text; also the key used in ``counters[category]``.
          real_stack: Supplied stack, or a freshly extracted one when the
              supplied value is falsy (``None`` or empty).
          skip_frame: Flag described in the note preceding this class.
          category: Name of the ``counters`` bucket this instance is counted in.
          gb_type: Graph break type string given by the raiser.
          logged: Starts ``False``; not modified anywhere in this class.
      """

      def __init__(
          self,
          msg: str,
          # TODO: make this argument required once we remove Unsupported subclasses
          gb_type: str = "",
          skip_frame: bool = False,
          *,
          case_name: Optional[str] = None,
          real_stack: StackSummary | None = None,
      ) -> None:
          # `case_name` is accepted but not read by this constructor.
          super().__init__(msg)
          self.msg = msg
          self.real_stack = (
              real_stack
              if real_stack
              else torch._guards.TracingContext.extract_stack()
          )
          self.skip_frame = skip_frame
          self.category: Optional[str] = None
          # Counts this instance under the default "unimplemented" category.
          self.add_to_stats()
          self.gb_type: str | None = gb_type
          self.logged = False

      def remove_from_stats(self) -> None:
          """Undo one ``add_to_stats`` call, deleting the entry once it hits zero."""
          assert self.category is not None
          bucket = counters[self.category]
          bucket[self.msg] -= 1
          if bucket[self.msg] <= 0:
              del bucket[self.msg]

      def add_to_stats(self, category: str = "unimplemented") -> None:
          """Record ``category`` and increment its counter for this message."""
          self.category = category
          counters[category][self.msg] += 1
  class UnknownPropertiesDuringBackwardTrace(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class RecompileError(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class InfiniteGeneratorError(TorchDynamoException):
      """Raised when the number of yielded values is greater than MAX_ITERATOR_LIMIT."""
  class CondOpArgsMismatchError(TorchDynamoException):
      """Internal error from cond() due to arguments mismatch."""
  class UserErrorType(Enum):
      """Categories of ``UserError``; values are assigned by ``auto()`` in order."""

      DYNAMIC_CONTROL_FLOW = auto()
      ANTI_PATTERN = auto()
      STANDARD_LIBRARY = auto()
      CONSTRAINT_VIOLATION = auto()
      DYNAMIC_DIM = auto()
      INVALID_INPUT = auto()
      INVALID_OUTPUT = auto()
      UNSUPPORTED_ALIASED_MUTATED_DYNAMIC_INPUTS = auto()
  class UserError(Unsupported):
      """User-facing error: valid in Eager, but not supported in TorchDynamo.

      Attributes:
          error_type: The ``UserErrorType`` category.
          message: Final message text, including any exportdb pointer.
      """

      def __init__(
          self, error_type: UserErrorType, msg: str, case_name: Optional[str] = None
      ) -> None:
          """
          Type of errors that would be valid in Eager, but not supported in TorchDynamo.
          The error message should tell user about next actions.

          error_type: Type of user error
          msg: Actionable error message
          case_name: (Optional) Unique name (snake case) for the usage example in exportdb.
          """
          if case_name is not None:
              assert isinstance(case_name, str)
              # Continue on the same line after a full stop, otherwise start a new line.
              separator = " " if msg.endswith(".") else "\n"
              msg = msg + separator + exportdb_error_message(case_name)
          # The case name (or "UserError") is passed as the base class gb_type.
          super().__init__(msg, case_name or "UserError")
          self.error_type = error_type
          self.message = msg
  # debug exception thrown when tracing torch._dynamo.step_unsupported()
  class StepUnsupported(TorchDynamoException):
      """Debug exception that records a message and the associated user stack.

      Attributes:
          msg: The message passed to the constructor.
          real_stack: Supplied stack, or a freshly extracted one when the
              supplied value is falsy (``None`` or empty).
          logged: Starts ``False``; not modified anywhere in this class.
      """

      def __init__(self, msg: str, real_stack: StackSummary | None = None) -> None:
          super().__init__(msg)
          self.msg = msg
          self.real_stack = (
              real_stack
              if real_stack
              else torch._guards.TracingContext.extract_stack()
          )
          self.logged = False
  class UnsafeScriptObjectError(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class UncapturedHigherOrderOpError(TorchDynamoException):
      """Dynamo exception that records a message and the associated user stack.

      Attributes:
          msg: The message passed to the constructor.
          real_stack: The supplied stack, or the result of
              ``torch._guards.TracingContext.extract_stack()`` when none is given.
      """

      def __init__(self, msg: str, real_stack: StackSummary | None = None) -> None:
          super().__init__(msg)
          self.msg = msg
          # Only an explicit None triggers extraction; an empty stack is kept.
          if real_stack is None:
              real_stack = torch._guards.TracingContext.extract_stack()
          self.real_stack = real_stack
  class IncorrectUsage(Exception):
      """Plain ``Exception`` subclass; not part of the TorchDynamoException tree."""
  # TODO: I'm a little uncertain about what error classification we should have
  # for this. This is potentially a user error, but regressions in
  # specialization in PyTorch proper could also trigger this problem
  class FailOnRecompileLimitHit(Exception):
      """Plain ``Exception`` subclass; not part of the TorchDynamoException tree."""
  class PackageError(TorchDynamoException):
      """TorchDynamoException subclass that adds no state or behavior of its own."""
  class ObservedException(TorchDynamoException):
      """An exception observed during the tracing.

      This exception is used by Dynamo to handle exceptions.

      Attributes:
          real_stack: The supplied stack, or the result of
              ``torch._guards.TracingContext.extract_stack()`` when none is given.
      """

      def __init__(
          self, *args: Any, real_stack: Optional[StackSummary] = None, **kwargs: Any
      ) -> None:
          super().__init__(*args, **kwargs)
          # Only an explicit None triggers extraction; an empty stack is kept.
          if real_stack is None:
              real_stack = torch._guards.TracingContext.extract_stack()
          self.real_stack: StackSummary = real_stack
  class ObservedUserStopIteration(ObservedException):
      """An UserStopIteration exception observed during the Dynamo tracing (e.g Dynamo tracing __next__).

      Attributes:
          value: First positional argument, or ``None`` when none was given.
      """

      value: Optional[Any]

      # Reference `StopIteration_init` in CPython
      # https://github.com/python/cpython/blob/3.11/Objects/exceptions.c#L568-L584
      def __init__(
          self, *args: Any, real_stack: Optional[StackSummary] = None, **kwargs: Any
      ) -> None:
          # The message is fixed; positional args only feed `value`, and any
          # extra keyword arguments are accepted but not used.
          super().__init__("unhandled `raise StopIteration`", real_stack=real_stack)
          self.value = args[0] if len(args) > 0 else None
  class ObservedLookupError(ObservedException):
      """A LookupError exception to be raised from inside Dynamo tracing. This can happen on __getitem__."""
  class ObservedIndexError(ObservedLookupError):
      """An IndexError exception to be raised from inside Dynamo tracing. This can happen on list __getitem__."""
  class ObservedKeyError(ObservedLookupError):
      """A KeyError exception to be raised from inside Dynamo tracing. This can happen on dict __getitem__."""
  class ObservedGeneratorExit(ObservedException):
      """ObservedException subclass mapped from ``GeneratorExit``; adds no state."""
  class ObservedAttributeError(ObservedException):
      """An AttributeError exception to be raised from inside Dynamo tracing. This can happen on user defined object __getattr__."""
  class ObservedRuntimeError(ObservedException):
      """A RuntimeError exception to be raised from inside Dynamo tracing. This can happen on generator.throw(..) method."""
  class ObservedNotImplementedError(ObservedException):
      """ObservedException subclass mapped from ``NotImplementedError``; adds no state."""
  class ObservedTypeError(ObservedException):
      """A TypeError exception to be raised from inside Dynamo tracing. This can happen on generator.send(..) method."""
  # Builtin exception type -> the Observed* class raised in its place.
  # get_dynamo_observed_exception() adds entries for types missing here.
  observed_exception_map = {
      StopIteration: ObservedUserStopIteration,
      LookupError: ObservedLookupError,
      IndexError: ObservedIndexError,
      GeneratorExit: ObservedGeneratorExit,
      KeyError: ObservedKeyError,
      AttributeError: ObservedAttributeError,
      RuntimeError: ObservedRuntimeError,
      NotImplementedError: ObservedNotImplementedError,
      TypeError: ObservedTypeError,
  }
  def get_dynamo_observed_exception(exc_type: type[Exception]) -> type[ObservedException]:
      """Return the Observed* class registered for ``exc_type``, creating it if needed.

      Types missing from ``observed_exception_map`` get a new ``ObservedException``
      subclass named ``Observed<name>Error``, which is cached in the map so later
      calls return the same class.
      """
      # pyrefly: ignore [bad-index, index-error]
      observed_cls = observed_exception_map.get(exc_type)
      if observed_cls is None:
          name = getattr(exc_type, "__name__", str(exc_type))
          observed_cls = type(f"Observed{name}Error", (ObservedException,), {})
          observed_exception_map[exc_type] = observed_cls  # type: ignore[assignment]
      return observed_cls
  def raise_observed_exception(
      exc_type: type[Exception],
      tx: InstructionTranslatorBase,
      *,
      args: Optional[list[Any]] = None,
      kwargs: Optional[dict[str, Any]] = None,
  ) -> NoReturn:
      """Record ``exc_type`` as the translator's current exception, then raise its Observed* class.

      Args:
          exc_type: Python exception type to raise.
          tx: Translator whose exception stack is updated.
          args: Positional arguments; each is wrapped with ``SourcelessBuilder``
              to build the exception, and the raw values are passed to the
              raised Observed* exception.
          kwargs: Passed as-is as the keyword arguments of the traced
              constructor call.

      Raises:
          ObservedException: Always, as the class returned by
              ``get_dynamo_observed_exception(exc_type)``.
      """
      from .symbolic_convert import ExceptionVals
      from .variables.builder import SourcelessBuilder

      # CPython here raises an exception. Since there is no python code, we have to manually setup the exception
      # stack and raise the exception.
      exc_type_vt = SourcelessBuilder.create(tx, exc_type)
      arg_vts = [SourcelessBuilder.create(tx, a) for a in args] if args else []
      exception_vt = exc_type_vt.call_function(
          tx,  # pyrefly: ignore[bad-argument-type]
          arg_vts,
          kwargs or {},
      )
      assert isinstance(exception_vt, ExceptionVals)
      tx._attach_traceback_to_exception(exception_vt)
      tx.exn_vt_stack.set_current_exception(exception_vt)  # type: ignore[arg-type]

      raised_exc = get_dynamo_observed_exception(exc_type)
      # Store the original exception arguments for better error messages
      if args:
          raise raised_exc(*args)
      raise raised_exc
  def handle_observed_exception(tx: Any) -> None:
      """Mark the translator's current exception as handled.

      Args:
          tx: Object exposing ``exn_vt_stack.clear_current_exception()``.
      """
      # This is essentially exception handling code, equivalent of this pseudo code
      #
      # try:
      #     ... somebody raising StopIteration
      # except StopIteration
      #     pass
      #
      # If this was going through the python code, we would have called exception_handler method, but FOR_ITER
      # handles the exception completely in CPython. For example for 3.11, the resulting bytecode is
      #
      #
      #   6          46 LOAD_GLOBAL              2 (StopIteration)
      #              58 RAISE_VARARGS            1
      #         >>   60 PUSH_EXC_INFO
      #   7          62 LOAD_GLOBAL              2 (StopIteration)
      #              74 CHECK_EXC_MATCH
      #              76 POP_JUMP_FORWARD_IF_FALSE     3 (to 84)
      #              78 POP_TOP
      #   8          80 POP_EXCEPT
      #
      # Fortunately this translates to a simple pop from the exn_vt_stack
      exception_stack = tx.exn_vt_stack
      exception_stack.clear_current_exception()
  355. # These exceptions are ok to fallback to eager/graph_break.
  356. exceptions_allowed_to_be_fallback = (
  357. torch._subclasses.fake_tensor.DataDependentOutputException,
  358. torch._subclasses.fake_tensor.DynamicOutputShapeException,
  359. torch._subclasses.fake_tensor.UnsupportedOperatorException,
  360. torch._subclasses.fake_tensor.UnsupportedFakeTensorException,
  361. torch._subclasses.fake_tensor.UnsupportedMutationAliasingException,
  362. )
  def unimplemented_with_warning(
      e: Exception,
      code: types.CodeType,
      *,
      gb_type: str,
      context: str,
      explanation: str,
      hints: list[str],
  ) -> NoReturn:
      """Log a verbose report for ``e``, then graph break with a warning.

      This function calls unimplemented internally and eventually graph breaks
      or falls to eager. unimplemented itself does not print any user warnings,
      i.e., its very silent. This helper function is intended when an error is
      encountered in the torch.compile stack which is worth showing as warning
      to the user. For example, if AOT Autograd backend fails with a fake tensor
      exception, its ok to fallback to eager but not silently. Here, we can use
      this function to log the message and the stack trace.

      Args:
          e: The exception being reported; chained via ``from_exc``.
          code: Code object used to build the verbose report.
          gb_type, context, explanation, hints: Forwarded to ``unimplemented``.
      """
      graph_break_msg = format_error_msg_verbose(e, code)

      def _artifact_metadata() -> dict[str, str]:
          return {
              "name": "dynamo_graph_break_reason",
              "encoding": "string",
          }

      def _artifact_payload() -> str:
          return graph_break_msg

      torch._logging.trace_structured(
          "artifact",
          metadata_fn=_artifact_metadata,
          payload_fn=_artifact_payload,
      )
      graph_breaks_log.debug("%s", graph_break_msg)

      # Call through an alias to prevent a graph break registry entry.
      _unimplemented = unimplemented
      _unimplemented(
          gb_type=gb_type,
          context=context,
          explanation=explanation,
          hints=hints,
          from_exc=e,
          log_warning=True,
      )
  def format_graph_break_message(
      gb_type: str,
      context: str,
      explanation: str,
      hints: list[str],
  ) -> str:
      """Render the multi-line, user-facing graph break message.

      Layout: the ``gb_type`` on the first line, followed by the explanation,
      one line per hint, and the developer debug context. Every line after the
      first starts with a single space. When the graph break registry knows
      ``gb_type``, a documentation link is appended.

      Args:
          gb_type: Graph break type; first line and registry lookup key.
          context: Developer debug context; may span several lines.
          explanation: User-facing explanation; may span several lines.
          hints: User-facing hints, one "Hint:" line each.

      Returns:
          The formatted message.
      """
      # indent() followed by lstrip() indents continuation lines only; the
      # first line's indent comes from the label it is appended to.
      explanation = textwrap.indent(explanation, " ").lstrip()
      hints_str = "\n".join(
          " Hint: " + textwrap.indent(hint, " ").lstrip() for hint in hints
      )
      context = textwrap.indent(context, " ").lstrip()

      # The labelled lines carry the same leading space as the hint and link
      # lines, so the "\n Explanation:" marker that
      # augment_exc_message_with_hop_name() partitions on occurs in the message.
      msg = "\n".join(
          (
              gb_type,
              f" Explanation: {explanation}",
              hints_str,
              f" Developer debug context: {context}",
          )
      )

      documentation_link = get_gbid_documentation_link(gb_type)
      if documentation_link:
          msg += f"\n\n For more details about this graph break, please visit: {documentation_link}"
      return msg
  @lru_cache(maxsize=1)
  def _load_gb_type_to_gb_id_map() -> dict[str, Any]:
      """
      Loads the gb_type to gb_id map from the graph break registry from JSON file with caching.
      Includes historical gb_type (mapping behavior of duplicate gb_types with different gb_ids is undefined).

      The registry is read as a mapping from gb_id to a list of entries, each
      with a "Gb_type" key. If it cannot be located, opened or parsed, the
      error is logged and an empty mapping is returned. The result is cached
      for the life of the process.

      Returns:
          Mapping from each "Gb_type" string to the gb_id it is listed under.
      """
      try:
          script_dir = Path(__file__).resolve().parent
          registry_path = get_file_path_2(
              "", str(script_dir), "graph_break_registry.json"
          )
          # JSON is UTF-8; do not depend on the platform's locale encoding.
          with open(registry_path, encoding="utf-8") as f:
              registry = json.load(f)
      except Exception:
          log.exception("Error accessing the registry file")
          # pyrefly: ignore [implicit-any]
          registry = {}

      return {
          entry["Gb_type"]: gb_id
          for gb_id, entries in registry.items()
          for entry in entries
      }
  def get_gbid_documentation_link(gb_type: str) -> Optional[str]:
      """
      Retrieves the GBID documentation link for a given graph break type.

      Args:
          gb_type: The graph break type to look up.

      Returns:
          A string containing the documentation URL if found, otherwise None.
      """
      GRAPH_BREAK_SITE_URL = (
          "https://meta-pytorch.github.io/compile-graph-break-site/gb/"  # @lint-ignore
      )

      gb_id = _load_gb_type_to_gb_id_map().get(gb_type)
      if gb_id is None:
          return None
      # removeprefix drops exactly one leading "GB"; lstrip("GB") would strip
      # any run of leading "G"/"B" characters instead.
      return f"{GRAPH_BREAK_SITE_URL}gb{gb_id.removeprefix('GB')}.html"
  # Sentinel meaning "no from_exc argument was supplied".
  _NOTHING = object()


  def unimplemented(
      *,
      gb_type: str,
      context: str,
      explanation: str,
      hints: list[str],
      from_exc: Any = _NOTHING,
      log_warning: bool = False,
      skip_frame: bool = False,
  ) -> NoReturn:
      """
      Called within dynamo to cause a graph break.

      Args:
          gb_type: Context-free graph break type. It should be a short string without any
                   information specific to the tracing context (i.e. no dynamically-generated strings)
          context: Developer context for the graph break. It can contain tracing context/dynamic strings.
          explanation: User-facing context-dependent explanation for the graph break. Can be dynamic.
          hints: List of user-facing hints for the graph break.
          from_exc: Exception that led to this graph break, if any. Its
                    ``real_stack`` attribute, when present, is reused.
          log_warning: When true, the formatted message is also logged as a warning.
          skip_frame: Passed through to the raised ``Unsupported``.

      Raises:
          Unsupported: Always.
      """
      msg = format_graph_break_message(gb_type, context, explanation, hints)
      if log_warning:
          log.warning(msg)

      if from_exc is _NOTHING:
          raise Unsupported(msg, gb_type, skip_frame)

      past_real_stack = getattr(from_exc, "real_stack", None)
      if isinstance(from_exc, Unsupported):
          # A nested graph break: prepend the earlier message and raise without
          # an explicit `from` clause.
          msg = f"{from_exc.msg}\n\n*** While handling this graph break, another graph break occurred: ***\n\n{msg}"
          raise Unsupported(msg, gb_type, skip_frame, real_stack=past_real_stack)
      raise Unsupported(
          msg, gb_type, skip_frame, real_stack=past_real_stack
      ) from from_exc
  # KeyError has special handling for its args
  # see https://github.com/python/cpython/blob/3.11/Objects/exceptions.c#L2534 for details
  class KeyErrorMsg:
      """Wrapper whose ``repr`` equals its ``str``.

      Attributes:
          value: The wrapped object.
      """

      def __init__(self, value: Any) -> None:
          self.value = value

      def __str__(self) -> str:
          wrapped = self.value
          return str(wrapped)

      def __repr__(self) -> str:
          # Deliberately the same text as __str__ (no quoting or escaping).
          return self.__str__()
  def augment_exc_message_with_hop_name(exc: Exception, msg: str) -> str:
      """Insert a "Higher Order Operator" line into ``msg`` when ``exc`` has ``_hop_name``.

      The line goes right before the explanation if present; otherwise after
      the message. ``msg`` is returned unchanged when ``exc`` has no
      ``_hop_name`` attribute.
      """
      if not hasattr(exc, "_hop_name"):
          return msg
      head, marker, tail = msg.partition("\n Explanation:")
      hop_name = exc._hop_name  # type: ignore[attr-defined]
      return f"{head}\n Higher Order Operator: {hop_name}{marker}{tail}"
  def augment_exc_message(exc: Exception, msg: str = "\n", export: bool = False) -> None:
      """Append debugging context to ``exc``'s message, in place.

      ``exc.args[0]`` is rewritten to the old message (with any higher-order-op
      line inserted) followed by ``msg`` and whichever sections apply: the user
      stack, the replay record hint, the verbose-mode hint and the minifier
      hint. ``exc.innermost_user_frame_summary`` is set to the last user frame,
      or ``None`` when no user stack is available.

      Args:
          exc: Exception to modify.
          msg: Text that the appended sections start from.
          export: Not read by this function.
      """
      import traceback

      exc.innermost_user_frame_summary = None  # type: ignore[attr-defined]

      real_stack = get_real_stack(exc)
      if real_stack is not None and len(real_stack) > 0:
          exc.innermost_user_frame_summary = real_stack[-1]  # type: ignore[attr-defined]
          msg += f"\nfrom user code:\n {''.join(traceback.format_list(real_stack))}"

      if config.replay_record_enabled and hasattr(exc, "record_filename"):
          # Adjacent literals instead of a backslash continuation, so the space
          # between "run" and the replay call cannot be lost.
          msg += (
              f"\nLast frame execution written to {exc.record_filename}. "
              "To run only this frame while debugging, run "
              f"torch._dynamo.replay('{exc.record_filename}').\n"
          )

      if not config.verbose and hasattr(exc, "real_stack"):
          msg += (
              "\nSet TORCHDYNAMO_VERBOSE=1 for the internal stack trace "
              "(please do this especially if you're reporting a bug to PyTorch). "
              'For even more developer context, set TORCH_LOGS="+dynamo"\n'
          )

      if hasattr(exc, "inner_exception") and hasattr(
          exc.inner_exception, "minifier_path"
      ):
          if hasattr(exc.inner_exception, "buck_command"):
              msg += (
                  f"\nMinifier script written to {exc.inner_exception.minifier_path}. Run "
                  f"this buck command to find the smallest traced graph "
                  f"which reproduces this error: {exc.inner_exception.buck_command}\n"
              )
          else:
              msg += (
                  f"\nMinifier script written to {exc.inner_exception.minifier_path}. Run "
                  "this script to find the smallest traced graph which reproduces this error.\n"
              )

      old_msg = "" if len(exc.args) == 0 else str(exc.args[0])
      old_msg = augment_exc_message_with_hop_name(exc, old_msg)

      if isinstance(exc, KeyError):
          # Wrapped in KeyErrorMsg because KeyError has special handling for
          # its args when it is converted to a string.
          exc.args = (KeyErrorMsg(old_msg + msg),) + exc.args[1:]
      else:
          new_msg = old_msg + msg
          exc.args = (new_msg,) + exc.args[1:]
  def get_exc_message(
      e: Exception, compile_id: CompileId
  ) -> tuple[Optional[str], Optional[int]]:
      """Return the innermost user frame's location and tag ``e`` with ``compile_id``.

      Args:
          e: Exception that already has ``innermost_user_frame_summary`` set.
          compile_id: Value stored on ``e.compile_id``.

      Returns:
          ``(filename, lineno)`` of the innermost user frame, or
          ``(None, None)`` when the summary is ``None``.
      """
      summary = e.innermost_user_frame_summary  # type: ignore[attr-defined]
      if summary is None:
          location = (None, None)
      else:
          location = (summary.filename, summary.lineno)
      e.compile_id = compile_id  # type: ignore[attr-defined]
      return location
  def get_stack_above_dynamo() -> StackSummary:
      """Extract the current call stack and pass it through ``filter_stack``."""
      current_stack = extract_stack()
      return filter_stack(current_stack)
  def get_real_stack(
      exc: Exception, frame: Optional[DynamoFrameType] = None
  ) -> Optional[StackSummary]:
      """Return the user stack recorded on ``exc``, optionally prefixed by the caller's stack.

      Args:
          exc: Exception that may carry a ``real_stack`` attribute.
          frame: When not ``None``, the filtered current Python stack is put in
              front of ``exc.real_stack``. Only its presence is checked.

      Returns:
          ``None`` when ``exc`` has no ``real_stack`` (or it is ``None``),
          otherwise a new ``StackSummary``.
      """
      recorded = getattr(exc, "real_stack", None)
      if recorded is None:
          return None

      # NB: it's possible for real_stack to be []; we still attempt to
      # report a stack anyway because the stack_above_dynamo may still
      # be useful for debugging

      if frame is None:
          prefix = StackSummary()
      else:
          # NB: frame is PyInterpreterFrame on Python 3.11 and later,
          # not a TRUE frame object.  You can't actually feed it
          # to traceback because it doesn't have enough information.
          # To solve this problem, we technically should just materialize
          # the frame, the same way _PyFrame_GetFrameObject would do
          # (but we cannot actually do this, because this populates
          # frame_obj field, which default eval frame doesn't like).
          #
          # Fortunately, in this case, we can hack it: there's no need
          # to actually use the truly top frame, we can just extract
          # from where we are right now and rely on filter_stack to
          # get rid of all the dynamo frames.  For ease of testing
          # we apply this behavior to ALL Python versions
          prefix = get_stack_above_dynamo()

      return StackSummary.from_list(prefix + recorded)
  # filter out all frames after entering dynamo
  def filter_stack(stack: StackSummary) -> StackSummary:
      """Keep the frames that precede the first ``convert_frame`` frame.

      Frames without a filename are ignored. Frames whose filename contains
      "eval_frame", or whose source line contains "torch._dynamo.optimize(",
      are dropped. Scanning stops at the first frame whose filename contains
      "convert_frame".
      """
      kept = StackSummary()
      for summary in stack:
          path = summary.filename
          if path is None:
              continue
          if "convert_frame" in path:
              break
          if "eval_frame" in path:
              continue
          source_line = summary.line
          if source_line and "torch._dynamo.optimize(" in source_line:
              continue
          kept.append(summary)
      return kept
  def remove_resume_prefix(name: str) -> Optional[str]:
      """Return the original function name embedded in a resume-function name.

      Args:
          name: A function name, e.g. one taken from a traceback frame.

      Returns:
          The ``\\w+`` part between the resume prefix and ``_at_<digits>``, or
          ``None`` when ``name`` does not start with that pattern.
      """
      from .resume_execution import TORCH_DYNAMO_RESUME_IN_PREFIX

      found = re.match(f"{TORCH_DYNAMO_RESUME_IN_PREFIX}_(\\w+)_at_\\d+", name)
      return found.group(1) if found else None
  def collapse_resume_frames(stack: StackSummary | list[FrameSummary]) -> StackSummary:
      """
      When we graph break, we create a resume function and make a regular Python call
      to it, which gets intercepted by Dynamo. This behavior is normally shown in the
      traceback, which can be confusing to a user. So we can filter out resume frames
      for better traceback clarity.

      Example:
          File "..." line 3, in f
              <line 3>
          File "..." line 5, in torch_dynamo_resume_in_f_at_80
              <line 5>
          File "..." line 10, in torch_dynamo_resume_in_f_at_120
              <line 10>

      becomes
          File "..." line 10, in f
              <line 10>

      Note: a resume frame that replaces its predecessor is renamed in place,
      so the frame objects in ``stack`` are modified. Frames without a
      filename are dropped.
      """
      collapsed = StackSummary()
      for summary in stack:
          if summary.filename is None:
              continue
          base_name = remove_resume_prefix(summary.name)
          continues_previous = (
              bool(collapsed) and bool(base_name) and collapsed[-1].name == base_name
          )
          if continues_previous:
              # Same function as the previous entry: keep only the later frame,
              # shown under the original function name.
              summary.name = base_name
              collapsed[-1] = summary
          else:
              collapsed.append(summary)
      return collapsed
  def format_error_msg_verbose(
      exc: Exception,
      code: types.CodeType,
      record_filename: Optional[str] = None,
      frame: Optional[DynamoFrameType] = None,
  ) -> str:
      """Build the verbose "WON'T CONVERT" report for the exception being handled.

      The report holds a header naming ``code``, the output of ``format_exc()``
      and, when ``exc`` carries a user stack, that stack.

      Args:
          exc: Exception whose recorded user stack is appended.
          code: Code object named in the header.
          record_filename: Not read by this function.
          frame: Forwarded to ``get_real_stack``.
      """
      rule = "=" * 10
      parts = [
          f"WON'T CONVERT {code.co_name} {code.co_filename} line {code.co_firstlineno}\n",
          rule + " TorchDynamo Stack Trace " + rule + "\n",
          format_exc(),
      ]

      real_stack = get_real_stack(exc, frame)
      if real_stack is not None:
          parts.append(
              "\n"
              + rule
              + " The above exception occurred while processing the following code "
              + rule
              + "\n\n"
          )
          parts.append("".join(format_list(real_stack)))
          parts.append("\n")
          parts.append(rule)

      return "".join(parts)
  def format_frame_info(code: types.CodeType) -> str:
      """Describe a code object as ``"<name> (<filename> line <firstlineno>)"``.

      Attributes missing from ``code`` fall back to ``"<unknown>"`` for the
      name and filename, and ``0`` for the line number.
      """
      name = getattr(code, "co_name", "<unknown>")
      filename = getattr(code, "co_filename", "<unknown>")
      first_line = getattr(code, "co_firstlineno", 0)
      return f"{name} ({filename} line {first_line})"
  def format_skip_frame_message(code: Optional[types.CodeType], reason: str) -> str:
      """Build the two-line "skip the frame" notice.

      Args:
          code: Code object of the skipped frame, or ``None`` when unknown; when
              given, its description is included in the first line.
          reason: Text placed after "Reason: " on the second line.
      """
      if code is None:
          return (
              f"torch.compile intentionally decided to skip the frame and fall back to eager.\n"
              f"Reason: {reason}"
          )
      frame_info = format_frame_info(code)
      return (
          f"torch.compile intentionally decided to skip the frame {frame_info} and fall back to eager.\n"
          f"Reason: {reason}"
      )
  def format_error_msg(
      exc: Exception,
      code: types.CodeType,
      record_filename: Optional[str] = None,
      frame: Optional[DynamoFrameType] = None,
  ) -> str:
      """Build the "WON'T CONVERT" message for the exception being handled.

      With ``config.verbose`` set this defers to ``format_error_msg_verbose``;
      otherwise it returns a short header followed by ``format_exc()``.

      Args:
          exc: Exception being reported (used by the verbose variant).
          code: Code object named in the header.
          record_filename: Forwarded to the verbose variant.
          frame: Forwarded to the verbose variant.
      """
      if config.verbose:
          return format_error_msg_verbose(exc, code, record_filename, frame)
      # Adjacent literals instead of a backslash continuation, so the space
      # between the filename and "line" cannot be lost; this matches the
      # header produced by format_error_msg_verbose.
      return (
          f"WON'T CONVERT {code.co_name} {code.co_filename} "
          f"line {code.co_firstlineno} \ndue to: \n{format_exc()}"
      )