utils.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. from collections import defaultdict
  2. from functools import lru_cache
  3. import boto3
  4. from boto3.exceptions import ResourceNotExistsError
  5. from boto3.resources.base import ServiceResource
  6. from botocore.client import BaseClient
  7. from botocore.config import Config
  8. from ray.autoscaler._private.cli_logger import cf, cli_logger
  9. from ray.autoscaler._private.constants import BOTO_MAX_RETRIES
  10. class LazyDefaultDict(defaultdict):
  11. """
  12. LazyDefaultDict(default_factory[, ...]) --> dict with default factory
  13. The default factory is call with the key argument to produce
  14. a new value when a key is not present, in __getitem__ only.
  15. A LazyDefaultDict compares equal to a dict with the same items.
  16. All remaining arguments are treated the same as if they were
  17. passed to the dict constructor, including keyword arguments.
  18. """
  19. def __missing__(self, key):
  20. """
  21. __missing__(key) # Called by __getitem__ for missing key; pseudo-code:
  22. if self.default_factory is None: raise KeyError((key,))
  23. self[key] = value = self.default_factory(key)
  24. return value
  25. """
  26. self[key] = self.default_factory(key)
  27. return self[key]
  28. def handle_boto_error(exc, msg, *args, **kwargs):
  29. error_code = None
  30. error_info = None
  31. # todo: not sure if these exceptions always have response
  32. if hasattr(exc, "response"):
  33. error_info = exc.response.get("Error", None)
  34. if error_info is not None:
  35. error_code = error_info.get("Code", None)
  36. generic_message_args = [
  37. "{}\nError code: {}",
  38. msg.format(*args, **kwargs),
  39. cf.bold(error_code),
  40. ]
  41. # apparently
  42. # ExpiredTokenException
  43. # ExpiredToken
  44. # RequestExpired
  45. # are all the same pretty much
  46. credentials_expiration_codes = [
  47. "ExpiredTokenException",
  48. "ExpiredToken",
  49. "RequestExpired",
  50. ]
  51. if error_code in credentials_expiration_codes:
  52. # "An error occurred (ExpiredToken) when calling the
  53. # GetInstanceProfile operation: The security token
  54. # included in the request is expired"
  55. # "An error occurred (RequestExpired) when calling the
  56. # DescribeKeyPairs operation: Request has expired."
  57. token_command = (
  58. "aws sts get-session-token "
  59. "--serial-number arn:aws:iam::"
  60. + cf.underlined("ROOT_ACCOUNT_ID")
  61. + ":mfa/"
  62. + cf.underlined("AWS_USERNAME")
  63. + " --token-code "
  64. + cf.underlined("TWO_FACTOR_AUTH_CODE")
  65. )
  66. secret_key_var = (
  67. "export AWS_SECRET_ACCESS_KEY = "
  68. + cf.underlined("REPLACE_ME")
  69. + " # found at Credentials.SecretAccessKey"
  70. )
  71. session_token_var = (
  72. "export AWS_SESSION_TOKEN = "
  73. + cf.underlined("REPLACE_ME")
  74. + " # found at Credentials.SessionToken"
  75. )
  76. access_key_id_var = (
  77. "export AWS_ACCESS_KEY_ID = "
  78. + cf.underlined("REPLACE_ME")
  79. + " # found at Credentials.AccessKeyId"
  80. )
  81. # fixme: replace with a Github URL that points
  82. # to our repo
  83. aws_session_script_url = (
  84. "https://gist.github.com/maximsmol/a0284e1d97b25d417bd9ae02e5f450cf"
  85. )
  86. cli_logger.verbose_error(*generic_message_args)
  87. cli_logger.verbose(vars(exc))
  88. cli_logger.panic("Your AWS session has expired.")
  89. cli_logger.newline()
  90. cli_logger.panic("You can request a new one using")
  91. cli_logger.panic(cf.bold(token_command))
  92. cli_logger.panic("then expose it to Ray by setting")
  93. cli_logger.panic(cf.bold(secret_key_var))
  94. cli_logger.panic(cf.bold(session_token_var))
  95. cli_logger.panic(cf.bold(access_key_id_var))
  96. cli_logger.newline()
  97. cli_logger.panic("You can find a script that automates this at:")
  98. cli_logger.panic(cf.underlined(aws_session_script_url))
  99. # Do not re-raise the exception here because it looks awful
  100. # and we already print all the info in verbose
  101. cli_logger.abort()
  102. # todo: any other errors that we should catch separately?
  103. cli_logger.panic(*generic_message_args)
  104. cli_logger.newline()
  105. with cli_logger.verbatim_error_ctx("Boto3 error:"):
  106. cli_logger.verbose("{}", str(vars(exc)))
  107. cli_logger.panic("{}", str(exc))
  108. cli_logger.abort()
  109. def boto_exception_handler(msg, *args, **kwargs):
  110. # todo: implement timer
  111. class ExceptionHandlerContextManager:
  112. def __enter__(self):
  113. pass
  114. def __exit__(self, type, value, tb):
  115. import botocore
  116. if type is botocore.exceptions.ClientError:
  117. handle_boto_error(value, msg, *args, **kwargs)
  118. return ExceptionHandlerContextManager()
  119. @lru_cache()
  120. def resource_cache(
  121. name, region, max_retries=BOTO_MAX_RETRIES, **kwargs
  122. ) -> ServiceResource:
  123. cli_logger.verbose(
  124. "Creating AWS resource `{}` in `{}`", cf.bold(name), cf.bold(region)
  125. )
  126. kwargs.setdefault(
  127. "config",
  128. Config(retries={"max_attempts": max_retries}),
  129. )
  130. return boto3.resource(
  131. name,
  132. region,
  133. **kwargs,
  134. )
  135. @lru_cache()
  136. def client_cache(name, region, max_retries=BOTO_MAX_RETRIES, **kwargs) -> BaseClient:
  137. try:
  138. # try to re-use a client from the resource cache first
  139. return resource_cache(name, region, max_retries, **kwargs).meta.client
  140. except ResourceNotExistsError:
  141. # fall back for clients without an associated resource
  142. cli_logger.verbose(
  143. "Creating AWS client `{}` in `{}`", cf.bold(name), cf.bold(region)
  144. )
  145. kwargs.setdefault(
  146. "config",
  147. Config(retries={"max_attempts": max_retries}),
  148. )
  149. return boto3.client(
  150. name,
  151. region,
  152. **kwargs,
  153. )