validation.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. import logging
  2. import sys
  3. from collections import OrderedDict
  4. from pathlib import Path
  5. from typing import Dict, List, Optional, Union
  6. import yaml
  7. from ray._private.path_utils import is_path
  8. from ray._private.runtime_env.packaging import parse_path
  9. logger = logging.getLogger(__name__)
  10. def validate_path(path: str) -> None:
  11. """Parse the path to ensure it is well-formed and exists."""
  12. parse_path(path)
  13. def validate_uri(uri: str):
  14. try:
  15. from ray._private.runtime_env.packaging import Protocol, parse_uri
  16. protocol, path = parse_uri(uri)
  17. except ValueError:
  18. raise ValueError(
  19. f"{uri} is not a valid URI. Passing directories or modules to "
  20. "be dynamically uploaded is only supported at the job level "
  21. "(i.e., passed to `ray.init`)."
  22. )
  23. if (
  24. protocol in Protocol.remote_protocols()
  25. and not path.endswith(".zip")
  26. and not path.endswith(".whl")
  27. ):
  28. raise ValueError("Only .zip or .whl files supported for remote URIs.")
  29. def _handle_local_deps_requirement_file(requirements_file: str):
  30. """Read the given [requirements_file], and return all required dependencies."""
  31. requirements_path = Path(requirements_file)
  32. if not requirements_path.is_file():
  33. raise ValueError(f"{requirements_path} is not a valid file")
  34. return requirements_path.read_text().strip().split("\n")
  35. def validate_py_modules_uris(py_modules_uris: List[str]) -> List[str]:
  36. """Parses and validates a 'py_modules' option.
  37. Expects py_modules to be a list of URIs.
  38. """
  39. if not isinstance(py_modules_uris, list):
  40. raise TypeError(
  41. "`py_modules` must be a list of strings, got " f"{type(py_modules_uris)}."
  42. )
  43. for module in py_modules_uris:
  44. if not isinstance(module, str):
  45. raise TypeError("`py_module` must be a string, got " f"{type(module)}.")
  46. validate_uri(module)
  47. def parse_and_validate_py_modules(py_modules: List[str]) -> List[str]:
  48. """Parses and validates a 'py_modules' option.
  49. Expects py_modules to be a list of local paths or URIs.
  50. """
  51. if not isinstance(py_modules, list):
  52. raise TypeError(
  53. "`py_modules` must be a list of strings, got " f"{type(py_modules)}."
  54. )
  55. for module in py_modules:
  56. if not isinstance(module, str):
  57. raise TypeError("`py_module` must be a string, got " f"{type(module)}.")
  58. if is_path(module):
  59. validate_path(module)
  60. else:
  61. validate_uri(module)
  62. return py_modules
  63. def validate_working_dir_uri(working_dir_uri: str) -> str:
  64. """Parses and validates a 'working_dir' option."""
  65. if not isinstance(working_dir_uri, str):
  66. raise TypeError(
  67. "`working_dir` must be a string, got " f"{type(working_dir_uri)}."
  68. )
  69. validate_uri(working_dir_uri)
  70. def parse_and_validate_working_dir(working_dir: str) -> str:
  71. """Parses and validates a 'working_dir' option.
  72. This can be a URI or a path.
  73. """
  74. assert working_dir is not None
  75. if not isinstance(working_dir, str):
  76. raise TypeError("`working_dir` must be a string, got " f"{type(working_dir)}.")
  77. if is_path(working_dir):
  78. validate_path(working_dir)
  79. else:
  80. validate_uri(working_dir)
  81. return working_dir
  82. def parse_and_validate_conda(conda: Union[str, dict]) -> Union[str, dict]:
  83. """Parses and validates a user-provided 'conda' option.
  84. Conda can be one of three cases:
  85. 1) A dictionary describing the env. This is passed through directly.
  86. 2) A string referring to the name of a preinstalled conda env.
  87. 3) A string pointing to a local conda YAML file. This is detected
  88. by looking for a '.yaml' or '.yml' suffix. In this case, the file
  89. will be read as YAML and passed through as a dictionary.
  90. """
  91. assert conda is not None
  92. if sys.platform == "win32":
  93. logger.warning(
  94. "runtime environment support is experimental on Windows. "
  95. "If you run into issues please file a report at "
  96. "https://github.com/ray-project/ray/issues."
  97. )
  98. result = conda
  99. if isinstance(conda, str):
  100. file_path = Path(conda)
  101. if file_path.suffix in (".yaml", ".yml"):
  102. if not file_path.is_file():
  103. raise ValueError(f"Can't find conda YAML file {file_path}.")
  104. try:
  105. result = yaml.safe_load(file_path.read_text())
  106. except Exception as e:
  107. raise ValueError(f"Failed to read conda file {file_path}: {e}.")
  108. elif file_path.is_absolute():
  109. if not file_path.is_dir():
  110. raise ValueError(f"Can't find conda env directory {file_path}.")
  111. result = str(file_path)
  112. elif isinstance(conda, dict):
  113. result = conda
  114. else:
  115. raise TypeError(
  116. "runtime_env['conda'] must be of type str or " f"dict, got {type(conda)}."
  117. )
  118. return result
  119. def parse_and_validate_uv(uv: Union[str, List[str], Dict]) -> Optional[Dict]:
  120. """Parses and validates a user-provided 'uv' option.
  121. The value of the input 'uv' field can be one of two cases:
  122. 1) A List[str] describing the requirements. This is passed through.
  123. Example usage: ["tensorflow", "requests"]
  124. 2) a string containing the path to a local pip “requirements.txt” file.
  125. 3) A python dictionary that has one field:
  126. a) packages (required, List[str]): a list of uv packages, it same as 1).
  127. b) uv_check (optional, bool): whether to enable pip check at the end of uv
  128. install, default to False.
  129. c) uv_version (optional, str): user provides a specific uv to use; if
  130. unspecified, default version of uv will be used.
  131. d) uv_pip_install_options (optional, List[str]): user-provided options for
  132. `uv pip install` command, default to ["--no-cache"].
  133. The returned parsed value will be a list of packages. If a Ray library
  134. (e.g. "ray[serve]") is specified, it will be deleted and replaced by its
  135. dependencies (e.g. "uvicorn", "requests").
  136. """
  137. assert uv is not None
  138. if sys.platform == "win32":
  139. logger.warning(
  140. "runtime environment support is experimental on Windows. "
  141. "If you run into issues please file a report at "
  142. "https://github.com/ray-project/ray/issues."
  143. )
  144. result: str = ""
  145. if isinstance(uv, str):
  146. uv_list = _handle_local_deps_requirement_file(uv)
  147. result = dict(packages=uv_list, uv_check=False)
  148. elif isinstance(uv, list) and all(isinstance(dep, str) for dep in uv):
  149. result = dict(packages=uv, uv_check=False)
  150. elif isinstance(uv, dict):
  151. if set(uv.keys()) - {
  152. "packages",
  153. "uv_check",
  154. "uv_version",
  155. "uv_pip_install_options",
  156. }:
  157. raise ValueError(
  158. "runtime_env['uv'] can only have these fields: "
  159. "packages, uv_check, uv_version and uv_pip_install_options, but got: "
  160. f"{list(uv.keys())}"
  161. )
  162. if "packages" not in uv:
  163. raise ValueError(
  164. f"runtime_env['uv'] must include field 'packages', but got {uv}"
  165. )
  166. if "uv_check" in uv and not isinstance(uv["uv_check"], bool):
  167. raise TypeError(
  168. "runtime_env['uv']['uv_check'] must be of type bool, "
  169. f"got {type(uv['uv_check'])}"
  170. )
  171. if "uv_version" in uv and not isinstance(uv["uv_version"], str):
  172. raise TypeError(
  173. "runtime_env['uv']['uv_version'] must be of type str, "
  174. f"got {type(uv['uv_version'])}"
  175. )
  176. if "uv_pip_install_options" in uv:
  177. if not isinstance(uv["uv_pip_install_options"], list):
  178. raise TypeError(
  179. "runtime_env['uv']['uv_pip_install_options'] must be of type "
  180. f"list[str] got {type(uv['uv_pip_install_options'])}"
  181. )
  182. # Check each item in installation option.
  183. for idx, cur_opt in enumerate(uv["uv_pip_install_options"]):
  184. if not isinstance(cur_opt, str):
  185. raise TypeError(
  186. "runtime_env['uv']['uv_pip_install_options'] must be of type "
  187. f"list[str] got {type(cur_opt)} for {idx}-th item."
  188. )
  189. result = uv.copy()
  190. result["uv_check"] = uv.get("uv_check", False)
  191. result["uv_pip_install_options"] = uv.get(
  192. "uv_pip_install_options", ["--no-cache"]
  193. )
  194. if not isinstance(uv["packages"], list):
  195. raise ValueError(
  196. "runtime_env['uv']['packages'] must be of type list, "
  197. f"got: {type(uv['packages'])}"
  198. )
  199. else:
  200. raise TypeError(
  201. "runtime_env['uv'] must be of type " f"List[str], or dict, got {type(uv)}"
  202. )
  203. # Deduplicate packages for package lists.
  204. result["packages"] = list(OrderedDict.fromkeys(result["packages"]))
  205. if len(result["packages"]) == 0:
  206. result = None
  207. logger.debug(f"Rewrote runtime_env `uv` field from {uv} to {result}.")
  208. return result
  209. def parse_and_validate_pip(pip: Union[str, List[str], Dict]) -> Optional[Dict]:
  210. """Parses and validates a user-provided 'pip' option.
  211. The value of the input 'pip' field can be one of two cases:
  212. 1) A List[str] describing the requirements. This is passed through.
  213. 2) A string pointing to a local requirements file. In this case, the
  214. file contents will be read split into a list.
  215. 3) A python dictionary that has three fields:
  216. a) packages (required, List[str]): a list of pip packages, it same as 1).
  217. b) pip_check (optional, bool): whether to enable pip check at the end of pip
  218. install, default to False.
  219. c) pip_version (optional, str): the version of pip, ray will spell
  220. the package name 'pip' in front of the `pip_version` to form the final
  221. requirement string, the syntax of a requirement specifier is defined in
  222. full in PEP 508.
  223. d) pip_install_options (optional, List[str]): user-provided options for
  224. `pip install` command, defaults to ["--disable-pip-version-check", "--no-cache-dir"].
  225. The returned parsed value will be a list of pip packages. If a Ray library
  226. (e.g. "ray[serve]") is specified, it will be deleted and replaced by its
  227. dependencies (e.g. "uvicorn", "requests").
  228. """
  229. assert pip is not None
  230. result = None
  231. if sys.platform == "win32":
  232. logger.warning(
  233. "runtime environment support is experimental on Windows. "
  234. "If you run into issues please file a report at "
  235. "https://github.com/ray-project/ray/issues."
  236. )
  237. if isinstance(pip, str):
  238. # We have been given a path to a requirements.txt file.
  239. pip_list = _handle_local_deps_requirement_file(pip)
  240. result = dict(
  241. packages=pip_list,
  242. pip_check=False,
  243. )
  244. elif isinstance(pip, list) and all(isinstance(dep, str) for dep in pip):
  245. result = dict(packages=pip, pip_check=False)
  246. elif isinstance(pip, dict):
  247. if set(pip.keys()) - {
  248. "packages",
  249. "pip_check",
  250. "pip_install_options",
  251. "pip_version",
  252. }:
  253. raise ValueError(
  254. "runtime_env['pip'] can only have these fields: "
  255. "packages, pip_check, pip_install_options and pip_version, but got: "
  256. f"{list(pip.keys())}"
  257. )
  258. if "pip_check" in pip and not isinstance(pip["pip_check"], bool):
  259. raise TypeError(
  260. "runtime_env['pip']['pip_check'] must be of type bool, "
  261. f"got {type(pip['pip_check'])}"
  262. )
  263. if "pip_version" in pip:
  264. if not isinstance(pip["pip_version"], str):
  265. raise TypeError(
  266. "runtime_env['pip']['pip_version'] must be of type str, "
  267. f"got {type(pip['pip_version'])}"
  268. )
  269. if "pip_install_options" in pip:
  270. if not isinstance(pip["pip_install_options"], list):
  271. raise TypeError(
  272. "runtime_env['pip']['pip_install_options'] must be of type "
  273. f"list[str] got {type(pip['pip_install_options'])}"
  274. )
  275. # Check each item in installation option.
  276. for idx, cur_opt in enumerate(pip["pip_install_options"]):
  277. if not isinstance(cur_opt, str):
  278. raise TypeError(
  279. "runtime_env['pip']['pip_install_options'] must be of type "
  280. f"list[str] got {type(cur_opt)} for {idx}-th item."
  281. )
  282. result = pip.copy()
  283. # Contrary to pip_check, we do not insert the default value of pip_install_options.
  284. # This is to maintain backwards compatibility with ray==2.0.1
  285. result["pip_check"] = pip.get("pip_check", False)
  286. if "packages" not in pip:
  287. raise ValueError(
  288. f"runtime_env['pip'] must include field 'packages', but got {pip}"
  289. )
  290. elif isinstance(pip["packages"], str):
  291. result["packages"] = _handle_local_deps_requirement_file(pip["packages"])
  292. elif not isinstance(pip["packages"], list):
  293. raise ValueError(
  294. "runtime_env['pip']['packages'] must be of type str of list, "
  295. f"got: {type(pip['packages'])}"
  296. )
  297. else:
  298. raise TypeError(
  299. "runtime_env['pip'] must be of type str or " f"List[str], got {type(pip)}"
  300. )
  301. # Eliminate duplicates to prevent `pip install` from erroring. Use
  302. # OrderedDict to preserve the order of the list. This makes the output
  303. # deterministic and easier to debug, because pip install can have
  304. # different behavior depending on the order of the input.
  305. result["packages"] = list(OrderedDict.fromkeys(result["packages"]))
  306. if len(result["packages"]) == 0:
  307. result = None
  308. logger.debug(f"Rewrote runtime_env `pip` field from {pip} to {result}.")
  309. return result
  310. def parse_and_validate_container(container: List[str]) -> List[str]:
  311. """Parses and validates a user-provided 'container' option.
  312. This is passed through without validation (for now).
  313. """
  314. assert container is not None
  315. return container
  316. def parse_and_validate_excludes(excludes: List[str]) -> List[str]:
  317. """Parses and validates a user-provided 'excludes' option.
  318. This is validated to verify that it is of type List[str].
  319. If an empty list is passed, we return `None` for consistency.
  320. """
  321. assert excludes is not None
  322. if isinstance(excludes, list) and len(excludes) == 0:
  323. return None
  324. if isinstance(excludes, list) and all(isinstance(path, str) for path in excludes):
  325. return excludes
  326. else:
  327. raise TypeError(
  328. "runtime_env['excludes'] must be of type "
  329. f"List[str], got {type(excludes)}"
  330. )
  331. def parse_and_validate_env_vars(env_vars: Dict[str, str]) -> Optional[Dict[str, str]]:
  332. """Parses and validates a user-provided 'env_vars' option.
  333. This is validated to verify that all keys and vals are strings.
  334. If an empty dictionary is passed, we return `None` for consistency.
  335. Args:
  336. env_vars: A dictionary of environment variables to set in the
  337. runtime environment.
  338. Returns:
  339. The validated env_vars dictionary, or None if it was empty.
  340. Raises:
  341. TypeError: If the env_vars is not a dictionary of strings. The error message
  342. will include the type of the invalid value.
  343. """
  344. assert env_vars is not None
  345. if len(env_vars) == 0:
  346. return None
  347. if not isinstance(env_vars, dict):
  348. raise TypeError(
  349. "runtime_env['env_vars'] must be of type "
  350. f"Dict[str, str], got {type(env_vars)}"
  351. )
  352. for key, val in env_vars.items():
  353. if not isinstance(key, str):
  354. raise TypeError(
  355. "runtime_env['env_vars'] must be of type "
  356. f"Dict[str, str], but the key {key} is of type {type(key)}"
  357. )
  358. if not isinstance(val, str):
  359. raise TypeError(
  360. "runtime_env['env_vars'] must be of type "
  361. f"Dict[str, str], but the value {val} is of type {type(val)}"
  362. )
  363. return env_vars
  364. # Dictionary mapping runtime_env options with the function to parse and
  365. # validate them.
  366. OPTION_TO_VALIDATION_FN = {
  367. "py_modules": parse_and_validate_py_modules,
  368. "working_dir": parse_and_validate_working_dir,
  369. "excludes": parse_and_validate_excludes,
  370. "conda": parse_and_validate_conda,
  371. "pip": parse_and_validate_pip,
  372. "uv": parse_and_validate_uv,
  373. "env_vars": parse_and_validate_env_vars,
  374. "container": parse_and_validate_container,
  375. }
  376. # RuntimeEnv can be created with local paths
  377. # for these options. However, after the packages
  378. # for these options have been uploaded to GCS,
  379. # they must be URIs. These functions provide the ability
  380. # to validate that these options only contain well-formed URIs.
  381. OPTION_TO_NO_PATH_VALIDATION_FN = {
  382. "working_dir": validate_working_dir_uri,
  383. "py_modules": validate_py_modules_uris,
  384. }