label_utils.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. import json
  2. import re
  3. from typing import (
  4. Any,
  5. Dict,
  6. List,
  7. Optional,
  8. )
  9. import yaml
  10. import ray._private.ray_constants as ray_constants
  11. # Regex patterns used to validate that labels conform to Kubernetes label syntax rules.
  12. # https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
  13. # Regex for mandatory name (DNS label) or value
  14. # Examples:
  15. # Valid matches: "a", "label-name", "a-._b", "123", "this_is_a_valid_label"
  16. # Invalid matches: "-abc", "abc-", "my@label"
  17. LABEL_REGEX = re.compile(r"([a-zA-Z0-9]([a-zA-Z0-9_.-]{0,61}[a-zA-Z0-9])?)")
  18. # Regex for optional prefix (DNS subdomain)
  19. # Examples:
  20. # Valid matches: "abc", "sub.domain.example", "my-label", "123.456.789"
  21. # Invalid matches: "-abc", "prefix_", "sub..domain", sub.$$.example
  22. LABEL_PREFIX_REGEX = rf"^({LABEL_REGEX.pattern}?(\.{LABEL_REGEX.pattern}?)*)$"
  23. # Supported operators for label selector conditions. Not (!) conditions are handled separately.
  24. LABEL_OPERATORS = {"in"}
  25. # Create a pattern string dynamically based on the LABEL_OPERATORS
  26. OPERATOR_PATTERN = "|".join([re.escape(operator) for operator in LABEL_OPERATORS])
  27. # Regex to match valid label selector operators and values
  28. # Examples:
  29. # Valid matches: "spot", "!GPU", "213521", "in(A123, B456, C789)", "!in(spot, on-demand)", "valid-value"
  30. # Invalid matches: "-spot", "spot_", "in()", "in(spot,", "in(H100, TPU!GPU)", "!!!in(H100, TPU)"
  31. LABEL_SELECTOR_REGEX = re.compile(
  32. rf"^!?(?:{OPERATOR_PATTERN})?\({LABEL_REGEX.pattern}(?:, ?{LABEL_REGEX.pattern})*\)$|^!?{LABEL_REGEX.pattern}$"
  33. )
  34. def parse_node_labels_json(labels_json: str) -> Dict[str, str]:
  35. labels = json.loads(labels_json)
  36. if not isinstance(labels, dict):
  37. raise ValueError("The format after deserialization is not a key-value pair map")
  38. for key, value in labels.items():
  39. if not isinstance(key, str):
  40. raise ValueError("The key is not string type.")
  41. if not isinstance(value, str):
  42. raise ValueError(f'The value of the "{key}" is not string type')
  43. # Validate parsed custom node labels don't begin with ray.io prefix
  44. validate_node_labels(labels)
  45. return labels
  46. def parse_node_labels_string(labels_str: str) -> Dict[str, str]:
  47. labels = {}
  48. # Remove surrounding quotes if they exist
  49. if len(labels_str) > 1 and labels_str.startswith('"') and labels_str.endswith('"'):
  50. labels_str = labels_str[1:-1]
  51. if labels_str == "":
  52. return labels
  53. # Labels argument should consist of a string of key=value pairs
  54. # separated by commas. Labels follow Kubernetes label syntax.
  55. label_pairs = labels_str.split(",")
  56. for pair in label_pairs:
  57. # Split each pair by `=`
  58. key_value = pair.split("=")
  59. if len(key_value) != 2:
  60. raise ValueError("Label string is not a key-value pair.")
  61. key = key_value[0].strip()
  62. value = key_value[1].strip()
  63. labels[key] = value
  64. # Validate parsed node labels follow expected Kubernetes label syntax
  65. validate_node_label_syntax(labels)
  66. return labels
  67. def parse_node_labels_from_yaml_file(path: str) -> Dict[str, str]:
  68. if path == "":
  69. return {}
  70. with open(path, "r") as file:
  71. # Expects valid YAML content
  72. labels = yaml.safe_load(file)
  73. if not isinstance(labels, dict):
  74. raise ValueError(
  75. "The format after deserialization is not a key-value pair map."
  76. )
  77. for key, value in labels.items():
  78. if not isinstance(key, str):
  79. raise ValueError("The key is not string type.")
  80. if not isinstance(value, str):
  81. raise ValueError(f'The value of "{key}" is not string type.')
  82. # Validate parsed node labels follow expected Kubernetes label syntax
  83. validate_node_label_syntax(labels)
  84. return labels
  85. # TODO (ryanaoleary@): This function will be removed after the migration to the label
  86. # selector API from NodeLabelSchedulingPolicy is complete.
  87. def validate_node_labels(labels: Dict[str, str]):
  88. if labels is None:
  89. return
  90. for key in labels.keys():
  91. if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX):
  92. raise ValueError(
  93. f"Custom label keys `{key}` cannot start with the prefix "
  94. f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. "
  95. f"This is reserved for Ray defined labels."
  96. )
  97. def validate_label_key(key: str) -> Optional[str]:
  98. if "/" in key:
  99. prefix, name = key.rsplit("/", 1)
  100. if len(prefix) > 253 or not re.fullmatch(LABEL_PREFIX_REGEX, prefix):
  101. return str(
  102. f"Invalid label key prefix `{prefix}`. Prefix must be a series of DNS labels "
  103. f"separated by dots (.), not longer than 253 characters in total."
  104. )
  105. else:
  106. name = key
  107. if len(name) > 63 or not re.fullmatch(LABEL_REGEX, name):
  108. return str(
  109. f"Invalid label key name `{name}`. Name must be 63 chars or less beginning and ending "
  110. f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
  111. f"dots (.), and alphanumerics between."
  112. )
  113. return None
  114. def validate_label_value(value: str):
  115. if value == "":
  116. return
  117. if len(value) > 63 or not re.fullmatch(LABEL_REGEX, value):
  118. raise ValueError(
  119. f"Invalid label key value `{value}`. Value must be 63 chars or less beginning and ending "
  120. f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
  121. f"dots (.), and alphanumerics between."
  122. )
  123. def validate_label_selector(label_selector: Optional[Dict[str, str]]) -> Optional[str]:
  124. if label_selector is None:
  125. return None
  126. for key, value in label_selector.items():
  127. possible_error_message = validate_label_key(key)
  128. if possible_error_message:
  129. return possible_error_message
  130. if value is not None:
  131. possible_error_message = validate_label_selector_value(value)
  132. if possible_error_message:
  133. return possible_error_message
  134. return None
  135. def validate_label_selector_value(selector: str) -> Optional[str]:
  136. if selector == "":
  137. return None
  138. if not re.fullmatch(LABEL_SELECTOR_REGEX, selector):
  139. return str(
  140. f"Invalid label selector value `{selector}`. The label selector value should contain optional operators and a label value. Supported operators are: ! and {LABEL_OPERATORS}. "
  141. f"Value must be 63 chars or less beginning and ending "
  142. f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
  143. f"dots (.), and alphanumerics between."
  144. )
  145. return None
  146. # TODO (ryanaoleary@): This function will replace `validate_node_labels` after
  147. # the migration from NodeLabelSchedulingPolicy to the Label Selector API is complete.
  148. def validate_node_label_syntax(labels: Dict[str, str]):
  149. if labels is None:
  150. return
  151. for key, value in labels.items():
  152. possible_error_message = validate_label_key(key)
  153. if possible_error_message:
  154. raise ValueError(possible_error_message)
  155. if value is not None:
  156. validate_label_value(value)
  157. def validate_fallback_strategy(
  158. fallback_strategy: Optional[List[Dict[str, Any]]]
  159. ) -> Optional[str]:
  160. if fallback_strategy is None:
  161. return None
  162. # Supported options for `fallback_strategy` scheduling.
  163. supported_options = {"label_selector"}
  164. for strategy in fallback_strategy:
  165. if not isinstance(strategy, dict):
  166. return "Each element in fallback_strategy must be a dictionary."
  167. if not strategy:
  168. return "Empty dictionary found in `fallback_strategy`."
  169. # Validate `fallback_strategy` only contains supported options.
  170. for option in strategy:
  171. if option not in supported_options:
  172. return (
  173. f"Unsupported option found: '{option}'. "
  174. f"Only {list(supported_options)} is currently supported."
  175. )
  176. # Validate the 'label_selector' dictionary.
  177. label_selector = strategy.get("label_selector")
  178. if label_selector:
  179. if not isinstance(label_selector, dict):
  180. return 'The value of "label_selector" must be a dictionary.'
  181. error_message = validate_label_selector(label_selector)
  182. if error_message:
  183. return error_message
  184. return None