numpy.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. from collections import OrderedDict
  2. from types import MappingProxyType
  3. from typing import List, Optional
  4. import numpy as np
  5. import tree # pip install dm_tree
  6. from gymnasium.spaces import Discrete, MultiDiscrete
  7. from ray._common.deprecation import Deprecated
  8. from ray.rllib.utils.annotations import PublicAPI
  9. from ray.rllib.utils.framework import try_import_tf, try_import_torch
  10. from ray.rllib.utils.typing import SpaceStruct, TensorStructType, TensorType, Union
# Framework handles obtained from RLlib's import helpers. The functions in this
# module guard each use with a truthiness check (`if torch`, `tf and ...`).
# NOTE(review): presumably these are None when the framework is not
# installed - confirm against `try_import_tf` / `try_import_torch`.
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()

# Minimum value `softmax()` clamps its outputs to when no `epsilon` is given.
SMALL_NUMBER = 1e-6
# Some large int number. May be increased here, if needed.
LARGE_INTEGER = 100000000
# Min and Max outputs (clipped) from an NN-output layer interpreted as the
# log(x) of some x (e.g. a stddev of a normal
# distribution).
MIN_LOG_NN_OUTPUT = -5
MAX_LOG_NN_OUTPUT = 2
  21. @PublicAPI
  22. @Deprecated(
  23. help="RLlib itself has no use for this anymore.",
  24. error=False,
  25. )
  26. def aligned_array(size: int, dtype, align: int = 64) -> np.ndarray:
  27. """Returns an array of a given size that is 64-byte aligned.
  28. The returned array can be efficiently copied into GPU memory by TensorFlow.
  29. Args:
  30. size: The size (total number of items) of the array. For example,
  31. array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
  32. dtype: The numpy dtype of the array.
  33. align: The alignment to use.
  34. Returns:
  35. A np.ndarray with the given specifications.
  36. """
  37. n = size * dtype.itemsize
  38. empty = np.empty(n + (align - 1), dtype=np.uint8)
  39. data_align = empty.ctypes.data % align
  40. offset = 0 if data_align == 0 else (align - data_align)
  41. if n == 0:
  42. # stop np from optimising out empty slice reference
  43. output = empty[offset : offset + 1][0:0].view(dtype)
  44. else:
  45. output = empty[offset : offset + n].view(dtype)
  46. assert len(output) == size, len(output)
  47. assert output.ctypes.data % align == 0, output.ctypes.data
  48. return output
  49. @PublicAPI
  50. @Deprecated(
  51. help="RLlib itself has no use for this anymore.",
  52. error=False,
  53. )
  54. def concat_aligned(
  55. items: List[np.ndarray], time_major: Optional[bool] = None
  56. ) -> np.ndarray:
  57. """Concatenate arrays, ensuring the output is 64-byte aligned.
  58. We only align float arrays; other arrays are concatenated as normal.
  59. This should be used instead of np.concatenate() to improve performance
  60. when the output array is likely to be fed into TensorFlow.
  61. Args:
  62. items: The list of items to concatenate and align.
  63. time_major: Whether the data in items is time-major, in which
  64. case, we will concatenate along axis=1.
  65. Returns:
  66. The concat'd and aligned array.
  67. """
  68. if len(items) == 0:
  69. return []
  70. elif len(items) == 1:
  71. # we assume the input is aligned. In any case, it doesn't help
  72. # performance to force align it since that incurs a needless copy.
  73. return items[0]
  74. elif isinstance(items[0], np.ndarray) and items[0].dtype in [
  75. np.float32,
  76. np.float64,
  77. np.uint8,
  78. ]:
  79. dtype = items[0].dtype
  80. flat = aligned_array(sum(s.size for s in items), dtype)
  81. if time_major is not None:
  82. if time_major is True:
  83. batch_dim = sum(s.shape[1] for s in items)
  84. new_shape = (items[0].shape[0], batch_dim,) + items[
  85. 0
  86. ].shape[2:]
  87. else:
  88. batch_dim = sum(s.shape[0] for s in items)
  89. new_shape = (batch_dim, items[0].shape[1],) + items[
  90. 0
  91. ].shape[2:]
  92. else:
  93. batch_dim = sum(s.shape[0] for s in items)
  94. new_shape = (batch_dim,) + items[0].shape[1:]
  95. output = flat.reshape(new_shape)
  96. assert output.ctypes.data % 64 == 0, output.ctypes.data
  97. np.concatenate(items, out=output, axis=1 if time_major else 0)
  98. return output
  99. else:
  100. return np.concatenate(items, axis=1 if time_major else 0)
  101. @PublicAPI
  102. def convert_to_numpy(x: TensorStructType, reduce_type: bool = True) -> TensorStructType:
  103. """Converts values in `stats` to non-Tensor numpy or python types.
  104. Args:
  105. x: Any (possibly nested) struct, the values in which will be
  106. converted and returned as a new struct with all torch/tf tensors
  107. being converted to numpy types.
  108. reduce_type: Whether to automatically reduce all float64 and int64 data
  109. into float32 and int32 data, respectively.
  110. Returns:
  111. A new struct with the same structure as `x`, but with all
  112. values converted to numpy arrays (on CPU).
  113. """
  114. # The mapping function used to numpyize torch/tf Tensors (and move them
  115. # to the CPU beforehand).
  116. def mapping(item):
  117. if torch and isinstance(item, torch.Tensor):
  118. ret = (
  119. item.cpu().item()
  120. if len(item.size()) == 0
  121. else item.detach().cpu().numpy()
  122. )
  123. elif (
  124. tf and isinstance(item, (tf.Tensor, tf.Variable)) and hasattr(item, "numpy")
  125. ):
  126. assert tf.executing_eagerly()
  127. ret = item.numpy()
  128. else:
  129. ret = item
  130. if reduce_type and isinstance(ret, np.ndarray):
  131. if np.issubdtype(ret.dtype, np.floating):
  132. ret = ret.astype(np.float32)
  133. elif np.issubdtype(ret.dtype, int):
  134. ret = ret.astype(np.int32)
  135. return ret
  136. return tree.map_structure(mapping, x)
  137. @PublicAPI
  138. def fc(
  139. x: np.ndarray,
  140. weights: np.ndarray,
  141. biases: Optional[np.ndarray] = None,
  142. framework: Optional[str] = None,
  143. ) -> np.ndarray:
  144. """Calculates FC (dense) layer outputs given weights/biases and input.
  145. Args:
  146. x: The input to the dense layer.
  147. weights: The weights matrix.
  148. biases: The biases vector. All 0s if None.
  149. framework: An optional framework hint (to figure out,
  150. e.g. whether to transpose torch weight matrices).
  151. Returns:
  152. The dense layer's output.
  153. """
  154. def map_(data, transpose=False):
  155. if torch:
  156. if isinstance(data, torch.Tensor):
  157. data = data.cpu().detach().numpy()
  158. if tf and tf.executing_eagerly():
  159. if isinstance(data, tf.Variable):
  160. data = data.numpy()
  161. if transpose:
  162. data = np.transpose(data)
  163. return data
  164. x = map_(x)
  165. # Torch stores matrices in transpose (faster for backprop).
  166. transpose = framework == "torch" and (
  167. x.shape[1] != weights.shape[0] and x.shape[1] == weights.shape[1]
  168. )
  169. weights = map_(weights, transpose=transpose)
  170. biases = map_(biases)
  171. return np.matmul(x, weights) + (0.0 if biases is None else biases)
  172. @PublicAPI
  173. def flatten_inputs_to_1d_tensor(
  174. inputs: TensorStructType,
  175. spaces_struct: Optional[SpaceStruct] = None,
  176. time_axis: bool = False,
  177. batch_axis: bool = True,
  178. ) -> TensorType:
  179. """Flattens arbitrary input structs according to the given spaces struct.
  180. Returns a single 1D tensor resulting from the different input
  181. components' values.
  182. Thereby:
  183. - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
  184. are not treated differently from other types of Boxes and get
  185. flattened as well.
  186. - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
  187. Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
  188. - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
  189. [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
  190. [[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1]].
  191. Args:
  192. inputs: The inputs to be flattened.
  193. spaces_struct: The (possibly nested) structure of the spaces that `inputs`
  194. belongs to.
  195. time_axis: Whether all inputs have a time-axis (after the batch axis).
  196. If True, will keep not only the batch axis (0th), but the time axis
  197. (1st) as-is and flatten everything from the 2nd axis up.
  198. batch_axis: Whether all inputs have a batch axis.
  199. If True, will keep that batch axis as-is and flatten everything from the
  200. other dims up.
  201. Returns:
  202. A single 1D tensor resulting from concatenating all
  203. flattened/one-hot'd input components. Depending on the time_axis flag,
  204. the shape is (B, n) or (B, T, n).
  205. .. testcode::
  206. :skipif: True
  207. # B=2
  208. from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor
  209. from gymnasium.spaces import Discrete, Box
  210. out = flatten_inputs_to_1d_tensor(
  211. {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
  212. spaces_struct=dict(a=Discrete(2), b=Box(shape=(2, 1)))
  213. )
  214. print(out)
  215. # B=2; T=2
  216. out = flatten_inputs_to_1d_tensor(
  217. ([[1, 0], [0, 1]],
  218. [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
  219. spaces_struct=tuple([Discrete(2), Box(shape=(2, ))]),
  220. time_axis=True
  221. )
  222. print(out)
  223. .. testoutput::
  224. [[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]] # B=2 n=4
  225. [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
  226. [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]] # B=2 T=2 n=4
  227. """
  228. # `time_axis` must not be True if `batch_axis` is False.
  229. assert not (time_axis and not batch_axis)
  230. flat_inputs = tree.flatten(inputs)
  231. flat_spaces = (
  232. tree.flatten(spaces_struct)
  233. if spaces_struct is not None
  234. else [None] * len(flat_inputs)
  235. )
  236. B = None
  237. T = None
  238. out = []
  239. for input_, space in zip(flat_inputs, flat_spaces):
  240. # Store batch and (if applicable) time dimension.
  241. if B is None and batch_axis:
  242. B = input_.shape[0]
  243. if time_axis:
  244. T = input_.shape[1]
  245. # One-hot encoding.
  246. if isinstance(space, Discrete):
  247. if time_axis:
  248. input_ = np.reshape(input_, [B * T])
  249. out.append(one_hot(input_, depth=space.n).astype(np.float32))
  250. # Multi one-hot encoding.
  251. elif isinstance(space, MultiDiscrete):
  252. if time_axis:
  253. input_ = np.reshape(input_, [B * T, -1])
  254. if batch_axis:
  255. out.append(
  256. np.concatenate(
  257. [
  258. one_hot(input_[:, i], depth=n).astype(np.float32)
  259. for i, n in enumerate(space.nvec)
  260. ],
  261. axis=-1,
  262. )
  263. )
  264. else:
  265. out.append(
  266. np.concatenate(
  267. [
  268. one_hot(input_[i], depth=n).astype(np.float32)
  269. for i, n in enumerate(space.nvec)
  270. ],
  271. axis=-1,
  272. )
  273. )
  274. # Box: Flatten.
  275. else:
  276. # Special case for spaces: Box(.., shape=(), ..)
  277. if isinstance(input_, float):
  278. input_ = np.array([input_])
  279. if time_axis:
  280. input_ = np.reshape(input_, [B * T, -1])
  281. elif batch_axis:
  282. input_ = np.reshape(input_, [B, -1])
  283. else:
  284. input_ = np.reshape(input_, [-1])
  285. out.append(input_.astype(np.float32))
  286. merged = np.concatenate(out, axis=-1)
  287. # Restore the time-dimension, if applicable.
  288. if time_axis:
  289. merged = np.reshape(merged, [B, T, -1])
  290. return merged
  291. @PublicAPI
  292. def make_action_immutable(obj):
  293. """Flags actions immutable to notify users when trying to change them.
  294. Can also be used with any tree-like structure containing either
  295. dictionaries, numpy arrays or already immutable objects per se.
  296. Note, however that `tree.map_structure()` will in general not
  297. include the shallow object containing all others and therefore
  298. immutability will hold only for all objects contained in it.
  299. Use `tree.traverse(fun, action, top_down=False)` to include
  300. also the containing object.
  301. Args:
  302. obj: The object to be made immutable.
  303. Returns:
  304. The immutable object.
  305. .. testcode::
  306. :skipif: True
  307. import tree
  308. import numpy as np
  309. from ray.rllib.utils.numpy import make_action_immutable
  310. arr = np.arange(1,10)
  311. d = dict(a = 1, b = (arr, arr))
  312. tree.traverse(make_action_immutable, d, top_down=False)
  313. """
  314. if isinstance(obj, np.ndarray):
  315. obj.setflags(write=False)
  316. return obj
  317. elif isinstance(obj, OrderedDict):
  318. return MappingProxyType(dict(obj))
  319. elif isinstance(obj, dict):
  320. return MappingProxyType(obj)
  321. else:
  322. return obj
  323. @PublicAPI
  324. def huber_loss(x: np.ndarray, delta: float = 1.0) -> np.ndarray:
  325. """Reference: https://en.wikipedia.org/wiki/Huber_loss."""
  326. return np.where(
  327. np.abs(x) < delta, np.power(x, 2.0) * 0.5, delta * (np.abs(x) - 0.5 * delta)
  328. )
  329. @PublicAPI
  330. def l2_loss(x: np.ndarray) -> np.ndarray:
  331. """Computes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.
  332. Args:
  333. x: The input tensor.
  334. Returns:
  335. The l2-loss output according to the above formula given `x`.
  336. """
  337. return np.sum(np.square(x)) / 2.0
  338. @PublicAPI
  339. def lstm(
  340. x,
  341. weights: np.ndarray,
  342. biases: Optional[np.ndarray] = None,
  343. initial_internal_states: Optional[np.ndarray] = None,
  344. time_major: bool = False,
  345. forget_bias: float = 1.0,
  346. ):
  347. """Calculates LSTM layer output given weights/biases, states, and input.
  348. Args:
  349. x: The inputs to the LSTM layer including time-rank
  350. (0th if time-major, else 1st) and the batch-rank
  351. (1st if time-major, else 0th).
  352. weights: The weights matrix.
  353. biases: The biases vector. All 0s if None.
  354. initial_internal_states: The initial internal
  355. states to pass into the layer. All 0s if None.
  356. time_major: Whether to use time-major or not. Default: False.
  357. forget_bias: Gets added to first sigmoid (forget gate) output.
  358. Default: 1.0.
  359. Returns:
  360. Tuple consisting of 1) The LSTM layer's output and
  361. 2) Tuple: Last (c-state, h-state).
  362. """
  363. sequence_length = x.shape[0 if time_major else 1]
  364. batch_size = x.shape[1 if time_major else 0]
  365. units = weights.shape[1] // 4 # 4 internal layers (3x sigmoid, 1x tanh)
  366. if initial_internal_states is None:
  367. c_states = np.zeros(shape=(batch_size, units))
  368. h_states = np.zeros(shape=(batch_size, units))
  369. else:
  370. c_states = initial_internal_states[0]
  371. h_states = initial_internal_states[1]
  372. # Create a placeholder for all n-time step outputs.
  373. if time_major:
  374. unrolled_outputs = np.zeros(shape=(sequence_length, batch_size, units))
  375. else:
  376. unrolled_outputs = np.zeros(shape=(batch_size, sequence_length, units))
  377. # Push the batch 4 times through the LSTM cell and capture the outputs plus
  378. # the final h- and c-states.
  379. for t in range(sequence_length):
  380. input_matrix = x[t, :, :] if time_major else x[:, t, :]
  381. input_matrix = np.concatenate((input_matrix, h_states), axis=1)
  382. input_matmul_matrix = np.matmul(input_matrix, weights) + biases
  383. # Forget gate (3rd slot in tf output matrix). Add static forget bias.
  384. sigmoid_1 = sigmoid(input_matmul_matrix[:, units * 2 : units * 3] + forget_bias)
  385. c_states = np.multiply(c_states, sigmoid_1)
  386. # Add gate (1st and 2nd slots in tf output matrix).
  387. sigmoid_2 = sigmoid(input_matmul_matrix[:, 0:units])
  388. tanh_3 = np.tanh(input_matmul_matrix[:, units : units * 2])
  389. c_states = np.add(c_states, np.multiply(sigmoid_2, tanh_3))
  390. # Output gate (last slot in tf output matrix).
  391. sigmoid_4 = sigmoid(input_matmul_matrix[:, units * 3 : units * 4])
  392. h_states = np.multiply(sigmoid_4, np.tanh(c_states))
  393. # Store this output time-slice.
  394. if time_major:
  395. unrolled_outputs[t, :, :] = h_states
  396. else:
  397. unrolled_outputs[:, t, :] = h_states
  398. return unrolled_outputs, (c_states, h_states)
  399. @PublicAPI
  400. def one_hot(
  401. x: Union[TensorType, int],
  402. depth: int = 0,
  403. on_value: float = 1.0,
  404. off_value: float = 0.0,
  405. dtype: type = np.float32,
  406. ) -> np.ndarray:
  407. """One-hot utility function for numpy.
  408. Thanks to qianyizhang:
  409. https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.
  410. Args:
  411. x: The input to be one-hot encoded.
  412. depth: The max. number to be one-hot encoded (size of last rank).
  413. on_value: The value to use for on. Default: 1.0.
  414. off_value: The value to use for off. Default: 0.0.
  415. Returns:
  416. The one-hot encoded equivalent of the input array.
  417. """
  418. # Handle simple ints properly.
  419. if isinstance(x, int):
  420. x = np.array(x, dtype=np.int32)
  421. # Handle torch arrays properly.
  422. elif torch and isinstance(x, torch.Tensor):
  423. x = x.numpy()
  424. # Handle bool arrays correctly.
  425. if x.dtype == np.bool_:
  426. x = x.astype(np.int_)
  427. depth = 2
  428. # If depth is not given, try to infer it from the values in the array.
  429. if depth == 0:
  430. depth = np.max(x) + 1
  431. assert (
  432. np.max(x) < depth
  433. ), "ERROR: The max. index of `x` ({}) is larger than depth ({})!".format(
  434. np.max(x), depth
  435. )
  436. shape = x.shape
  437. out = np.ones(shape=(*shape, depth)) * off_value
  438. indices = []
  439. for i in range(x.ndim):
  440. tiles = [1] * x.ndim
  441. s = [1] * x.ndim
  442. s[i] = -1
  443. r = np.arange(shape[i]).reshape(s)
  444. if i > 0:
  445. tiles[i - 1] = shape[i - 1]
  446. r = np.tile(r, tiles)
  447. indices.append(r)
  448. indices.append(x)
  449. out[tuple(indices)] = on_value
  450. return out.astype(dtype)
  451. @PublicAPI
  452. def one_hot_multidiscrete(x, depths=List[int]):
  453. # Handle torch arrays properly.
  454. if torch and isinstance(x, torch.Tensor):
  455. x = x.numpy()
  456. shape = x.shape
  457. return np.concatenate(
  458. [
  459. one_hot(x[i] if len(shape) == 1 else x[:, i], depth=n).astype(np.float32)
  460. for i, n in enumerate(depths)
  461. ],
  462. axis=-1,
  463. )
  464. @PublicAPI
  465. def relu(x: np.ndarray, alpha: float = 0.0) -> np.ndarray:
  466. """Implementation of the leaky ReLU function.
  467. y = x * alpha if x < 0 else x
  468. Args:
  469. x: The input values.
  470. alpha: A scaling ("leak") factor to use for negative x.
  471. Returns:
  472. The leaky ReLU output for x.
  473. """
  474. return np.maximum(x, x * alpha, x)
  475. @PublicAPI
  476. def sigmoid(x: np.ndarray, derivative: bool = False) -> np.ndarray:
  477. """
  478. Returns the sigmoid function applied to x.
  479. Alternatively, can return the derivative or the sigmoid function.
  480. Args:
  481. x: The input to the sigmoid function.
  482. derivative: Whether to return the derivative or not.
  483. Default: False.
  484. Returns:
  485. The sigmoid function (or its derivative) applied to x.
  486. """
  487. if derivative:
  488. return x * (1 - x)
  489. else:
  490. return 1 / (1 + np.exp(-x))
  491. @PublicAPI
  492. def softmax(
  493. x: Union[np.ndarray, list], axis: int = -1, epsilon: Optional[float] = None
  494. ) -> np.ndarray:
  495. """Returns the softmax values for x.
  496. The exact formula used is:
  497. S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.
  498. Args:
  499. x: The input to the softmax function.
  500. axis: The axis along which to softmax.
  501. epsilon: Optional epsilon as a minimum value. If None, use
  502. `SMALL_NUMBER`.
  503. Returns:
  504. The softmax over x.
  505. """
  506. epsilon = epsilon or SMALL_NUMBER
  507. # x_exp = np.maximum(np.exp(x), SMALL_NUMBER)
  508. x_exp = np.exp(x)
  509. # return x_exp /
  510. # np.maximum(np.sum(x_exp, axis, keepdims=True), SMALL_NUMBER)
  511. return np.maximum(x_exp / np.sum(x_exp, axis, keepdims=True), epsilon)