misc.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. from typing import Any, Optional, Tuple
  2. import numpy as np
  3. from ray.rllib.utils.annotations import DeveloperAPI
  4. from ray.rllib.utils.framework import try_import_tf
  5. from ray.rllib.utils.typing import TensorType
  6. tf1, tf, tfv = try_import_tf()
  7. # TODO: (sven) obsolete this class.
  8. @DeveloperAPI
  9. def normc_initializer(std: float = 1.0) -> Any:
  10. def _initializer(shape, dtype=None, partition_info=None):
  11. out = np.random.randn(*shape).astype(
  12. dtype.name if hasattr(dtype, "name") else dtype or np.float32
  13. )
  14. out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
  15. return tf.constant(out)
  16. return _initializer
  17. @DeveloperAPI
  18. def conv2d(
  19. x: TensorType,
  20. num_filters: int,
  21. name: str,
  22. filter_size: Tuple[int, int] = (3, 3),
  23. stride: Tuple[int, int] = (1, 1),
  24. pad: str = "SAME",
  25. dtype: Optional[Any] = None,
  26. collections: Optional[Any] = None,
  27. ) -> TensorType:
  28. if dtype is None:
  29. dtype = tf.float32
  30. with tf1.variable_scope(name):
  31. stride_shape = [1, stride[0], stride[1], 1]
  32. filter_shape = [
  33. filter_size[0],
  34. filter_size[1],
  35. int(x.get_shape()[3]),
  36. num_filters,
  37. ]
  38. # There are "num input feature maps * filter height * filter width"
  39. # inputs to each hidden unit.
  40. fan_in = np.prod(filter_shape[:3])
  41. # Each unit in the lower layer receives a gradient from: "num output
  42. # feature maps * filter height * filter width" / pooling size.
  43. fan_out = np.prod(filter_shape[:2]) * num_filters
  44. # Initialize weights with random weights.
  45. w_bound = np.sqrt(6 / (fan_in + fan_out))
  46. w = tf1.get_variable(
  47. "W",
  48. filter_shape,
  49. dtype,
  50. tf1.random_uniform_initializer(-w_bound, w_bound),
  51. collections=collections,
  52. )
  53. b = tf1.get_variable(
  54. "b",
  55. [1, 1, 1, num_filters],
  56. initializer=tf1.constant_initializer(0.0),
  57. collections=collections,
  58. )
  59. return tf1.nn.conv2d(x, w, stride_shape, pad) + b
  60. @DeveloperAPI
  61. def linear(
  62. x: TensorType,
  63. size: int,
  64. name: str,
  65. initializer: Optional[Any] = None,
  66. bias_init: float = 0.0,
  67. ) -> TensorType:
  68. w = tf1.get_variable(name + "/w", [x.get_shape()[1], size], initializer=initializer)
  69. b = tf1.get_variable(
  70. name + "/b", [size], initializer=tf1.constant_initializer(bias_init)
  71. )
  72. return tf.matmul(x, w) + b
  73. @DeveloperAPI
  74. def flatten(x: TensorType) -> TensorType:
  75. return tf.reshape(x, [-1, np.prod(x.get_shape().as_list()[1:])])