tensorflow.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import os
  2. from typing import Dict, Optional, Union
  3. import numpy as np
  4. import tensorflow as tf
  5. from safetensors import numpy, safe_open
  6. def save(
  7. tensors: Dict[str, tf.Tensor], metadata: Optional[Dict[str, str]] = None
  8. ) -> bytes:
  9. """
  10. Saves a dictionary of tensors into raw bytes in safetensors format.
  11. Args:
  12. tensors (`Dict[str, tf.Tensor]`):
  13. The incoming tensors. Tensors need to be contiguous and dense.
  14. metadata (`Dict[str, str]`, *optional*, defaults to `None`):
  15. Optional text only metadata you might want to save in your header.
  16. For instance it can be useful to specify more about the underlying
  17. tensors. This is purely informative and does not affect tensor loading.
  18. Returns:
  19. `bytes`: The raw bytes representing the format
  20. Example:
  21. ```python
  22. from safetensors.tensorflow import save
  23. import tensorflow as tf
  24. tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))}
  25. byte_data = save(tensors)
  26. ```
  27. """
  28. np_tensors = _tf2np(tensors)
  29. return numpy.save(np_tensors, metadata=metadata)
  30. def save_file(
  31. tensors: Dict[str, tf.Tensor],
  32. filename: Union[str, os.PathLike],
  33. metadata: Optional[Dict[str, str]] = None,
  34. ) -> None:
  35. """
  36. Saves a dictionary of tensors into raw bytes in safetensors format.
  37. Args:
  38. tensors (`Dict[str, tf.Tensor]`):
  39. The incoming tensors. Tensors need to be contiguous and dense.
  40. filename (`str`, or `os.PathLike`)):
  41. The filename we're saving into.
  42. metadata (`Dict[str, str]`, *optional*, defaults to `None`):
  43. Optional text only metadata you might want to save in your header.
  44. For instance it can be useful to specify more about the underlying
  45. tensors. This is purely informative and does not affect tensor loading.
  46. Returns:
  47. `None`
  48. Example:
  49. ```python
  50. from safetensors.tensorflow import save_file
  51. import tensorflow as tf
  52. tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))}
  53. save_file(tensors, "model.safetensors")
  54. ```
  55. """
  56. np_tensors = _tf2np(tensors)
  57. return numpy.save_file(np_tensors, filename, metadata=metadata)
  58. def load(data: bytes) -> Dict[str, tf.Tensor]:
  59. """
  60. Loads a safetensors file into tensorflow format from pure bytes.
  61. Args:
  62. data (`bytes`):
  63. The content of a safetensors file
  64. Returns:
  65. `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` on cpu
  66. Example:
  67. ```python
  68. from safetensors.tensorflow import load
  69. file_path = "./my_folder/bert.safetensors"
  70. with open(file_path, "rb") as f:
  71. data = f.read()
  72. loaded = load(data)
  73. ```
  74. """
  75. flat = numpy.load(data)
  76. return _np2tf(flat)
  77. def load_file(filename: Union[str, os.PathLike]) -> Dict[str, tf.Tensor]:
  78. """
  79. Loads a safetensors file into tensorflow format.
  80. Args:
  81. filename (`str`, or `os.PathLike`)):
  82. The name of the file which contains the tensors
  83. Returns:
  84. `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor`
  85. Example:
  86. ```python
  87. from safetensors.tensorflow import load_file
  88. file_path = "./my_folder/bert.safetensors"
  89. loaded = load_file(file_path)
  90. ```
  91. """
  92. result = {}
  93. with safe_open(filename, framework="tf") as f:
  94. for k in f.offset_keys():
  95. result[k] = f.get_tensor(k)
  96. return result
  97. def _np2tf(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, tf.Tensor]:
  98. for k, v in numpy_dict.items():
  99. numpy_dict[k] = tf.convert_to_tensor(v)
  100. return numpy_dict
  101. def _tf2np(tf_dict: Dict[str, tf.Tensor]) -> Dict[str, np.array]:
  102. for k, v in tf_dict.items():
  103. tf_dict[k] = v.numpy()
  104. return tf_dict