_base.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. """
  2. .. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
  3. """
  4. import abc
  5. import os
  6. import re
  7. import sys
  8. from collections.abc import Sequence
  9. from typing import Final, Optional
  10. from ._common import normalize_platform, unprintable_ascii_chars
  11. from ._const import DEFAULT_MIN_LEN, Platform
  12. from ._types import PathType, PlatformType
  13. from .error import ReservedNameError, ValidationError
  14. from .handler import NullValueHandler, ReservedNameHandler, ValidationErrorHandler
  15. class BaseFile:
  16. _INVALID_PATH_CHARS: Final[str] = "".join(unprintable_ascii_chars)
  17. _INVALID_FILENAME_CHARS: Final[str] = _INVALID_PATH_CHARS + "/"
  18. _INVALID_WIN_PATH_CHARS: Final[str] = _INVALID_PATH_CHARS + ':*?"<>|\t\n\r\x0b\x0c'
  19. _INVALID_WIN_FILENAME_CHARS: Final[str] = (
  20. _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\"
  21. )
  22. @property
  23. def platform(self) -> Platform:
  24. return self.__platform
  25. @property
  26. def reserved_keywords(self) -> tuple[str, ...]:
  27. return self._additional_reserved_names
  28. @property
  29. def max_len(self) -> int:
  30. return self._max_len
  31. def __init__(
  32. self,
  33. max_len: int,
  34. fs_encoding: Optional[str],
  35. additional_reserved_names: Optional[Sequence[str]] = None,
  36. platform_max_len: Optional[int] = None,
  37. platform: Optional[PlatformType] = None,
  38. ) -> None:
  39. if additional_reserved_names is None:
  40. additional_reserved_names = tuple()
  41. self._additional_reserved_names = tuple(n.upper() for n in additional_reserved_names)
  42. self.__platform = normalize_platform(platform)
  43. if platform_max_len is None:
  44. platform_max_len = self._get_default_max_path_len()
  45. if max_len <= 0:
  46. self._max_len = platform_max_len
  47. else:
  48. self._max_len = max_len
  49. self._max_len = min(self._max_len, platform_max_len)
  50. if fs_encoding:
  51. self._fs_encoding = fs_encoding
  52. else:
  53. self._fs_encoding = sys.getfilesystemencoding()
  54. def _is_posix(self) -> bool:
  55. return self.platform == Platform.POSIX
  56. def _is_universal(self) -> bool:
  57. return self.platform == Platform.UNIVERSAL
  58. def _is_linux(self, include_universal: bool = False) -> bool:
  59. if include_universal:
  60. return self.platform in (Platform.UNIVERSAL, Platform.LINUX)
  61. return self.platform == Platform.LINUX
  62. def _is_windows(self, include_universal: bool = False) -> bool:
  63. if include_universal:
  64. return self.platform in (Platform.UNIVERSAL, Platform.WINDOWS)
  65. return self.platform == Platform.WINDOWS
  66. def _is_macos(self, include_universal: bool = False) -> bool:
  67. if include_universal:
  68. return self.platform in (Platform.UNIVERSAL, Platform.MACOS)
  69. return self.platform == Platform.MACOS
  70. def _get_default_max_path_len(self) -> int:
  71. if self._is_linux():
  72. return 4096
  73. if self._is_windows():
  74. return 260
  75. if self._is_posix() or self._is_macos():
  76. return 1024
  77. return 260 # universal
  78. class AbstractValidator(BaseFile, metaclass=abc.ABCMeta):
  79. def __init__(
  80. self,
  81. max_len: int,
  82. fs_encoding: Optional[str],
  83. check_reserved: bool,
  84. additional_reserved_names: Optional[Sequence[str]] = None,
  85. platform_max_len: Optional[int] = None,
  86. platform: Optional[PlatformType] = None,
  87. ) -> None:
  88. self._check_reserved = check_reserved
  89. super().__init__(
  90. max_len,
  91. fs_encoding,
  92. additional_reserved_names=additional_reserved_names,
  93. platform_max_len=platform_max_len,
  94. platform=platform,
  95. )
  96. @property
  97. @abc.abstractmethod
  98. def min_len(self) -> int: # pragma: no cover
  99. pass
  100. @abc.abstractmethod
  101. def validate(self, value: PathType) -> None: # pragma: no cover
  102. pass
  103. def is_valid(self, value: PathType) -> bool:
  104. try:
  105. self.validate(value)
  106. except (TypeError, ValidationError):
  107. return False
  108. return True
  109. def _is_reserved_keyword(self, value: str) -> bool:
  110. return value.upper() in self.reserved_keywords
  111. class AbstractSanitizer(BaseFile, metaclass=abc.ABCMeta):
  112. def __init__(
  113. self,
  114. validator: AbstractValidator,
  115. max_len: int,
  116. fs_encoding: Optional[str],
  117. validate_after_sanitize: bool,
  118. null_value_handler: Optional[ValidationErrorHandler] = None,
  119. reserved_name_handler: Optional[ValidationErrorHandler] = None,
  120. additional_reserved_names: Optional[Sequence[str]] = None,
  121. platform_max_len: Optional[int] = None,
  122. platform: Optional[PlatformType] = None,
  123. ) -> None:
  124. super().__init__(
  125. max_len=max_len,
  126. fs_encoding=fs_encoding,
  127. additional_reserved_names=additional_reserved_names,
  128. platform_max_len=platform_max_len,
  129. platform=platform,
  130. )
  131. if null_value_handler is None:
  132. null_value_handler = NullValueHandler.return_null_string
  133. self._null_value_handler = null_value_handler
  134. if reserved_name_handler is None:
  135. reserved_name_handler = ReservedNameHandler.add_trailing_underscore
  136. self._reserved_name_handler = reserved_name_handler
  137. self._validate_after_sanitize = validate_after_sanitize
  138. self._validator = validator
  139. @abc.abstractmethod
  140. def sanitize(self, value: PathType, replacement_text: str = "") -> PathType: # pragma: no cover
  141. pass
  142. class BaseValidator(AbstractValidator):
  143. __RE_ROOT_NAME: Final = re.compile(r"([^\.]+)")
  144. __RE_REPEAD_DOT: Final = re.compile(r"^\.{3,}")
  145. @property
  146. def min_len(self) -> int:
  147. return self._min_len
  148. def __init__(
  149. self,
  150. min_len: int,
  151. max_len: int,
  152. fs_encoding: Optional[str],
  153. check_reserved: bool,
  154. additional_reserved_names: Optional[Sequence[str]] = None,
  155. platform_max_len: Optional[int] = None,
  156. platform: Optional[PlatformType] = None,
  157. ) -> None:
  158. if min_len <= 0:
  159. min_len = DEFAULT_MIN_LEN
  160. self._min_len = max(min_len, 1)
  161. super().__init__(
  162. max_len=max_len,
  163. fs_encoding=fs_encoding,
  164. check_reserved=check_reserved,
  165. additional_reserved_names=additional_reserved_names,
  166. platform_max_len=platform_max_len,
  167. platform=platform,
  168. )
  169. self._validate_max_len()
  170. def _validate_reserved_keywords(self, name: str) -> None:
  171. if not self._check_reserved:
  172. return
  173. root_name = self.__extract_root_name(name)
  174. base_name = os.path.basename(name)
  175. for name in (root_name, base_name):
  176. if self._is_reserved_keyword(name):
  177. raise ReservedNameError(
  178. f"'{root_name}' is a reserved name",
  179. reusable_name=False,
  180. reserved_name=root_name,
  181. platform=self.platform,
  182. )
  183. def _validate_max_len(self) -> None:
  184. if self.max_len < 1:
  185. raise ValueError("max_len must be greater or equal to one")
  186. if self.min_len > self.max_len:
  187. raise ValueError("min_len must be lower than max_len")
  188. @classmethod
  189. def __extract_root_name(cls, path: str) -> str:
  190. if path in (".", ".."):
  191. return path
  192. if cls.__RE_REPEAD_DOT.search(path):
  193. return path
  194. match = cls.__RE_ROOT_NAME.match(os.path.basename(path))
  195. if match is None:
  196. return ""
  197. return match.group(1)