_symbol.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. """
  2. .. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
  3. """
  4. import re
  5. from collections.abc import Sequence
  6. from typing import Final
  7. from ._common import ascii_symbols, to_str, unprintable_ascii_chars
  8. from .error import InvalidCharError
  9. __RE_SYMBOL: Final = re.compile(
  10. "[{}]".format(re.escape("".join(ascii_symbols + unprintable_ascii_chars))), re.UNICODE
  11. )
  12. def validate_symbol(text: str) -> None:
  13. """
  14. Verifying whether symbol(s) included in the ``text`` or not.
  15. Args:
  16. text:
  17. Input text to validate.
  18. Raises:
  19. ValidationError (ErrorReason.INVALID_CHARACTER):
  20. If symbol(s) included in the ``text``.
  21. """
  22. match_list = __RE_SYMBOL.findall(to_str(text))
  23. if match_list:
  24. raise InvalidCharError(f"invalid symbols found: {match_list}")
  25. def replace_symbol(
  26. text: str,
  27. replacement_text: str = "",
  28. exclude_symbols: Sequence[str] = [],
  29. is_replace_consecutive_chars: bool = False,
  30. is_strip: bool = False,
  31. ) -> str:
  32. """
  33. Replace all of the symbols in the ``text``.
  34. Args:
  35. text:
  36. Input text.
  37. replacement_text:
  38. Replacement text.
  39. exclude_symbols:
  40. Symbols that were excluded from the replacement.
  41. is_replace_consecutive_chars:
  42. If |True|, replace consecutive multiple ``replacement_text`` characters
  43. to a single character.
  44. is_strip:
  45. If |True|, strip ``replacement_text`` from the beginning/end of the replacement text.
  46. Returns:
  47. A replacement string.
  48. Example:
  49. :ref:`example-sanitize-symbol`
  50. """
  51. if exclude_symbols:
  52. regexp = re.compile(
  53. "[{}]".format(
  54. re.escape(
  55. "".join(set(ascii_symbols + unprintable_ascii_chars) - set(exclude_symbols))
  56. )
  57. ),
  58. re.UNICODE,
  59. )
  60. else:
  61. regexp = __RE_SYMBOL
  62. try:
  63. new_text = regexp.sub(replacement_text, to_str(text))
  64. except TypeError:
  65. raise TypeError("text must be a string")
  66. if not replacement_text:
  67. return new_text
  68. if is_replace_consecutive_chars:
  69. new_text = re.sub(f"{re.escape(replacement_text)}+", replacement_text, new_text)
  70. if is_strip:
  71. new_text = new_text.strip(replacement_text)
  72. return new_text