| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- import re
- import sys
- from ast import literal_eval
- from functools import total_ordering
- from typing import NamedTuple, Union
# Characters that str.splitlines() treats as line boundaries although they do
# not end a line in Python source code. In Python source only \n (Line Feed,
# 0xA) and \r (Carriage Return, 0xD) are allowed to split lines.
_NON_LINE_BREAKS = (
    '\v',      # 0xB, Vertical Tabulation
    '\f',      # 0xC, Form Feed
    '\x1C',    # File Separator
    '\x1D',    # Group Separator
    '\x1E',    # Record Separator
    '\x85',    # Next Line (NEL), equivalent to CR+LF; used to mark
               # end-of-line on some IBM mainframes
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
)
class Version(NamedTuple):
    """A three-part version number: ``major.minor.micro``."""

    major: int
    minor: int
    micro: int
def split_lines(string: str, keepends: bool = False) -> "list[str]":
    r"""
    Split Python source text into lines.

    Unlike :py:meth:`str.splitlines`, only ``\n``, ``\r\n`` and ``\r`` end a
    line here. Form feeds and the other characters listed in
    ``_NON_LINE_BREAKS`` are kept as ordinary text inside their line.

    The result is never empty: an empty input yields ``[""]``, and input that
    ends with a line break yields a trailing ``""`` entry, with or without
    ``keepends``.

    :param string: The text to split.
    :param keepends: If true, each line keeps its terminating line break.
    """
    if not keepends:
        return re.split(r'\n|\r\n|\r', string)

    pieces = string.splitlines(True)

    # str.splitlines() also cut after characters that are not line breaks in
    # Python source. Remember every piece that ends in one of them ...
    glue_points = [
        pos for pos, piece in enumerate(pieces)
        if piece and piece[-1] in _NON_LINE_BREAKS
    ]
    # ... and join it with its successor. Going back to front keeps the
    # remaining positions valid while entries are deleted.
    for pos in reversed(glue_points):
        following = pos + 1
        if following < len(pieces):
            pieces[pos] = pieces[pos] + pieces[following]
            del pieces[following]
        # Otherwise the piece is the last one and there is nothing to join.

    # str.splitlines() never reports the empty line after a final line break
    # (nor anything for empty input), so add it to match the other branch.
    if string == '' or string.endswith(('\n', '\r')):
        pieces.append('')
    return pieces
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
) -> str:
    """
    Decode Python source code to ``str``, honouring a UTF-8 byte-order mark
    and PEP 263 encoding declarations.

    A ``str`` input is returned unchanged. For bytes the encoding is chosen
    in this order: UTF-8 if the data starts with the UTF-8 BOM, otherwise the
    name found in a ``coding:``/``coding=`` declaration within the first two
    lines, otherwise the ``encoding`` argument.

    :param source: The source code as text or as raw bytes.
    :param encoding: Fallback encoding when the source declares none. See
        :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the declared encoding is unknown and ``errors``
        is not ``'replace'``.
    """
    if isinstance(source, str):
        # Already text; only bytes need decoding.
        return source

    # For the rules about encoding declarations in Python, look at:
    # - http://www.python.org/dev/peps/pep-0263/
    # - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
    if source.startswith(b'\xef\xbb\xbf'):
        # UTF-8 byte-order mark
        encoding = 'utf-8'
    else:
        # A declaration only counts on the first or the second line.
        header = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
        declaration = re.search(br"coding[=:]\s*([-\w.]+)", header)
        if declaration is not None:
            encoding = str(declaration.group(1), 'ascii', 'replace')
        # Otherwise keep the caller's default -> PEP 263.

    try:
        return str(source, encoding, errors)
    except LookupError:
        if errors != 'replace':
            raise
        # The declared encoding does not exist, e.g. `# coding: foo-8`. In
        # 'replace' mode we still want a result, so decode as UTF-8 instead.
        return str(source, 'utf-8', errors)
def version_info() -> Version:
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.

    Only the first three components of ``parso.__version__`` are used; they
    must be numeric. Any further components (for example a pre-release tag
    such as ``a1``) are ignored.
    """
    from parso import __version__

    components = re.findall(r'[a-z]+|\d+', __version__)
    # ``Version`` has exactly three fields, so passing more components would
    # raise TypeError. Build it from major, minor and micro only.
    return Version(*[int(part) for part in components[:3]])
class _PythonVersionInfo(NamedTuple):
    major: int
    minor: int


@total_ordering
class PythonVersionInfo(_PythonVersionInfo):
    """
    A ``(major, minor)`` Python version.

    ``>``, ``==`` and ``!=`` against a tuple require that tuple to have
    exactly two items and raise :class:`ValueError` otherwise. For operands
    that are not tuples these methods return ``NotImplemented``, so ``==``
    and ``!=`` fall back to Python's defaults and ``>`` raises
    :class:`TypeError`.
    """
    # NOTE: defining ``__eq__`` without ``__hash__`` makes instances of this
    # class unhashable.

    def __gt__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        # Not a tuple: hand the tuple implementation's answer back to the
        # interpreter instead of discarding it and returning a truthy tuple.
        return super().__gt__(other)

    def __eq__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        # Not a tuple: return the inherited result rather than falling off
        # the end of the function and answering ``None``.
        return super().__eq__(other)

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            # Negating NotImplemented is an error; let Python handle it.
            return result
        return not result
def _parse_version(version) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3"``, ``"3.8"`` or ``"3.10.1"`` into a
    :class:`PythonVersionInfo`.

    A micro version and a trailing ``a``/``b``/``rc`` tag followed by one
    digit are accepted but ignored. If the minor version is missing, ``2``
    maps to 2.7 and ``3`` maps to 3.6.

    :raises ValueError: If ``version`` does not have the expected format.
    :raises NotImplementedError: If only a major version other than 2 or 3
        is given.
    """
    parsed = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
    if parsed is None:
        raise ValueError('The given version is not in the right format. '
                         'Use something like "3.8" or "3".')

    major = int(parsed.group(1))
    minor_text = parsed.group(2)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Use the latest Python in case it's not exactly defined, because the
    # grammars are typically backwards compatible?
    if major == 2:
        return PythonVersionInfo(major, 7)
    if major == 3:
        return PythonVersionInfo(major, 6)
    raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
def parse_version_string(version: str = None) -> PythonVersionInfo:
    """
    Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
    returns the corresponding version info, which always consists of exactly
    two parts: major and minor.

    :param version: The version string. ``None`` selects the version of the
        running interpreter.
    :raises TypeError: If ``version`` is neither ``None`` nor a string.
    """
    if version is None:
        running = sys.version_info[:2]
        version = '%s.%s' % running
    elif not isinstance(version, str):
        raise TypeError('version must be a string like "3.8"')

    return _parse_version(version)
|