| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- # coding: utf-8
- """
- Utilities for dealing with text encodings
- """
- from __future__ import annotations
- # -----------------------------------------------------------------------------
- # Copyright (C) 2008-2012 The IPython Development Team
- #
- # Distributed under the terms of the BSD License. The full license is in
- # the file COPYING, distributed as part of this software.
- # -----------------------------------------------------------------------------
- # -----------------------------------------------------------------------------
- # Imports
- # -----------------------------------------------------------------------------
- import sys
- import locale
- import warnings
- from typing import Any, Literal
- # to deal with the possibility of sys.std* not being a stream at all
def get_stream_enc(stream: Any, default: str | None = None) -> str | None:
    """Look up ``stream.encoding``, falling back to ``default``.

    ``sys.std*`` may have been replaced by an object that is not a real
    stream, so the ``encoding`` attribute is not assumed to exist.

    Parameters
    ----------
    stream
        Any object; typically one of ``sys.stdin``/``sys.stdout``/``sys.stderr``.
    default
        Value returned when ``stream`` has no ``encoding`` attribute or the
        attribute is falsy (``None``, ``""``...). ``None`` if not provided.

    Returns
    -------
    The stream's ``encoding`` attribute when it is present and truthy,
    otherwise ``default``.
    """
    # A missing attribute and a falsy attribute are treated the same way.
    stream_encoding = getattr(stream, "encoding", None)
    if stream_encoding:
        return stream_encoding
    return default
# Unique marker used as the default of ``prefer_stream`` so that *any*
# explicitly passed value (including ``None``/``False``) can be detected.
_sentinel: object = object()


# Less conservative replacement for sys.getdefaultencoding, that will try
# to match the environment.
# Defined here as central function, so if we find better choices, we
# won't need to make changes all over IPython.
def getdefaultencoding(prefer_stream: object | bool = _sentinel) -> str:
    """Return IPython's guess for the default encoding for bytes as text.

    The lookup order is:

    1. ``sys.stdin.encoding``, to match the calling terminal (often ``None``
       for subprocesses);
    2. ``locale.getpreferredencoding()``, which should be a sensible platform
       default (that respects the LANG environment), used when step 1 gave
       nothing or gave ``"ascii"``;
    3. ``sys.getdefaultencoding()``, the most conservative option, usually
       UTF8 as of Python 3.

    Parameters
    ----------
    prefer_stream
        Deprecated. Passing any value emits a ``DeprecationWarning``; the
        value itself is ignored and stdin is always consulted first.

    Returns
    -------
    The name of the guessed encoding. ``"cp0"`` is never returned:
    ``"cp1252"`` is substituted (with a ``RuntimeWarning``).
    """
    if prefer_stream is not _sentinel:
        warnings.warn(
            "getdefaultencoding(prefer_stream=) argument is deprecated since "
            "IPython 9.0, getdefaultencoding() will take no argument in the "
            "future. If you rely on `prefer_stream`, please open an issue on "
            "the IPython repo.",
            DeprecationWarning,
            stacklevel=2,
        )

    # The stream is always preferred, whatever ``prefer_stream`` was.
    enc: str | None = get_stream_enc(sys.stdin)
    if not enc or enc == "ascii":
        try:
            # There are reports of getpreferredencoding raising errors
            # in some cases, which may well be fixed, but let's be conservative here.
            enc = locale.getpreferredencoding()
        except Exception:
            # Deliberate best effort: keep whatever stdin reported (possibly
            # nothing) and let the fallback below decide.
            pass
    enc = enc or sys.getdefaultencoding()

    # On windows `cp0` can be returned to indicate that there is no code page.
    # Since cp0 is an invalid encoding return instead cp1252 which is the
    # Western European default.
    if enc == "cp0":
        warnings.warn(
            "Invalid code page cp0 detected - using cp1252 instead. "
            "If cp1252 is incorrect please ensure a valid code page "
            "is defined for the process.",
            RuntimeWarning,
            stacklevel=2,
        )
        return "cp1252"
    return enc
# Encoding guessed once, when this module is imported, by calling
# getdefaultencoding() with no argument; the value is not recomputed later.
DEFAULT_ENCODING = getdefaultencoding()
|