| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005 |
- # Copyright 2015 Google Inc. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import enum
- import math
- import re
- from typing import (
- Any,
- Callable,
- IO,
- Iterable,
- Mapping,
- Optional,
- Set,
- Tuple,
- Type,
- Union,
- )
- import unicodedata
- from json5.parser import Parser
- # Cache for the compiled reserved-word regex. It starts out as None and is
- # filled in by JSON5Encoder.is_reserved_word() the first time that method
- # runs; it is consulted when deciding whether an object key may be unquoted.
- _reserved_word_re: Optional[re.Pattern] = None
class QuoteStyle(enum.Enum):
    """Selects which quote character is used when a string is encoded.

    Members:

    - `ALWAYS_DOUBLE`: every quoted string is wrapped in double quotes and
      any double quotes inside it are escaped. This is the default used by
      `dumps()`, for compatibility with the `json` module and older
      versions of `json5`.
    - `ALWAYS_SINGLE`: every quoted string is wrapped in single quotes and
      any single quotes inside it are escaped.
    - `PREFER_DOUBLE`: like `ALWAYS_DOUBLE`, *unless* the string contains
      more double quotes than single quotes; in that case it is
      single-quoted and its single quotes are escaped.
    - `PREFER_SINGLE`: like `ALWAYS_SINGLE`, *unless* the string contains
      more single quotes than double quotes; in that case it is
      double-quoted and its double quotes are escaped.

    *Note:* the two `PREFER_*` styles can cost extra time. The encoder has
    to walk the whole string to count both kinds of quote; it guesses a
    quote character while doing so, and when the guess turns out to be
    wrong the entire string is encoded a second time. If performance
    matters, (a) this library is probably not the right tool, since it is
    not very fast to begin with, and (b) `ALWAYS_DOUBLE` or `ALWAYS_SINGLE`
    avoid the second pass.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'
def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize the JSON5 document held by ``fp`` into a Python object.

    ``fp`` only needs a ``.read()`` method. Its contents are fetched with a
    single ``fp.read()`` call (so the whole input is held in memory) and
    passed to `parse()` together with every keyword argument, unchanged.

    The arguments mirror ``json.load()`` with these differences:

    - `cls` must be left as None; `parse()` asserts this because custom
      decoders are not supported.
    - `allow_duplicate_keys` (default True, matching ``json.load()``): when
      False, an object containing the same key twice is reported as a
      `ValueError`.
    - `consume_trailing` (default True): when True the only legal
      characters after the parsed value are whitespace; when False parsing
      stops once a valid value has been read and whatever follows is
      ignored.
    - `start`: zero-based offset at which parsing begins, counted within
      the text returned by ``fp.read()``; None means offset 0.
      NOTE(review): this function never seeks, so the offset is relative
      to wherever ``fp`` was positioned when `load` was called.

    Returns the deserialized value.

    Raises:
    - `ValueError` if `parse()` reports an error for the document. This is
      different from the `json` module, which raises
      `json.JSONDecodeError`.
    - `UnicodeDecodeError` if ``fp.read()`` yields bytes that cannot be
      decoded as UTF-8 (or as `encoding`, when given). This matches the
      `json` module.
    """
    value, error, _ = parse(
        fp.read(),
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if not error:
        return value
    raise ValueError(error)
def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Deserialize ``s`` (text containing a JSON5 document) into a Python
    object.

    This is a thin wrapper around `parse()`: all arguments are forwarded
    unchanged, and an error reported by `parse()` is turned into an
    exception.

    The arguments mirror ``json.loads()`` with these differences:

    - `cls` must be left as None; `parse()` asserts this because custom
      decoders are not supported.
    - `allow_duplicate_keys` (default True, matching ``json.loads()``):
      when False, an object containing the same key twice is reported as a
      `ValueError`.
    - `consume_trailing` (default True): when True the only legal
      characters after the parsed value are whitespace; when False parsing
      stops once a valid value has been read and whatever follows is
      ignored.
    - `start`: zero-based offset into ``s`` at which parsing begins; None
      means offset 0.

    Returns the deserialized value.

    Raises:
    - `ValueError` if ``s`` is empty or is not a valid document. This is
      different from the `json` module, which raises
      `json.JSONDecodeError`.
    - `UnicodeDecodeError` if ``s`` is a byte string that cannot be decoded
      as UTF-8 (or as `encoding`, when given). This matches the `json`
      module.
    """
    forwarded = {
        'encoding': encoding,
        'cls': cls,
        'object_hook': object_hook,
        'parse_float': parse_float,
        'parse_int': parse_int,
        'parse_constant': parse_constant,
        'strict': strict,
        'object_pairs_hook': object_pairs_hook,
        'allow_duplicate_keys': allow_duplicate_keys,
        'consume_trailing': consume_trailing,
        'start': start,
    }
    result, problem, _ = parse(s, **forwarded)
    if problem:
        raise ValueError(problem)
    return result
def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Parse ``s``, returning positional information along with a value.

    This accepts the same arguments as `loads()` but, instead of raising
    on a malformed document, it reports the problem in its return value
    and also tells the caller where parsing stopped:

    - `consume_trailing` says whether to keep reading after a valid value
      has been parsed. If True (the default), any trailing characters must
      be whitespace. If False, parsing stops once a valid value has been
      reached.
    - `start` is the zero-based offset in ``s`` to begin parsing from; None
      is treated as 0.
    - `strict` and `consume_trailing` are handed to the parser as the
      `_strict` and `_consume_trailing` global variables.

    Returns a tuple of ``(value, error_string, position)``. If the text
    was a legal value, `value` is the deserialized value, `error_string`
    is None, and `position` is one past the zero-based offset where the
    parser stopped reading. Otherwise `value` is None, `error_string` is
    the text of the exception that would have been raised, and `position`
    is the farthest zero-based offset at which the parser hit an error.
    Errors raised as `ValueError` while converting the parsed tree (for
    example a duplicate key when `allow_duplicate_keys` is False) are
    reported the same way.

    Raises:
    - `AssertionError` if `cls` is not None; custom decoders are not
      supported.
    - `UnicodeDecodeError` if ``s`` is a byte string that cannot be decoded
      as UTF-8 (or as `encoding`, when given). This matches the `json`
      module.
    - `ValueError` if ``s`` is empty. Every other error in the document is
      returned as the second element of the tuple rather than raised.

    `parse()` is useful when a string may contain several values: call it
    repeatedly, setting `start` to the `position` returned by the previous
    call:

        >>> import json5
        >>> s = '1 2 3 4'
        >>> values = []
        >>> start = 0
        >>> while True:
        ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
        ...     if err:
        ...         raise ValueError(err)
        ...     values.append(v)
        ...     start = pos
        ...     if start == len(s) or s[start:].isspace():
        ...         # Reached the end of the string (ignoring trailing
        ...         # whitespace).
        ...         break
        >>> values
        [1, 2, 3, 4]
    """
    assert cls is None, 'Custom decoders are not supported'

    # Byte strings are decoded up front; UTF-8 unless the caller says
    # otherwise.
    text = s.decode(encoding or 'utf-8') if isinstance(s, bytes) else s
    if not text:
        raise ValueError('Empty strings are not legal JSON5')

    parser_globals = {
        '_strict': strict,
        '_consume_trailing': consume_trailing,
    }
    ast, err, pos = Parser(text, '<string>', pos=start or 0).parse(
        global_vars=parser_globals
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
    except ValueError as exc:
        # Conversion failures are reported, not raised, like parse errors.
        return None, str(exc), pos
    return value, None, pos
def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    """Turn a parsed AST into Python values, applying the caller's hooks.

    Any of `parse_float`, `parse_int` and `parse_constant` that is falsy
    is replaced by a default (`float`, `int`, and a converter that maps
    the ``Infinity``/``NaN`` spellings onto Python floats). Objects are
    built from their list of (key, value) pairs: `object_pairs_hook`, when
    given, receives the pairs; otherwise `object_hook`, when given,
    receives the resulting dict; otherwise the dict itself is used.

    Raises `ValueError` when `allow_duplicate_keys` is false and an object
    repeats a key.
    """

    def _default_constant(text):
        # float() understands 'inf' / 'nan', not the JSON5 spellings.
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _build_object(pairs):
        if not allow_duplicate_keys:
            names_seen = set()
            for name, _value in pairs:
                if name in names_seen:
                    raise ValueError(f'Duplicate key "{name}" found in object')
                names_seen.add(name)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        mapping = dict(pairs)
        return object_hook(mapping) if object_hook else mapping

    return _walk_ast(
        ast,
        _build_object,
        parse_float or float,
        parse_int or int,
        parse_constant or _default_constant,
    )
- def _walk_ast(
- el,
- dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
- parse_float,
- parse_int,
- parse_constant,
- ):
- if el == 'None':
- return None
- if el == 'True':
- return True
- if el == 'False':
- return False
- ty, v = el
- if ty == 'number':
- if v.startswith('0x') or v.startswith('0X'):
- return parse_int(v, base=16)
- if '.' in v or 'e' in v or 'E' in v:
- return parse_float(v)
- if 'Infinity' in v or 'NaN' in v:
- return parse_constant(v)
- return parse_int(v)
- if ty == 'string':
- return v
- if ty == 'object':
- pairs = []
- for key, val_expr in v:
- val = _walk_ast(
- val_expr, dictify, parse_float, parse_int, parse_constant
- )
- pairs.append((key, val))
- return dictify(pairs)
- if ty == 'array':
- return [
- _walk_ast(el, dictify, parse_float, parse_int, parse_constant)
- for el in v
- ]
- raise ValueError('unknown el: ' + el) # pragma: no cover
def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` as JSON5 and write the result to ``fp``.

    ``fp`` only needs a ``.write()`` method. The whole document is first
    produced by `dumps()`, which receives every keyword argument
    unchanged (see `dumps()` for their meaning), and is then written with
    a single ``fp.write()`` call.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output as
    ``json.dump(obj, fp)``.
    """
    document = dumps(
        obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    fp.write(document)
def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw: Any,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    An encoder (`cls`, or `JSON5Encoder` when `cls` is not given) is
    constructed with all of the keyword arguments and asked to encode
    ``obj``.

    Supports the same arguments as ``json.dumps()``, except that:

    - There is no ``encoding`` parameter; if passed, it is treated like
      any other unrecognized keyword argument. A str is always returned.

    - `quote_keys`: by default, object keys that are legal identifiers
      (and not reserved words) are written without quotes. If you pass
      ``quote_keys=True``, every key is quoted, as in regular JSON.

    - `trailing_commas`: by default, when lists and objects span multiple
      lines of output (i.e., when ``indent`` is not None), the last item
      is followed by a comma. If you pass ``trailing_commas=False``, it is
      not, as in regular JSON.

    - `allow_duplicate_keys`: a number, a boolean, or ``None`` used as a
      dict key is converted to the corresponding JSON string, e.g. "1",
      "true", or "null". By default, ``dumps()`` matches the `json`
      module's behavior and produces an object with duplicated keys if
      you mix keys of different types that convert to the same string;
      e.g., ``{1: "foo", "1": "bar"}`` produces
      '{"1": "foo", "1": "bar"}'. If you pass
      ``allow_duplicate_keys=False``, a `ValueError` is raised instead.

    - `quote_style` controls how strings are quoted. See the
      documentation of the `QuoteStyle` class for the available styles.
      *Note*: strings that are being written as unquoted keys are not
      affected by this parameter and remain unquoted.
      *`quote_style` was added in version 0.10.0*.

    Other keyword arguments are allowed and are passed to the encoder so
    that custom encoders can see them; the default encoder ignores them,
    in an attempt to provide some amount of forward-compatibility.

    *Note:* the standard JSON module explicitly calls `int.__repr__(obj)`
    and `float.__repr__(obj)` to encode ints and floats, thereby bypassing
    any custom representation defined by subclasses of int and float, and,
    for compatibility, JSON5 does the same thing. To override this
    behavior, create a subclass of JSON5Encoder that overrides `encode()`
    and handles your custom representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...    def __repr__(self):
    ...        return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...    def encode(
    ...        self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...    ) -> str:
    ...        if isinstance(obj, Hex):
    ...            return repr(obj)
    ...        return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output as
    ``json.dumps(obj).``
    """
    encoder_type = cls if cls else JSON5Encoder
    encoder = encoder_type(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    # Top-level call: nothing has been visited yet and nesting depth is 0.
    return encoder.encode(obj, seen=set(), level=0, as_key=False)
class JSON5Encoder:
    """Serializes Python objects to JSON5 text.

    Subclass this and pass the subclass as `cls` to `dumps()` to customize
    encoding. The constructor takes the same keyword arguments as
    `dumps()`; unrecognized keyword arguments are accepted and discarded.

    *Added in version 0.10.0.*
    """

    # Characters that have a dedicated two-character escape.
    _SHORT_ESCAPES = {
        '\\': '\\\\',
        "'": "\\'",
        '"': '\\"',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
        '\b': '\\b',
        '\f': '\\f',
        '\v': '\\v',
        '\0': '\\0',
    }

    # Unicode general categories accepted for the first character of an
    # identifier, and for the characters after the first.
    _ID_START_CATEGORIES = frozenset(('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'))
    _ID_CONTINUE_CATEGORIES = _ID_START_CATEGORIES | frozenset(
        ('Nd', 'Mn', 'Mc', 'Pc')
    )

    # List taken from section 7.6.1 of ECMA-262, version 5.1.
    # https://262.ecma-international.org/5.1/#sec-7.6.1.
    # This includes currently reserved words, words reserved
    # for future use (both as of 5.1), null, true, and false.
    _RESERVED_WORDS = (
        'break',
        'case',
        'catch',
        'class',
        'const',
        'continue',
        'debugger',
        'default',
        'delete',
        'do',
        'else',
        'enum',
        'export',
        'extends',
        'false',
        'finally',
        'for',
        'function',
        'if',
        'implements',
        'import',
        'in',
        'instanceof',
        'interface',
        'let',
        'new',
        'null',
        'package',
        'private',
        'protected',
        'public',
        'return',
        'static',
        'super',
        'switch',
        'this',
        'throw',
        'true',
        'try',
        'typeof',
        'var',
        'void',
        'while',
        'with',
        'yield',
    )

    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Records the encoding options; the keyword args are the same as
        for `dumps()`."""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        # `separators` keeps the caller's value (possibly None); the
        # effective separators live in item_separator / kv_separator.
        self.separators = separators
        if separators is None:
            # No space after the comma when indenting, since every item is
            # followed by a newline anyway.
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` holds the ids of the containers currently being encoded; it
        is used to detect circular references when `check_circular` is
        True. A falsy value (None or an empty set) is replaced by a new
        set.

        `level` represents the current indentation level, which increases
        by one for each level of dict or list nesting.

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a quoted string
        representation of the object, unless obj is a string that can be
        an identifier (and quote_keys is false and obj isn't a reserved
        word). If the object should not be used as a key, `TypeError`
        should be raised; that allows the base implementation to implement
        `skipkeys` properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        # Only basic types (str, bool, None, int, float) may be keys.
        if as_key:
            raise TypeError(f'Invalid key {obj!r}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns None if the object is not a basic type."""
        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'
        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)
        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)
        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        """Encodes an int, double-quoting it when it is used as a key."""
        # int.__repr__ is called explicitly so that a custom __repr__ on
        # an int subclass is bypassed.
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        """Encodes a float, double-quoting it when it is used as a key.

        Raises `ValueError` for infinities and NaN when `allow_nan` is
        false."""
        if math.isfinite(obj):
            # float.__repr__ is called explicitly so that a custom
            # __repr__ on a float subclass is bypassed.
            s = float.__repr__(obj)
        else:
            if not self.allow_nan:
                raise ValueError(f'Illegal JSON5 value: {obj}')
            if math.isnan(obj):
                s = 'NaN'
            elif obj > 0:
                s = 'Infinity'
            else:
                s = '-Infinity'
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        """Encodes a string; a key that is a non-reserved identifier is
        returned bare unless `quote_keys` is set."""
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns `obj` quoted and escaped according to `quote_style`.

        With one of the PREFER styles the string is first encoded with the
        preferred quote character; if it turns out to contain more of that
        quote character than of the other one, it is encoded again with
        the other quote character."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        use_double = quote_style in (
            QuoteStyle.ALWAYS_DOUBLE,
            QuoteStyle.PREFER_DOUBLE,
        )
        for idx, ch in enumerate(obj):
            if ch == dq:
                # At first we will guess at which quotes to escape. If
                # we guess wrong, we reencode the string below.
                double_quotes_seen += 1
                encoded_ch = self._escape_ch(dq) if use_double else dq
            elif ch == sq:
                single_quotes_seen += 1
                encoded_ch = sq if use_double else self._escape_ch(sq)
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            elif ch == '\0' and '0' <= obj[idx + 1 : idx + 2] <= '9':
                # The short NUL escape must not be followed by a decimal
                # digit, so use the four-digit form in that case.
                encoded_ch = '\\u0000'
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    # The line and paragraph separators are always escaped.
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        quote = dq if use_double else sq
        return quote + ''.join(ret) + quote

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        short = self._SHORT_ESCAPES.get(ch)
        if short is not None:
            return short

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        # Characters beyond the BMP are written as a UTF-16 surrogate pair.
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        """Encodes a dict-like or list-like object, or whatever `default()`
        converts `obj` into.

        Raises `ValueError` if `check_circular` is set and `obj` is
        already being encoded further up the call chain."""
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        # Ideally we'd use collections.abc.Mapping and collections.abc.Sequence
        # here, but for backwards-compatibility with potential old callers,
        # we only check for the two attributes we need in each case.
        if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
            s = self._encode_dict(obj, seen, level + 1)
        elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
            s = self._encode_array(obj, seen, level + 1)
        else:
            s = self.encode(self.default(obj), seen, level, as_key=False)
            assert s is not None

        if self.check_circular:
            # Done with this container; the same object may legitimately
            # appear again elsewhere in the tree.
            seen.remove(i)
        return s

    def _encode_dict(self, obj: Any, seen: set, level: int) -> str:
        """Encodes a mapping as a JSON5 object.

        Keys that cannot be encoded raise `TypeError` unless `skipkeys` is
        set, in which case they are omitted. Raises `ValueError` on a
        repeated encoded key when `allow_duplicate_keys` is false."""
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        keys = sorted(obj.keys()) if self.sort_keys else obj.keys()

        items = []
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            val_str = self.encode(obj[key], seen, level, as_key=False)
            items.append(key_str + kv_sep + val_str)

        if not items:
            # Every key was skipped; emitting the indentation and trailing
            # comma around nothing would not be a valid object.
            return '{}'
        return '{' + indent_str + item_sep.join(items) + end_str + '}'

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a sequence as a JSON5 array."""
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        """Returns the text placed before each item of a container at
        nesting `level`, and the text placed before its closing bracket.

        Both are empty when `indent` is None. Otherwise each starts a new
        line, and the closing text begins with a comma when
        `trailing_commas` is set."""
        if self.indent is None:
            return '', ''

        if isinstance(self.indent, int):
            # Zero or a negative count yields newlines with no indentation.
            unit = ' ' * self.indent if self.indent > 0 else ''
        else:
            unit = self.indent

        end_str = ',' if self.trailing_commas else ''
        return '\n' + unit * level, end_str + '\n' + unit * (level - 1)

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers change in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if not key:
            return False
        first = key[0]
        if first not in ('$', '_') and not self._is_id_start(first):
            return False
        return all(
            ch in ('$', '_') or self._is_id_continue(ch) for ch in key[1:]
        )

    def _is_id_start(self, ch: str) -> bool:
        """Returns whether `ch` is in a category that may start an
        identifier (`$` and `_` are handled by the caller)."""
        return unicodedata.category(ch) in self._ID_START_CATEGORIES

    def _is_id_continue(self, ch: str) -> bool:
        """Returns whether `ch` is in a category that may appear after the
        first character of an identifier."""
        return unicodedata.category(ch) in self._ID_CONTINUE_CATEGORIES

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not yet been updated to handle
        the changes."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # Compiled once and cached at module level.
            _reserved_word_re = re.compile('|'.join(self._RESERVED_WORDS))
        # fullmatch: the key must be exactly a reserved word, so a word
        # followed by a newline does not count.
        return _reserved_word_re.fullmatch(key) is not None
- def _raise_type_error(obj) -> Any:
- raise TypeError(f'{repr(obj)} is not JSON5 serializable')
|