# Source code for efro.dataclassio._api

# Released under the MIT License. See LICENSE for details.
#
"""Functionality for importing, exporting, and validating dataclasses.

This allows complex nested dataclasses to be flattened to json-compatible
data and restored from said data. It also gracefully handles and preserves
unrecognized attribute data, allowing older clients to interact with newer
data formats in a nondestructive manner.
"""

from __future__ import annotations

import json
from enum import Enum
from typing import TYPE_CHECKING, TypeVar

from efro.dataclassio._outputter import _Outputter
from efro.dataclassio._inputter import _Inputter
from efro.dataclassio._base import Codec

if TYPE_CHECKING:
    from typing import Any

T = TypeVar('T')


class JsonStyle(Enum):
    """Different style types for json."""

    #: Single line, no spaces, no sorting. Not deterministic.
    #: Use this where speed is more important than determinism.
    FAST = 'fast'

    #: Single line, no spaces, sorted keys. Deterministic.
    #: Use this when output may be hashed or compared for equality.
    SORTED = 'sorted'

    #: Multiple lines, spaces, sorted keys. Deterministic.
    #: Use this for pretty human readable output.
    PRETTY = 'pretty'
def dataclass_to_dict(
    obj: Any,
    codec: Codec = Codec.JSON,
    coerce_to_float: bool = True,
    discard_extra_attrs: bool = False,
) -> dict:
    """Given a dataclass object, return a json-friendly dict.

    All values will be checked to ensure they match the types specified
    on fields. Note that a limited set of types and data configurations
    is supported.

    Values with type Any will be checked to ensure they match types
    supported directly by json. This does not include types such as
    tuples which are implicitly translated by Python's json module (as
    this would break the ability to do a lossless round-trip with data).

    If coerce_to_float is True, integer values present on float typed
    fields will be converted to float in the dict output. If False, a
    TypeError will be triggered.
    """
    outputter = _Outputter(
        obj,
        create=True,
        codec=codec,
        coerce_to_float=coerce_to_float,
        discard_extra_attrs=discard_extra_attrs,
    )
    result = outputter.run()
    # The outputter should always hand back a dict in create mode.
    assert isinstance(result, dict)
    return result
def dataclass_to_json(
    obj: Any,
    coerce_to_float: bool = True,
    pretty: bool = False,
    sort_keys: bool | None = None,
) -> str:
    """Utility function; return a json string from a dataclass instance.

    Basically json.dumps(dataclass_to_dict(...)).

    By default, keys are sorted for pretty output and not otherwise, but
    this can be overridden by supplying a value for the 'sort_keys' arg.
    """
    flattened = dataclass_to_dict(
        obj=obj, coerce_to_float=coerce_to_float, codec=Codec.JSON
    )
    # Unless explicitly specified, sorting follows prettiness.
    do_sort = pretty if sort_keys is None else sort_keys
    if pretty:
        return json.dumps(flattened, indent=2, sort_keys=do_sort)
    return json.dumps(flattened, separators=(',', ':'), sort_keys=do_sort)
def dataclass_from_dict(
    cls: type[T],
    values: dict,
    *,
    codec: Codec = Codec.JSON,
    coerce_to_float: bool = True,
    allow_unknown_attrs: bool = True,
    discard_unknown_attrs: bool = False,
    lossy: bool = False,
) -> T:
    """Given a dict, return a dataclass of a given type.

    The dict must be formatted to match the specified codec (generally
    json-friendly object types). This means that sequence values such as
    tuples or sets should be passed as lists, enums should be passed as
    their associated values, nested dataclasses should be passed as
    dicts, etc.

    All values are checked to ensure their types/values are valid. Data
    for attributes of type Any will be checked to ensure they match
    types supported directly by json. This does not include types such
    as tuples which are implicitly translated by Python's json module
    (as this would break the ability to do a lossless round-trip with
    data).

    If `coerce_to_float` is True, int values passed for float typed
    fields will be converted to float values. Otherwise, a TypeError is
    raised.

    If 'allow_unknown_attrs' is False, AttributeErrors will be raised
    for attributes present in the dict but not on the data class.
    Otherwise, they will be preserved as part of the instance and
    included if it is exported back to a dict, unless
    `discard_unknown_attrs` is True, in which case they will simply be
    discarded.

    If `lossy` is True, Enum attrs and IOMultiType types are allowed to
    use any fallbacks defined for them. This can allow older schemas to
    successfully load newer data, but this can fundamentally modify the
    data, so the resulting object is flagged as 'lossy' and prevented
    from being serialized back out by default.
    """
    inputter = _Inputter(
        cls,
        codec=codec,
        coerce_to_float=coerce_to_float,
        allow_unknown_attrs=allow_unknown_attrs,
        discard_unknown_attrs=discard_unknown_attrs,
        lossy=lossy,
    )
    out = inputter.run(values)
    # Sanity check: the inputter should produce the requested type.
    assert isinstance(out, cls)
    return out
def dataclass_from_json(
    cls: type[T],
    json_str: str,
    *,
    coerce_to_float: bool = True,
    allow_unknown_attrs: bool = True,
    discard_unknown_attrs: bool = False,
    lossy: bool = False,
) -> T:
    """Return a dataclass instance given a json string.

    Basically dataclass_from_dict(json.loads(...))
    """
    decoded = json.loads(json_str)
    return dataclass_from_dict(
        cls=cls,
        values=decoded,
        coerce_to_float=coerce_to_float,
        allow_unknown_attrs=allow_unknown_attrs,
        discard_unknown_attrs=discard_unknown_attrs,
        lossy=lossy,
    )
def dataclass_validate(
    obj: Any,
    coerce_to_float: bool = True,
    codec: Codec = Codec.JSON,
    discard_extra_attrs: bool = False,
) -> None:
    """Ensure that values in a dataclass instance are the correct types."""

    # An output pass with create=False performs all type validation
    # without actually building any output data.
    validator = _Outputter(
        obj,
        create=False,
        codec=codec,
        coerce_to_float=coerce_to_float,
        discard_extra_attrs=discard_extra_attrs,
    )
    validator.run()
def dataclass_hash(obj: Any, coerce_to_float: bool = True) -> str:
    """Calculate a hash for the provided dataclass.

    Basically this emits json for the dataclass (with keys sorted to
    keep things deterministic) and hashes the resulting string.
    """
    import hashlib
    from base64 import urlsafe_b64encode

    flattened = dataclass_to_dict(
        obj, codec=Codec.JSON, coerce_to_float=coerce_to_float
    )
    # Sorted keys keep the json (and thus the hash) deterministic.
    encoded = json.dumps(
        flattened, separators=(',', ':'), sort_keys=True
    ).encode()
    digest = hashlib.sha256(encoded).digest()
    # Go with urlsafe base64 instead of the usual hex to save some
    # space, and kill those ugly padding chars at the end.
    return urlsafe_b64encode(digest).decode().strip('=')
# Docs-generation hack; import some stuff that we likely only forward-declared # in our actual source code so that docs tools can find it. from typing import (Coroutine, Any, Literal, Callable, Generator, Awaitable, Sequence, Self) import asyncio from concurrent.futures import Future