Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 266 additions & 16 deletions azure/functions/_durable_functions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,64 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from typing import Union
import json
import logging
import os
import sys
import warnings
from typing import Any, Callable, Optional, Union

from . import _abc
from importlib import import_module

# Module-level logger used for the one-time advisories and for
# type-mismatch warnings emitted by the Durable JSON codec below.
logger = logging.getLogger("azure.functions.DurableFunctions")

# Name of the environment variable consulted by _is_strict_mode().
_STRICT_ENV_VAR = "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING"
# Values of that variable (compared after strip() and lower()) that
# switch the codec into strict mode.
_TRUTHY = frozenset({"1", "true", "yes"})
# Keys that together mark a dict as a serialized custom-object envelope;
# see _is_legacy_custom_dict().
_LEGACY_KEYS = frozenset({"__class__", "__module__", "__data__"})

# One-shot notice flags. Each becomes True after the corresponding
# advisory has been emitted in this process; tests may reset them.
_loose_codec_notice_emitted = False
_no_expected_type_notice_emitted = False


def _is_strict_mode() -> bool:
    """Return True when strict typing is switched on via the environment.

    The environment variable named by ``_STRICT_ENV_VAR`` is read on
    every call (nothing is cached); surrounding whitespace and letter
    case are ignored when comparing against the accepted truthy values.
    """
    raw_setting = os.environ.get(_STRICT_ENV_VAR, "")
    normalized = raw_setting.strip().lower()
    return normalized in _TRUTHY


def _notify_loose_codec_used() -> None:
    """Emit the loose-codec advisory at most once per process.

    Called when the loose-mode ``object_hook`` path has reconstructed a
    custom object. Does nothing if the advisory was already emitted or
    if strict mode is active. Otherwise the message is logged at INFO
    level and also raised as a ``DeprecationWarning``.
    """
    global _loose_codec_notice_emitted

    # The flag is tested first so the environment is not consulted again
    # once the advisory has been issued.
    should_emit = not (_loose_codec_notice_emitted or _is_strict_mode())
    if should_emit:
        # Latch before emitting so the advisory cannot repeat.
        _loose_codec_notice_emitted = True
        advisory = (
            "azure.functions Durable JSON codec reconstructed a custom "
            "object via the loose-mode object_hook path. Set "
            "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING=1 and supply "
            "expected_type at decode call sites to enable type-validated "
            "deserialization. This message will not be repeated."
        )
        logger.info(advisory)
        warnings.warn(advisory, DeprecationWarning, stacklevel=2)


def _notify_no_expected_type() -> None:
    """Emit the missing-``expected_type`` advisory at most once per process.

    Called when ``df_loads`` runs without an ``expected_type``. Does
    nothing if the advisory was already emitted or if strict mode is
    active. Otherwise the message is logged at INFO level and also
    raised as a ``DeprecationWarning``.
    """
    global _no_expected_type_notice_emitted

    # The flag is tested first so the environment is not consulted again
    # once the advisory has been issued.
    should_emit = not (_no_expected_type_notice_emitted or _is_strict_mode())
    if should_emit:
        # Latch before emitting so the advisory cannot repeat.
        _no_expected_type_notice_emitted = True
        advisory = (
            "azure.functions df_loads was called without expected_type. "
            "Pass the destination type to enable validation and prepare "
            "for strict typing (AZURE_FUNCTIONS_DURABLE_STRICT_TYPING=1). "
            "This message will not be repeated."
        )
        logger.info(advisory)
        warnings.warn(advisory, DeprecationWarning, stacklevel=2)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ActivityTriggerConverter.decode calls df_loads(data.value) without expected_type, unconditionally tripping _notify_no_expected_type(). The warning tells users to "pass the destination type," but the converter framework doesn't forward annotations so users can't act on it. Should we skip the warning for internal callers?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That should be caught with the first few lines of df_loads

if expected_type is not None:
        return _loads_with_expected_type(s, expected_type)

This method will be called if expected_type is None, which is what we expect



# Utilities
Expand Down Expand Up @@ -46,43 +101,238 @@ def _serialize_custom_object(obj):
def _deserialize_custom_object(obj: dict) -> object:
    """Deserialize a user-defined object from JSON.

    Reconstructs a custom object from a dictionary that carries the
    ``{"__class__", "__module__", "__data__"}`` envelope produced by
    :func:`_serialize_custom_object`. The class is resolved by looking
    up ``__module__`` in :data:`sys.modules`; modules are never imported
    on demand. The input dictionary is not modified.

    Parameters
    ----------
    obj: dict
        Dictionary that potentially encodes a custom class.

    Returns
    -------
    object
        Either the original ``obj`` dictionary (if it is not an
        envelope) or the reconstructed custom object.

    Raises
    ------
    ValueError
        If the declared module is not present in ``sys.modules``.
    AttributeError
        If the declared module is loaded but does not define the
        declared class.
    TypeError
        If the resolved class does not expose a ``from_json`` function.
    """
    if ("__class__" in obj) and ("__module__" in obj) and ("__data__" in obj):
        # Read the envelope fields without popping them, so that a failed
        # lookup below does not leave the caller's dict half-stripped.
        class_name = obj["__class__"]
        module_name = obj["__module__"]
        obj_data = obj["__data__"]

        # Resolve the class from already-loaded modules; this function
        # does not import modules on demand. The module is normally
        # already loaded, because the worker imports the function app
        # file while indexing its functions.
        module = sys.modules.get(module_name)
        if module is None:
            raise ValueError(
                f"cannot deserialize custom object: module "
                f"{module_name!r} is not loaded in sys.modules"
            )
        class_ = getattr(module, class_name, None)
        if class_ is None:
            raise AttributeError(
                f"cannot deserialize custom object: class {class_name!r} "
                f"not found in module {module_name!r}"
            )

        if not hasattr(class_, "from_json"):
            # Report the resolved class; `obj` is always a plain dict here.
            raise TypeError(f"class {class_} does not expose a `from_json` "
                            "function")

        # Initialize the object using its `from_json` deserializer
        obj = class_.from_json(obj_data)
        _notify_loose_codec_used()
    return obj


# ---------------------------------------------------------------------------
# Public Durable Functions JSON codec
# ---------------------------------------------------------------------------


def df_dumps(value: Any) -> str:
    """Serialize *value* to a JSON string.

    **Loose mode** (default): identical to
    ``json.dumps(value, default=_serialize_custom_object)``, so custom
    objects at any depth are wrapped in the
    ``{"__class__", "__module__", "__data__"}`` envelope.

    **Strict mode** (``AZURE_FUNCTIONS_DURABLE_STRICT_TYPING`` set to
    ``1``, ``true`` or ``yes``): only a top-level object exposing
    ``to_json`` is wrapped. The result is then dumped without a
    ``default=`` hook, so ``to_json()`` must return natively
    JSON-serializable data; ``json.dumps`` raises ``TypeError``
    otherwise.
    """
    if not _is_strict_mode():
        return json.dumps(value, default=_serialize_custom_object)

    # Strict mode: wrap the top-level custom object (if any) and
    # serialize without a default= hook.
    if hasattr(value, "to_json"):
        payload = _serialize_custom_object(value)
    else:
        payload = value
    return json.dumps(payload)


def df_loads(s: str, expected_type: Optional[type] = None) -> Any:
    """Deserialize a JSON string, optionally validating against *expected_type*.

    Without *expected_type* (``None``):

    * **Loose mode** (default) runs
      ``json.loads(s, object_hook=_deserialize_custom_object)``. Custom
      objects are reconstructed when their declaring module is already
      in ``sys.modules``; otherwise ``ValueError`` is raised.
    * **Strict mode** parses without an ``object_hook``. A top-level
      legacy custom-object envelope raises ``TypeError``, because
      strict mode needs ``expected_type`` to rebuild custom objects.

    A one-time advisory is emitted on this path (see
    ``_notify_no_expected_type``).

    With *expected_type*, the work is delegated to
    ``_loads_with_expected_type``. It parses the raw JSON first,
    without an ``object_hook``, so the payload can be inspected before
    any class lookup. On a class/module mismatch, loose mode logs a
    warning and strict mode raises ``TypeError``. Loose mode then runs
    the legacy ``object_hook`` path, so nested custom objects are also
    reconstructed. Strict mode calls
    ``expected_type.from_json(raw["__data__"])`` directly.
    """
    if expected_type is None:
        _notify_no_expected_type()
        if _is_strict_mode():
            return _loads_strict_no_type(s)
        return json.loads(s, object_hook=_deserialize_custom_object)

    return _loads_with_expected_type(s, expected_type)


def _get_serialize_default() -> Optional[Callable]:
    """Pick the ``default`` callback to hand to ``json.dumps``.

    Meant for call sites that assemble their own ``json.dumps`` call
    (e.g. ``OrchestratorState.to_json_string``) yet still want to
    respect the active typing mode: strict mode yields ``None``, loose
    mode yields ``_serialize_custom_object``.
    """
    return None if _is_strict_mode() else _serialize_custom_object


def _loads_strict_no_type(s: str) -> Any:
    """Deserialize *s* in strict mode when no *expected_type* was given.

    The string is parsed without an ``object_hook``. Primitive and
    plain-JSON payloads are returned as parsed; a top-level legacy
    custom-object envelope raises ``TypeError``.
    """
    parsed = json.loads(s)
    if not _is_legacy_custom_dict(parsed):
        return parsed

    declared_module = parsed['__module__']
    declared_class = parsed['__class__']
    raise TypeError(
        "df_loads: strict mode requires expected_type to "
        "deserialize custom-object payloads, but none was provided. "
        f"Payload declares {declared_module}.{declared_class}."
    )


def _is_legacy_custom_dict(d: Any) -> bool:
    """Tell whether *d* is a dict carrying every legacy envelope marker."""
    if not isinstance(d, dict):
        return False
    return all(marker in d for marker in _LEGACY_KEYS)


def _has_json_protocol(cls: type) -> bool:
    """Tell whether *cls* has callable ``to_json`` and ``from_json`` attributes."""
    for method_name in ("to_json", "from_json"):
        if not callable(getattr(cls, method_name, None)):
            return False
    return True


def _is_compatible(value: Any, expected_type: type) -> bool:
    """Best-effort ``isinstance`` check that tolerates generic type hints.

    Plain classes are checked with ``isinstance`` directly. When
    ``isinstance`` rejects the hint with ``TypeError`` (as it does for
    typing constructs such as ``List[int]``), the hint is unwrapped:

    * a ``Union`` / ``Optional`` hint is compatible if *value* is
      compatible with any of its members;
    * a parameterized generic is checked against its ``__origin__``
      class (``List[int]`` -> ``list``); the type arguments themselves
      are not validated;
    * anything else that cannot be checked (e.g. ``Any``) is treated as
      compatible.

    Parameters
    ----------
    value: Any
        The deserialized value to check.
    expected_type: type
        A class or typing construct describing the expected type.

    Returns
    -------
    bool
        ``False`` only when *value* is positively known not to match.
    """
    try:
        return isinstance(value, expected_type)
    except TypeError:
        # typing constructs like List[int] aren't valid for isinstance;
        # fall through and inspect the hint's origin instead.
        pass

    origin = getattr(expected_type, "__origin__", None)
    if origin is Union:
        members = getattr(expected_type, "__args__", ())
        return any(_is_compatible(value, member) for member in members)
    if isinstance(origin, type):
        return isinstance(value, origin)
    # Uncheckable hint -- stay permissive rather than reject valid data.
    return True


def _loads_with_expected_type(s: str, expected_type: type) -> Any:
    """Parse *s* and validate the result against *expected_type*.

    The raw JSON is parsed without an ``object_hook`` so the payload
    shape can be inspected before any class lookup. In strict mode a
    matching custom-object payload is reconstructed via
    ``expected_type.from_json``; in loose mode the legacy
    ``object_hook`` path runs so nested custom objects inside
    ``__data__`` are also reconstructed.

    Parameters
    ----------
    s: str
        JSON document to deserialize.
    expected_type: type
        Destination type. It may be a typing construct; one that lacks
        ``__name__`` is treated as not matching any envelope.

    Returns
    -------
    Any
        The deserialized (and, where applicable, reconstructed) value.

    Raises
    ------
    TypeError
        In strict mode, when the payload's declared class does not
        match *expected_type*, when *expected_type* lacks the
        ``to_json`` / ``from_json`` protocol, or when a plain-JSON
        value is not compatible with *expected_type*.
    """
    raw = json.loads(s)
    strict = _is_strict_mode()

    if _is_legacy_custom_dict(raw):
        # Read the identifying attributes defensively: typing constructs
        # may not define __name__, and an AttributeError here would
        # bypass the documented warning / TypeError handling below.
        expected_name = getattr(expected_type, "__name__", None)
        expected_module = getattr(expected_type, "__module__", None)
        if expected_name is None:
            expected_label = repr(expected_type)
        else:
            expected_label = f"{expected_module}.{expected_name}"

        class_name = raw["__class__"]
        module_name = raw["__module__"]
        type_matches = (expected_name is not None
                        and class_name == expected_name
                        and module_name == expected_module)

        if not type_matches:
            msg = (
                f"df_loads: payload declares class "
                f"{module_name}.{class_name} but expected "
                f"{expected_label}"
            )
            if strict:
                raise TypeError(msg)
            logger.warning(msg)
            # Fall through to the object_hook path below.

        if strict:
            if not _has_json_protocol(expected_type):
                raise TypeError(
                    f"df_loads: expected_type "
                    f"{expected_label} "
                    f"does not expose from_json"
                )
            return expected_type.from_json(raw["__data__"])

        # Loose mode -- use the object_hook path so nested custom
        # objects inside __data__ are also reconstructed.
        return json.loads(s, object_hook=_deserialize_custom_object)

    # Primitive / plain-JSON payload -- validate the Python type.
    if not _is_compatible(raw, expected_type):
        msg = (
            f"df_loads: deserialized value ({type(raw).__name__}) is not "
            f"compatible with expected type {expected_type}"
        )
        if strict:
            raise TypeError(msg)
        logger.warning(msg)

    if strict:
        return raw
    # Loose mode -- use the object_hook path so nested custom objects
    # inside dicts/lists are reconstructed.
    return json.loads(s, object_hook=_deserialize_custom_object)


class OrchestrationContext(_abc.OrchestrationContext):
"""A durable function orchestration context.

Expand Down
17 changes: 13 additions & 4 deletions azure/functions/durable_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,20 @@ def decode(cls,

# Durable functions extension always returns a string of json
# See durable functions library's call_activity_task docs
#
# Strict-mode caveat: when the AZURE_FUNCTIONS_DURABLE_STRICT_TYPING
# environment variable is set, df_loads requires an `expected_type`
# to deserialize custom-object envelopes. The worker's converter
# dispatch does not currently forward the activity function's
# parameter type annotation to `decode`, so we have nothing to
# pass here -- a strict-mode payload carrying a custom-object
# envelope will surface as TypeError below and be re-raised as
# ValueError. Plumbing `expected_type` through `InConverter.decode`
# is tracked as future work in the spec (see
# spec-functions-sdk-df-serialization.md, section 6).
if data_type in ['string', 'json']:
try:
callback = _durable_functions._deserialize_custom_object
result = json.loads(data.value, object_hook=callback)
result = _durable_functions.df_loads(data.value)
except json.JSONDecodeError:
# String failover if the content is not json serializable
result = data.value
Expand All @@ -113,8 +123,7 @@ def decode(cls,
def encode(cls, obj: typing.Any, *,
expected_type: typing.Optional[type]) -> meta.Datum:
try:
callback = _durable_functions._serialize_custom_object
result = json.dumps(obj, default=callback)
result = _durable_functions.df_dumps(obj)
except TypeError as e:
raise ValueError(
f'activity trigger output must be json serializable ({obj})') from e
Expand Down
Loading
Loading