diff --git a/azure/functions/_durable_functions.py b/azure/functions/_durable_functions.py index aa53367..393c7b5 100644 --- a/azure/functions/_durable_functions.py +++ b/azure/functions/_durable_functions.py @@ -1,9 +1,64 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from typing import Union +import json +import logging +import os +import sys +import warnings +from typing import Any, Callable, Optional, Union + from . import _abc -from importlib import import_module + +logger = logging.getLogger("azure.functions.DurableFunctions") + +_STRICT_ENV_VAR = "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING" +_TRUTHY = frozenset({"1", "true", "yes"}) +_LEGACY_KEYS = frozenset({"__class__", "__module__", "__data__"}) + +# One-shot notice flags. Each becomes True after the corresponding +# advisory has been emitted in this process; tests may reset them. +_loose_codec_notice_emitted = False +_no_expected_type_notice_emitted = False + + +def _is_strict_mode() -> bool: + return os.environ.get(_STRICT_ENV_VAR, "").strip().lower() in _TRUTHY + + +def _notify_loose_codec_used() -> None: + """Emit a one-time advisory the first time the loose-mode object_hook + path actually reconstructs a custom object in this process.""" + global _loose_codec_notice_emitted + if _loose_codec_notice_emitted or _is_strict_mode(): + return + _loose_codec_notice_emitted = True + msg = ( + "azure.functions Durable JSON codec reconstructed a custom " + "object via the loose-mode object_hook path. Set " + "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING=1 and supply " + "expected_type at decode call sites to enable type-validated " + "deserialization. This message will not be repeated." 
+ ) + logger.info(msg) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + +def _notify_no_expected_type() -> None: + """Emit a one-time advisory the first time df_loads is called in + loose mode without an expected_type in this process.""" + global _no_expected_type_notice_emitted + if _no_expected_type_notice_emitted or _is_strict_mode(): + return + _no_expected_type_notice_emitted = True + msg = ( + "azure.functions df_loads was called without expected_type. " + "Pass the destination type to enable validation and prepare " + "for strict typing (AZURE_FUNCTIONS_DURABLE_STRICT_TYPING=1). " + "This message will not be repeated." + ) + logger.info(msg) + warnings.warn(msg, DeprecationWarning, stacklevel=2) # Utilities @@ -46,33 +101,52 @@ def _serialize_custom_object(obj): def _deserialize_custom_object(obj: dict) -> object: """Deserialize a user-defined object from JSON. - Deserializes a dictionary encoding a custom object, - if it contains class metadata suggesting that it should be - decoded further. + Reconstructs a custom object from a dictionary that carries the + ``{"__class__", "__module__", "__data__"}`` envelope produced by + :func:`_serialize_custom_object`. The class is resolved by looking + up ``__module__`` in :data:`sys.modules`; modules are never imported + on demand. - Parameters: + Parameters ---------- obj: dict - Dictionary object that potentially encodes a custom class + Dictionary that potentially encodes a custom class. - Returns: - -------- + Returns + ------- object - Either the original `obj` dictionary or the custom object it encoded + Either the original ``obj`` dictionary (if it is not an + envelope) or the reconstructed custom object. - Exceptions - ---------- + Raises + ------ + ValueError + If the declared module is not present in ``sys.modules``. + AttributeError + If the declared module is loaded but does not define the + declared class. 
TypeError - If the decoded object does not contain a `from_json` function + If the resolved class does not expose a ``from_json`` function. """ if ("__class__" in obj) and ("__module__" in obj) and ("__data__" in obj): class_name = obj.pop("__class__") module_name = obj.pop("__module__") obj_data = obj.pop("__data__") - # Importing the clas - module = import_module(module_name) - class_ = getattr(module, class_name) + # Resolve the class from already-loaded modules; this function + # does not import modules on demand. + module = sys.modules.get(module_name) + if module is None: + raise ValueError( + f"cannot deserialize custom object: module " + f"{module_name!r} is not loaded in sys.modules" + ) + class_ = getattr(module, class_name, None) + if class_ is None: + raise AttributeError( + f"cannot deserialize custom object: class {class_name!r} " + f"not found in module {module_name!r}" + ) if not hasattr(class_, "from_json"): raise TypeError(f"class {type(obj)} does not expose a `from_json` " @@ -80,9 +154,185 @@ def _deserialize_custom_object(obj: dict) -> object: # Initialize the object using its `from_json` deserializer obj = class_.from_json(obj_data) + _notify_loose_codec_used() return obj +# --------------------------------------------------------------------------- +# Public Durable Functions JSON codec +# --------------------------------------------------------------------------- + + +def df_dumps(value: Any) -> str: + """Serialize *value* to a JSON string. + + In **loose mode** (default) this is equivalent to + ``json.dumps(value, default=_serialize_custom_object)``: nested + custom objects are wrapped recursively in the + ``{"__class__", "__module__", "__data__"}`` envelope. + + In **strict mode** (``AZURE_FUNCTIONS_DURABLE_STRICT_TYPING`` set + to ``1``, ``true`` or ``yes``) only the top-level custom object is + wrapped; its ``__data__`` payload is serialized as plain JSON + without a ``default=`` hook. 
``to_json()`` must therefore return + a value that is natively JSON-serializable, and ``TypeError`` is + raised if any nested value is not. + """ + if _is_strict_mode(): + if hasattr(value, "to_json"): + envelope = _serialize_custom_object(value) + return json.dumps(envelope) + # Primitive / plain-JSON value -- serialize without default=. + return json.dumps(value) + return json.dumps(value, default=_serialize_custom_object) + + +def df_loads(s: str, expected_type: Optional[type] = None) -> Any: + """Deserialize a JSON string, optionally validating against *expected_type*. + + When *expected_type* is ``None``: + + * **Loose mode** (default) runs + ``json.loads(s, object_hook=_deserialize_custom_object)``. Custom + objects whose declaring module is already present in + ``sys.modules`` are reconstructed; otherwise ``ValueError`` is + raised. + * **Strict mode** parses without an ``object_hook``. A legacy + custom-object envelope at the top level raises ``TypeError`` -- + the caller must supply ``expected_type`` to deserialize custom + objects in strict mode. + + When *expected_type* is provided the raw JSON is parsed first + (without an ``object_hook``) so the payload can be inspected before + any class lookup. On a class/module mismatch loose mode logs a + warning and strict mode raises ``TypeError``. In loose mode the + legacy ``object_hook`` path then runs (so nested custom objects are + also reconstructed); in strict mode the matching custom-object + payload is reconstructed by calling + ``expected_type.from_json(raw["__data__"])`` directly. + """ + if expected_type is not None: + return _loads_with_expected_type(s, expected_type) + + _notify_no_expected_type() + + if _is_strict_mode(): + return _loads_strict_no_type(s) + + return json.loads(s, object_hook=_deserialize_custom_object) + + +def _get_serialize_default() -> Optional[Callable]: + """Return the ``default`` callback for ``json.dumps``. 
+ + Intended for call sites that build their own ``json.dumps`` + invocation (e.g. ``OrchestratorState.to_json_string``) and want to + honour the active typing mode. Returns ``_serialize_custom_object`` + in loose mode and ``None`` in strict mode. + """ + if _is_strict_mode(): + return None + return _serialize_custom_object + + +def _loads_strict_no_type(s: str) -> Any: + """Strict-mode deserialization when no *expected_type* is supplied. + + Parses *s* without an ``object_hook``. Returns the parsed value + unchanged for primitive / plain-JSON payloads; raises ``TypeError`` + if the top-level value is a legacy custom-object envelope. + """ + raw = json.loads(s) + if _is_legacy_custom_dict(raw): + raise TypeError( + "df_loads: strict mode requires expected_type to " + "deserialize custom-object payloads, but none was provided. " + f"Payload declares {raw['__module__']}.{raw['__class__']}." + ) + return raw + + +def _is_legacy_custom_dict(d: Any) -> bool: + """Return True if *d* is a dict with legacy custom-object markers.""" + return isinstance(d, dict) and _LEGACY_KEYS.issubset(d) + + +def _has_json_protocol(cls: type) -> bool: + """Return True iff *cls* exposes callable ``to_json`` and ``from_json``.""" + return callable(getattr(cls, "to_json", None)) and callable( + getattr(cls, "from_json", None) + ) + + +def _is_compatible(value: Any, expected_type: type) -> bool: + """Best-effort ``isinstance`` check that tolerates generic type hints.""" + try: + return isinstance(value, expected_type) + except TypeError: + # typing constructs like List[int] aren't valid for isinstance. + return True + + +def _loads_with_expected_type(s: str, expected_type: type) -> Any: + """Parse *s* and validate the result against *expected_type*. + + The raw JSON is parsed without an ``object_hook`` so the payload + shape can be inspected before any class lookup. 
In strict mode a + matching custom-object payload is reconstructed via + ``expected_type.from_json``; in loose mode the legacy + ``object_hook`` path runs so nested custom objects inside + ``__data__`` are also reconstructed. + """ + raw = json.loads(s) + strict = _is_strict_mode() + + if _is_legacy_custom_dict(raw): + class_name = raw["__class__"] + module_name = raw["__module__"] + type_matches = (class_name == expected_type.__name__ + and module_name == expected_type.__module__) + + if not type_matches: + msg = ( + f"df_loads: payload declares class " + f"{module_name}.{class_name} but expected " + f"{expected_type.__module__}.{expected_type.__name__}" + ) + if strict: + raise TypeError(msg) + logger.warning(msg) + # Fall through to the object_hook path below. + + if strict: + if not _has_json_protocol(expected_type): + raise TypeError( + f"df_loads: expected_type " + f"{expected_type.__module__}.{expected_type.__name__} " + f"does not expose from_json" + ) + return expected_type.from_json(raw["__data__"]) + + # Loose mode -- use the object_hook path so nested custom + # objects inside __data__ are also reconstructed. + return json.loads(s, object_hook=_deserialize_custom_object) + + # Primitive / plain-JSON payload -- validate the Python type. + if not _is_compatible(raw, expected_type): + msg = ( + f"df_loads: deserialized value ({type(raw).__name__}) is not " + f"compatible with expected type {expected_type}" + ) + if strict: + raise TypeError(msg) + logger.warning(msg) + + if strict: + return raw + # Loose mode -- use the object_hook path so nested custom objects + # inside dicts/lists are reconstructed. + return json.loads(s, object_hook=_deserialize_custom_object) + + class OrchestrationContext(_abc.OrchestrationContext): """A durable function orchestration context. 
diff --git a/azure/functions/durable_functions.py b/azure/functions/durable_functions.py index 0ad861c..b7ac47e 100644 --- a/azure/functions/durable_functions.py +++ b/azure/functions/durable_functions.py @@ -92,10 +92,20 @@ def decode(cls, # Durable functions extension always returns a string of json # See durable functions library's call_activity_task docs + # + # Strict-mode caveat: when the AZURE_FUNCTIONS_DURABLE_STRICT_TYPING + # environment variable is set, df_loads requires an `expected_type` + # to deserialize custom-object envelopes. The worker's converter + # dispatch does not currently forward the activity function's + # parameter type annotation to `decode`, so we have nothing to + # pass here -- a strict-mode payload carrying a custom-object + # envelope will surface as TypeError below and be re-raised as + # ValueError. Plumbing `expected_type` through `InConverter.decode` + # is tracked as future work in the spec (see + # spec-functions-sdk-df-serialization.md, section 6). 
if data_type in ['string', 'json']: try: - callback = _durable_functions._deserialize_custom_object - result = json.loads(data.value, object_hook=callback) + result = _durable_functions.df_loads(data.value) except json.JSONDecodeError: # String failover if the content is not json serializable result = data.value @@ -113,8 +123,7 @@ def decode(cls, def encode(cls, obj: typing.Any, *, expected_type: typing.Optional[type]) -> meta.Datum: try: - callback = _durable_functions._serialize_custom_object - result = json.dumps(obj, default=callback) + result = _durable_functions.df_dumps(obj) except TypeError as e: raise ValueError( f'activity trigger output must be json serializable ({obj})') from e diff --git a/spec-functions-sdk-df-serialization.md b/spec-functions-sdk-df-serialization.md new file mode 100644 index 0000000..6c059db --- /dev/null +++ b/spec-functions-sdk-df-serialization.md @@ -0,0 +1,347 @@ +# Feature Spec: Centralized Durable Functions Serialization in azure-functions-python-library + +**Status:** Draft +**Author:** (Durable Functions team) +**Target repo:** `Azure/azure-functions-python-library` +**Target files:** `azure/functions/_durable_functions.py`, `azure/functions/durable_functions.py` +**Upstream consumer:** `Azure/azure-functions-durable-python` (will drop its local `df_serialization.py` and import from here) + +--- + +## 1. Motivation + +The `_serialize_custom_object` / `_deserialize_custom_object` pair in +`azure/functions/_durable_functions.py` is the canonical codec for Durable +Functions custom-object payloads. Today every call site (the Durable SDK's +action classes, context objects, entity state, **and** the activity trigger +converters in `azure/functions/durable_functions.py`) independently calls +`json.dumps(..., default=_serialize_custom_object)` / +`json.loads(..., object_hook=_deserialize_custom_object)`. + +Problems: + +1. 
**No type validation.** `_deserialize_custom_object` unconditionally calls + `importlib.import_module(module_name)` on whatever `__module__` string is in + the payload — there is no check that the declared class matches what the + caller expects. + +2. **Security concern.** An attacker-controlled payload can embed arbitrary + `__module__` / `__class__` values, causing `import_module` to load any + installed module and call its `from_json`. This is especially relevant for + `ActivityTriggerConverter.decode` and `LegacyActivityTriggerConverter.decode` + which deserialize inbound trigger data with no gating. + +3. **No single entry point.** Serialization logic is scattered; adding + cross-cutting behavior (logging, validation, strict mode) requires touching + every call site. + +## 2. Proposed API + +Add two public functions to `azure/functions/_durable_functions.py`: + +```python +def df_dumps(value: Any) -> str: + """Serialize *value* to a JSON string using the Durable Functions convention. + + In loose mode (default), equivalent to + ``json.dumps(value, default=_serialize_custom_object)``. + + In strict mode, the top-level custom object is wrapped in the + legacy envelope but its ``to_json()`` output is serialized as + plain JSON (no ``default=`` hook). Primitives and plain + containers are also serialized without ``default=``. + A ``TypeError`` is raised if any nested value is not natively + JSON-serializable. + """ + +def df_loads( + s: str, + expected_type: Optional[type] = None, +) -> Any: + """Deserialize a JSON string, optionally validating against *expected_type*. + + When *expected_type* is None, behaves identically to + ``json.loads(s, object_hook=_deserialize_custom_object)``. + + When *expected_type* is provided, the raw JSON is parsed first + (without ``object_hook``) so the payload can be inspected before + ``import_module`` fires. 
Behavior then depends on the typing mode: + + * **Loose mode** (default) — logs a warning on type mismatch, then + falls through to the legacy ``object_hook`` path. + * **Strict mode** — raises ``TypeError`` on mismatch. For custom- + object payloads, calls ``expected_type.from_json(raw["__data__"])`` + directly without ``import_module``. Opted in via the + ``AZURE_FUNCTIONS_DURABLE_STRICT_TYPING`` environment variable. + """ +``` + +### 2.1 Wire format — Loose mode + +**No change.** `df_dumps` produces the same JSON that +`json.dumps(value, default=_serialize_custom_object)` produces today: +- Builtins → plain JSON +- Custom objects with `to_json` → `{"__class__": ..., "__module__": ..., "__data__": ...}` +- Nested custom objects are recursively wrapped via the `default=` hook + +### 2.1.1 Wire format — Strict mode + +`df_dumps` in strict mode: +- Top-level custom objects with `to_json` → same `{"__class__", "__module__", "__data__"}` envelope +- But `__data__` is the **plain JSON** output of `to_json()` — serialized **without** `default=_serialize_custom_object` +- Primitives and plain containers → serialized without `default=` +- Any nested value that is not natively JSON-serializable → `TypeError` at encode time + +This means `to_json()` must produce a value that `json.dumps` can handle +natively. Nested custom objects must be serialized explicitly inside +`to_json()` (e.g., call `Hat.to_json(self.hat)` rather than returning +`self.hat` directly). This is a **deliberate breaking change** for strict +mode — it ensures no `__module__` strings reach storage at nested levels, +eliminating the `import_module` attack surface entirely. + +### 2.2 `df_loads` without `expected_type` — Loose mode + +Behaves identically to `json.loads(s, object_hook=_deserialize_custom_object)`. +This is the backward-compatible path for call sites that have no type info. 
+ +### 2.2.1 `df_loads` without `expected_type` — Strict mode + +Parses without `object_hook` so `import_module` is **never** called. If the +top-level value is a legacy custom-object dict, raises `TypeError` — the caller +must supply an `expected_type` to deserialize custom objects in strict mode. +Primitive / plain-JSON payloads are returned as-is (no security risk, no +`import_module` involved). + +This ensures that enabling strict mode surfaces every untyped call site as a +loud failure rather than silently falling through to `import_module`. + +### 2.3 `df_loads` with `expected_type` — Loose mode (default) + +1. Parse `s` with plain `json.loads(s)` (no `object_hook`) → `raw`. +2. If `raw` is a legacy custom-object dict (`{"__class__", "__module__", "__data__"}` ⊆ keys): + a. Compare `raw["__class__"]` / `raw["__module__"]` against `expected_type.__name__` / `expected_type.__module__`. + b. On mismatch → `logger.warning(...)`. + c. Fall through to `json.loads(s, object_hook=_deserialize_custom_object)` (legacy behavior preserved). +3. If `raw` is a primitive/plain-JSON value: + a. Best-effort `isinstance(raw, expected_type)` check (tolerate `TypeError` for `typing` generics). + b. On mismatch → `logger.warning(...)`. + c. Fall through to `json.loads(s, object_hook=_deserialize_custom_object)` so nested custom objects in dicts/lists are still reconstructed. + +### 2.4 `df_loads` with `expected_type` — Strict mode + +Opted in by setting `AZURE_FUNCTIONS_DURABLE_STRICT_TYPING` to `1`, `true`, or `yes`. + +1. Parse `s` with plain `json.loads(s)` (no `object_hook`) → `raw`. +2. If `raw` is a legacy custom-object dict: + a. Compare class/module as above. + b. On mismatch → `raise TypeError(...)`. + c. Verify `expected_type` has callable `from_json`; if not → `raise TypeError(...)`. + d. Return `expected_type.from_json(raw["__data__"])` — **`import_module` is never called**. + e. 
Because `df_dumps` in strict mode produces plain-JSON `__data__` (no + nested envelopes), `from_json` receives clean data. If consuming + legacy (loose-encoded) payloads, nested `{"__class__", ...}` dicts + may still appear — `from_json` should handle both shapes. +3. If `raw` is a primitive/plain-JSON value: + a. `isinstance` check as above. + b. On mismatch → `raise TypeError(...)`. + c. Return `raw` directly (no `object_hook` pass). + +### 2.5 Environment variable + +| Variable | Values | Default | +|---|---|---| +| `AZURE_FUNCTIONS_DURABLE_STRICT_TYPING` | `1`, `true`, `yes` (case-insensitive, stripped) | unset = loose mode | + +This is the same env var already used by the Durable SDK's interim implementation. + +## 3. Internal helpers to expose + +The Durable SDK currently imports `_serialize_custom_object` directly for +`OrchestratorState.to_json_string` (which builds its own `json.dumps` call). +To avoid reaching into private names, also expose: + +```python +def _get_serialize_default() -> Callable: + """Return the ``default`` callback for ``json.dumps``. + + For use in call sites that build their own ``json.dumps`` invocation + (e.g. ``OrchestratorState.to_json_string``). + """ + return _serialize_custom_object +``` + +Alternatively, if the preference is to keep the public surface minimal, the +Durable SDK can continue importing `_serialize_custom_object` directly — it +already does so today. + +## 4. Converter changes + +### 4.1 `ActivityTriggerConverter.decode` and `LegacyActivityTriggerConverter.decode` + +Both converters currently do: + +```python +callback = _durable_functions._deserialize_custom_object +result = json.loads(data.value, object_hook=callback) +``` + +Change to: + +```python +result = _durable_functions.df_loads(data.value) +``` + +This is behavior-identical in the default (no `expected_type`) case. 
A future +enhancement could pass the activity function's input type annotation as +`expected_type` if the converter framework makes it available (see §6). + +### 4.2 `ActivityTriggerConverter.encode` and `LegacyActivityTriggerConverter.encode` + +Both converters currently do: + +```python +callback = _durable_functions._serialize_custom_object +result = json.dumps(obj, default=callback) +``` + +Change to: + +```python +result = _durable_functions.df_dumps(obj) +``` + +### 4.3 Error handling + +The converters' existing `try/except json.JSONDecodeError` and +`try/except TypeError` blocks remain unchanged — they wrap the `df_loads` / +`df_dumps` calls exactly as they wrap the current `json.loads` / `json.dumps` +calls. + +## 5. Implementation guidance + +### 5.1 Placement + +All new code goes in `azure/functions/_durable_functions.py`, next to the +existing `_serialize_custom_object` / `_deserialize_custom_object` functions. +The existing functions remain for backward compatibility (they are still called +internally by `df_loads` in loose mode). + +### 5.2 Logging + +Use `logging.getLogger("azure.functions._durable_functions")`. + +### 5.3 Reference implementation + +The Durable SDK's interim implementation is at: + +``` +azure-functions-durable-python/azure/durable_functions/models/utils/df_serialization.py +``` + +(branch: the PR that adds this spec) + +That file is ~180 lines and contains the complete logic for `df_dumps`, +`df_loads`, `_loads_with_expected_type`, `_is_strict_mode`, `_is_legacy_custom_dict`, +`_has_json_protocol`, and `_is_compatible`. The implementation should be +moved here essentially verbatim, with the only difference being that +`_serialize_custom_object` and `_deserialize_custom_object` are local +rather than imported. 
+ +### 5.4 Strict-mode serialization contract + +In strict mode, the `to_json` / `from_json` contract is symmetric: + +> **`to_json()`** must return a value that is natively JSON-serializable — +> dicts, lists, strings, numbers, bools, None. Nested custom objects must +> be serialized explicitly (e.g., call `Hat.to_json(self.hat)`). +> +> **`from_json(data)`** receives exactly what `to_json()` produced — plain +> JSON data with no `{"__class__", "__module__", "__data__"}` markers at +> any nesting level. Reconstruct nested objects using their `from_json`. + +Example: + +```python +class Order: + @staticmethod + def to_json(obj): + return { + "item": obj.item, + "hat": Hat.to_json(obj.hat), # explicit — not obj.hat + } + + @staticmethod + def from_json(data): + return Order( + item=data["item"], + hat=Hat.from_json(data["hat"]), # symmetric + ) +``` + +If the application may also receive legacy (loose-encoded) payloads during +a rollout, `from_json` can check for both shapes: + +```python + @staticmethod + def from_json(data): + hat_data = data["hat"] + if isinstance(hat_data, Hat): + hat = hat_data # loose mode: object_hook already fired + else: + hat = Hat.from_json(hat_data) # strict mode: plain dict + return Order(item=data["item"], hat=hat) +``` + +## 6. Future work (out of scope for this PR) + +- **Pass `expected_type` into converter `decode`:** The `InConverter.decode` + interface currently only receives `data` and `trigger_metadata`. Adding an + optional `expected_type` kwarg (sourced from the function's parameter type + annotation) would let `ActivityTriggerConverter.decode` call + `df_loads(data.value, expected_type=pytype)` — closing the last + unprotected `import_module` path. This requires a change to the worker's + converter dispatch in `azure-functions-python-worker`. 
+ +- **Deprecate direct use of `_serialize_custom_object` / + `_deserialize_custom_object`:** Once `df_dumps` / `df_loads` are available, + the underscore-prefixed functions become internal implementation details. + +## 7. Testing + +### 7.1 Unit tests to add (in the functions SDK repo) + +1. **`df_dumps` round-trips** — primitives, custom objects with `to_json`, nested structures. +2. **`df_loads` without expected_type (loose)** — identical to legacy `json.loads(s, object_hook=...)`. +3. **`df_loads` without expected_type (strict), primitive payload** — returns raw value, no `import_module`. +4. **`df_loads` without expected_type (strict), custom-object payload** — raises `TypeError`. +5. **`df_loads` loose mode with matching type** — no warning, correct object returned. +6. **`df_loads` loose mode with mismatched type** — warning logged, legacy path still runs. +7. **`df_loads` strict mode with matching type** — `from_json` called directly, `import_module` never called. +8. **`df_loads` strict mode with mismatched type** — `TypeError` raised. +9. **`df_loads` strict mode, type lacks `from_json`** — `TypeError` raised. +10. **`df_loads` with primitive payload and expected_type** — isinstance validation. +11. **`df_loads` with `typing` generics as expected_type** — no crash (isinstance tolerance). +12. **Converter integration** — `ActivityTriggerConverter.decode` / `.encode` use `df_loads` / `df_dumps`. + +### 7.2 Existing tests in the Durable SDK + +The Durable SDK has 101 round-trip / validation tests in +`tests/utils/test_df_serialization.py` that exercise the exact same logic. +These can serve as a reference / be ported. + +## 8. Migration plan + +1. **This PR (functions SDK):** Add `df_dumps`, `df_loads` to `_durable_functions.py`. + Update the four converter methods to use them. Ship as a new patch/minor. + +2. **Same PR (Durable SDK) — updated after functions SDK ships:** Remove + `azure/durable_functions/models/utils/df_serialization.py`. 
Change all + imports from `.utils.df_serialization` to + `azure.functions._durable_functions`. Add `azure-functions>=<first release containing df_dumps/df_loads>` + to `install_requires`. This is the same in-progress Durable SDK PR — it + will be rebased to consume the functions SDK's definitive implementation + before merging. + +3. **Future (worker):** Extend `InConverter.decode` to accept `expected_type`, + enabling `ActivityTriggerConverter` to pass the function's input annotation + through to `df_loads`. diff --git a/tests/test_durable_functions_codec.py b/tests/test_durable_functions_codec.py new file mode 100644 index 0000000..dbff96a --- /dev/null +++ b/tests/test_durable_functions_codec.py @@ -0,0 +1,703 @@ +"""Comprehensive round-trip and validation tests for the Durable Functions codec. + +Every data shape is tested in three configurations: + 1. No expected_type (legacy object_hook path) + 2. Loose mode + expected_type (warn on mismatch, legacy deserialize) + 3. Strict mode + expected_type (raise on mismatch, from_json directly) + +Ported from azure-functions-durable-python's df_serialization test suite. 
+""" + +import json +import logging + +import pytest + +from azure.functions import _durable_functions as df_serialization +from azure.functions._durable_functions import ( + _STRICT_ENV_VAR, + _get_serialize_default, + df_dumps, + df_loads, +) + + +# --------------------------------------------------------------------------- +# Helper classes +# --------------------------------------------------------------------------- + +class PlainPerson: + """Simple class: to_json returns a dict, from_json accepts a dict.""" + + def __init__(self, name: str, age: int): + self.name = name + self.age = age + + @staticmethod + def to_json(obj): + return {"name": obj.name, "age": obj.age} + + @staticmethod + def from_json(data): + return PlainPerson(data["name"], data["age"]) + + def __eq__(self, other): + return (isinstance(other, PlainPerson) + and self.name == other.name and self.age == other.age) + + +class ScalarPerson: + """to_json returns a scalar (str), not a dict.""" + + def __init__(self, name: str): + self.name = name + + @staticmethod + def to_json(obj): + return obj.name + + @staticmethod + def from_json(data): + return ScalarPerson(data) + + def __eq__(self, other): + return isinstance(other, ScalarPerson) and self.name == other.name + + +class Hat: + """Leaf object for nesting tests.""" + + def __init__(self, color: str): + self.color = color + + @staticmethod + def to_json(obj): + return {"color": obj.color} + + @staticmethod + def from_json(data): + return Hat(data["color"]) + + def __eq__(self, other): + return isinstance(other, Hat) and self.color == other.color + + +class NaiveOrder: + """Nested object whose from_json expects pre-constructed Hat instances. + + This relies on the bottom-up object_hook behavior -- from_json receives + a Hat instance at data["hat"], not a raw dict. Works in loose mode but + fails in strict mode because strict skips object_hook. 
+ """ + + def __init__(self, item: str, hat: Hat): + self.item = item + self.hat = hat + + @staticmethod + def to_json(obj): + return {"item": obj.item, "hat": obj.hat} + + @staticmethod + def from_json(data): + # Assumes data["hat"] is already a Hat instance (object_hook fired) + return NaiveOrder(data["item"], data["hat"]) + + def __eq__(self, other): + return (isinstance(other, NaiveOrder) + and self.item == other.item and self.hat == other.hat) + + +class SmartOrder: + """Nested object with strict-mode-compatible to_json / from_json. + + to_json produces plain JSON (calls Hat.to_json explicitly), so the + result is natively JSON-serializable without ``default=``. from_json + handles both the strict-mode shape (plain dict from to_json) and + the loose-mode shape (pre-constructed Hat or raw legacy dict). + """ + + def __init__(self, item: str, hat: Hat): + self.item = item + self.hat = hat + + @staticmethod + def to_json(obj): + return {"item": obj.item, "hat": Hat.to_json(obj.hat)} + + @staticmethod + def from_json(data): + hat_data = data["hat"] + if isinstance(hat_data, Hat): + # Loose mode: object_hook already constructed the Hat + hat = hat_data + else: + # Strict mode or plain dict: reconstruct from to_json output + hat = Hat.from_json(hat_data) + return SmartOrder(data["item"], hat) + + def __eq__(self, other): + return (isinstance(other, SmartOrder) + and self.item == other.item and self.hat == other.hat) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(autouse=True) +def _reset_one_shot_notices(): + """Reset module-level one-shot notice flags around every test. + + df_serialization fires INFO + DeprecationWarning at most once per + process for (a) loose-mode reconstruction and (b) df_loads called + without expected_type. Without this reset the parametrized sweeps + below would only see the first emission. 
+ """ + df_serialization._loose_codec_notice_emitted = False + df_serialization._no_expected_type_notice_emitted = False + yield + df_serialization._loose_codec_notice_emitted = False + df_serialization._no_expected_type_notice_emitted = False + + +@pytest.fixture +def strict(monkeypatch): + """Enable strict typing mode for the duration of a test.""" + monkeypatch.setenv(_STRICT_ENV_VAR, "1") + + +@pytest.fixture +def loose(monkeypatch): + """Explicitly disable strict typing mode.""" + monkeypatch.delenv(_STRICT_ENV_VAR, raising=False) + + +# =================================================================== +# 1. PRIMITIVES (str, int, float, bool, None, list, dict) +# =================================================================== + +@pytest.mark.parametrize("value", [ + None, + True, + False, + 0, + -1, + 42, + 3.14, + "", + "hello", + [], + [1, 2, 3], + [True, None, "mixed"], + {}, + {"a": 1, "b": [1, 2]}, + {"nested": {"deep": {"value": 7}}}, +]) +class TestPrimitiveRoundTrips: + """Primitives must round-trip identically in all three paths.""" + + def test_no_expected_type(self, value): + assert df_loads(df_dumps(value)) == value + + def test_loose_with_matching_type(self, value, loose, caplog): + et = type(value) if value is not None else type(None) + with caplog.at_level(logging.WARNING, logger=df_serialization.__name__): + result = df_loads(df_dumps(value), expected_type=et) + assert result == value + + def test_strict_with_matching_type(self, value, strict): + et = type(value) if value is not None else type(None) + result = df_loads(df_dumps(value), expected_type=et) + assert result == value + + +# =================================================================== +# 2. 
# SIMPLE CUSTOM OBJECTS (dict-returning to_json)
# ===================================================================

class TestSimpleObject:
    """Round-trip and type-validation checks for ``PlainPerson``.

    ``PlainPerson.to_json`` returns a dict, so the envelope's ``__data__``
    is a JSON object.
    """

    def test_no_expected_type(self):
        obj = PlainPerson("andy", 99)
        decoded = df_loads(df_dumps(obj))
        assert decoded == obj

    def test_loose_matching_type(self, loose):
        obj = PlainPerson("andy", 99)
        decoded = df_loads(df_dumps(obj), expected_type=PlainPerson)
        assert decoded == obj

    def test_strict_matching_type(self, strict):
        obj = PlainPerson("andy", 99)
        decoded = df_loads(df_dumps(obj), expected_type=PlainPerson)
        assert decoded == obj

    def test_loose_mismatched_type_warns(self, loose, caplog):
        encoded = df_dumps(PlainPerson("a", 1))
        # Capture on the logger the codec actually writes to. The module's
        # __name__ names a different logger, so targeting it only worked
        # while the root logger level happened to allow WARNING.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            decoded = df_loads(encoded, expected_type=ScalarPerson)
        # Loose mode: legacy decoder uses the payload's class
        assert isinstance(decoded, PlainPerson)
        assert any("payload declares class" in r.message
                   for r in caplog.records)

    def test_strict_mismatched_type_raises(self, strict):
        encoded = df_dumps(PlainPerson("a", 1))
        with pytest.raises(TypeError, match="payload declares class"):
            df_loads(encoded, expected_type=ScalarPerson)


# ===================================================================
# 3.
# SCALAR-RETURNING to_json
# ===================================================================

class TestScalarToJson:
    """Round-trip and type-validation checks for ``ScalarPerson``.

    ``ScalarPerson.to_json`` returns a plain string, so the envelope's
    ``__data__`` is a JSON scalar rather than an object.
    """

    def test_no_expected_type(self):
        obj = ScalarPerson("andy")
        decoded = df_loads(df_dumps(obj))
        assert decoded == obj

    def test_loose_matching_type(self, loose):
        obj = ScalarPerson("andy")
        decoded = df_loads(df_dumps(obj), expected_type=ScalarPerson)
        assert decoded == obj

    def test_strict_matching_type(self, strict):
        obj = ScalarPerson("andy")
        decoded = df_loads(df_dumps(obj), expected_type=ScalarPerson)
        assert decoded == obj

    def test_loose_mismatched_type_warns(self, loose, caplog):
        encoded = df_dumps(ScalarPerson("andy"))
        # Target the codec's real logger rather than the module name, which
        # is a different (unused) logger.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            decoded = df_loads(encoded, expected_type=PlainPerson)
        # Loose mode: legacy decoder still uses the payload's class.
        assert isinstance(decoded, ScalarPerson)
        assert any("payload declares class" in r.message
                   for r in caplog.records)

    def test_strict_mismatched_type_raises(self, strict):
        encoded = df_dumps(ScalarPerson("andy"))
        with pytest.raises(TypeError, match="payload declares class"):
            df_loads(encoded, expected_type=PlainPerson)


# ===================================================================
# 4. DICT WITH OBJECT PROPERTIES e.g.
# {"person": PlainPerson(...)}
# ===================================================================

class TestDictWithObjectProperty:
    """A plain dict containing a custom object as a value."""

    def _make_payload(self):
        """Build a fresh dict mixing a custom object with a primitive."""
        return {"person": PlainPerson("a", 1), "count": 7}

    def test_no_expected_type(self):
        """Loose path: object_hook reconstructs nested objects."""
        decoded = df_loads(df_dumps(self._make_payload()))
        assert decoded["count"] == 7
        assert isinstance(decoded["person"], PlainPerson)
        assert decoded["person"].name == "a"

    def test_loose_expected_dict(self, loose, caplog):
        """Loose path + expected_type=dict: works, inner objects
        reconstructed."""
        # Capture on the codec's real logger so the negative assertion
        # below cannot pass merely because the wrong logger was configured.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            decoded = df_loads(df_dumps(self._make_payload()),
                               expected_type=dict)
        assert isinstance(decoded["person"], PlainPerson)
        # No warning -- top-level is a dict matching expected_type
        assert not any("not compatible" in r.message
                       for r in caplog.records)

    def test_strict_encode_fails_for_nested_custom_objects(self, strict):
        """Strict mode: a plain dict containing a custom object cannot be
        encoded -- json.dumps runs without default= so PlainPerson raises
        TypeError."""
        with pytest.raises(TypeError):
            df_dumps(self._make_payload())


# ===================================================================
# 5.
# NESTED OBJECTS -- "naive" from_json (expects pre-constructed)
# ===================================================================

class TestNaiveNestedObject:
    """Exercise NaiveOrder, whose from_json needs a ready-made Hat."""

    def _make(self):
        hat = Hat("red")
        return NaiveOrder("widget", hat)

    def test_no_expected_type(self):
        """Legacy decoding builds the inner Hat before the outer order."""
        wire = df_dumps(self._make())
        restored = df_loads(wire)
        assert isinstance(restored, NaiveOrder)
        inner = restored.hat
        assert isinstance(inner, Hat)
        assert inner.color == "red"

    def test_loose_matching_type(self, loose):
        """Supplying expected_type in loose mode keeps nesting working."""
        wire = df_dumps(self._make())
        restored = df_loads(wire, expected_type=NaiveOrder)
        assert restored == self._make()

    def test_strict_encode_fails_for_naive_to_json(self, strict):
        """NaiveOrder.to_json hands back a raw Hat, which strict-mode
        encoding is expected to reject with TypeError."""
        order = self._make()
        with pytest.raises(TypeError):
            df_dumps(order)


# ===================================================================
# 6.
# NESTED OBJECTS -- "smart" from_json (handles raw dicts)
# ===================================================================

class TestSmartNestedObject:
    """Exercise SmartOrder, which rebuilds its Hat from raw data itself."""

    def _make(self):
        hat = Hat("blue")
        return SmartOrder("gadget", hat)

    def test_no_expected_type(self):
        wire = df_dumps(self._make())
        restored = df_loads(wire)
        assert isinstance(restored, SmartOrder)
        assert restored.hat == Hat("blue")

    def test_loose_matching_type(self, loose):
        wire = df_dumps(self._make())
        restored = df_loads(wire, expected_type=SmartOrder)
        assert restored == self._make()

    def test_strict_matching_type(self, strict):
        """Strict decoding succeeds because from_json accepts a raw dict."""
        wire = df_dumps(self._make())
        restored = df_loads(wire, expected_type=SmartOrder)
        assert restored == self._make()
        inner = restored.hat
        assert isinstance(inner, Hat)
        assert inner.color == "blue"


# ===================================================================
# 7. LIST OF OBJECTS
# ===================================================================

class TestListOfObjects:
    """A top-level list whose elements are custom objects."""

    def _make(self):
        first = PlainPerson("a", 1)
        second = PlainPerson("b", 2)
        return [first, second]

    def test_no_expected_type(self):
        people = df_loads(df_dumps(self._make()))
        assert len(people) == 2
        assert all(isinstance(person, PlainPerson) for person in people)

    def test_loose_expected_list(self, loose):
        wire = df_dumps(self._make())
        people = df_loads(wire, expected_type=list)
        assert len(people) == 2
        assert all(isinstance(person, PlainPerson) for person in people)

    def test_strict_encode_fails_for_nested_custom_objects(self, strict):
        """A bare list has no to_json, and strict encoding runs without
        default=, so the contained PlainPerson must raise TypeError."""
        payload = self._make()
        with pytest.raises(TypeError):
            df_dumps(payload)


# ===================================================================
# 8.
# PRIMITIVE TYPE MISMATCHES
# ===================================================================

class TestPrimitiveTypeMismatch:
    """A decoded primitive that does not match ``expected_type``.

    Loose mode returns the value unchanged and logs a warning; strict
    mode raises ``TypeError``.
    """

    def test_loose_warns(self, loose, caplog):
        # Capture on the codec's real logger; the module's __name__ is a
        # different logger that the codec never writes to.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            result = df_loads(df_dumps("hello"), expected_type=int)
        assert result == "hello"
        assert any("not compatible" in r.message for r in caplog.records)

    def test_strict_raises(self, strict):
        with pytest.raises(TypeError,
                           match="not compatible with expected type"):
            df_loads(df_dumps("hello"), expected_type=int)

    def test_loose_str_expected_dict_warns(self, loose, caplog):
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            result = df_loads(df_dumps("hello"), expected_type=dict)
        assert result == "hello"
        assert any("not compatible" in r.message for r in caplog.records)

    def test_strict_str_expected_dict_raises(self, strict):
        with pytest.raises(TypeError):
            df_loads(df_dumps("hello"), expected_type=dict)


# ===================================================================
# 9. typing CONSTRUCTS (List[int], Optional[str], etc.)
# ===================================================================

class TestTypingConstructs:
    """Generic type hints can't be validated with isinstance -- we pass
    through without error in both modes."""

    def test_loose_list_of_int(self, loose):
        from typing import List
        decoded = df_loads(df_dumps([1, 2, 3]), expected_type=List[int])
        assert decoded == [1, 2, 3]

    def test_strict_list_of_int(self, strict):
        from typing import List
        decoded = df_loads(df_dumps([1, 2, 3]), expected_type=List[int])
        assert decoded == [1, 2, 3]

    def test_loose_optional_str(self, loose):
        from typing import Optional
        decoded = df_loads(df_dumps("hi"), expected_type=Optional[str])
        assert decoded == "hi"

    def test_strict_optional_str(self, strict):
        from typing import Optional
        decoded = df_loads(df_dumps("hi"), expected_type=Optional[str])
        assert decoded == "hi"


# ===================================================================
# 10. STRICT MODE ENV VAR VALUES
# ===================================================================

class TestStrictModeEnvVar:
    """Which values of the strict-typing env var switch strict mode on."""

    @pytest.mark.parametrize(
        "val", ["1", "true", "yes", "TRUE", "Yes", " 1 "])
    def test_truthy_values_enable_strict(self, monkeypatch, val):
        monkeypatch.setenv(_STRICT_ENV_VAR, val)
        with pytest.raises(TypeError):
            df_loads(df_dumps("hello"), expected_type=int)

    @pytest.mark.parametrize("val", ["0", "false", "no", "", "nope"])
    def test_non_truthy_values_stay_loose(self, monkeypatch, val, caplog):
        monkeypatch.setenv(_STRICT_ENV_VAR, val)
        # Use the codec's real logger name, not the module name.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            result = df_loads(df_dumps("hello"), expected_type=int)
        assert result == "hello"

    def test_unset_is_loose(self, monkeypatch):
        monkeypatch.delenv(_STRICT_ENV_VAR, raising=False)
        result = df_loads(df_dumps("hello"), expected_type=int)
        assert result == "hello"


# ===================================================================
# 10b.
# STRICT MODE WITHOUT expected_type
# ===================================================================

class TestStrictNoExpectedType:
    """Strict-mode df_loads calls that omit expected_type.

    Plain JSON values come back untouched; an envelope payload is
    refused instead of being reconstructed.
    """

    def test_primitive_returns_raw(self, strict):
        wire = df_dumps(42)
        assert df_loads(wire) == 42

    def test_string_returns_raw(self, strict):
        wire = df_dumps("hello")
        assert df_loads(wire) == "hello"

    def test_none_returns_raw(self, strict):
        wire = df_dumps(None)
        assert df_loads(wire) is None

    def test_plain_dict_returns_raw(self, strict):
        original = {"key": "value", "n": 1}
        assert df_loads(df_dumps(original)) == original

    def test_plain_list_returns_raw(self, strict):
        original = [1, "two", None]
        assert df_loads(df_dumps(original)) == original

    def test_custom_object_raises(self, strict):
        wire = df_dumps(PlainPerson("alice", 30))
        expected = "strict mode requires expected_type"
        with pytest.raises(TypeError, match=expected):
            df_loads(wire)

    def test_custom_object_error_includes_class(self, strict):
        wire = df_dumps(PlainPerson("alice", 30))
        with pytest.raises(TypeError, match="PlainPerson"):
            df_loads(wire)

    def test_loose_mode_custom_object_still_works(self, loose):
        """Outside strict mode the legacy path still reconstructs objects
        even when expected_type is omitted."""
        wire = df_dumps(PlainPerson("bob", 25))
        restored = df_loads(wire)
        assert isinstance(restored, PlainPerson)
        assert restored.name == "bob"


# ===================================================================
# 11.
# WIRE FORMAT VERIFICATION
# ===================================================================

class TestWireFormat:
    """Pin the exact JSON that df_dumps puts on the wire."""

    def test_df_dumps_matches_legacy_json_dumps(self):
        from azure.functions._durable_functions import (
            _serialize_custom_object,
        )
        value = {"key": "value", "list": [1, 2, 3]}
        legacy = json.dumps(value, default=_serialize_custom_object)
        assert df_dumps(value) == legacy

    def test_custom_object_produces_legacy_keys(self):
        envelope = json.loads(df_dumps(PlainPerson("andy", 99)))
        expected = {
            "__class__": "PlainPerson",
            "__module__": __name__,
            "__data__": {"name": "andy", "age": 99},
        }
        assert envelope == expected

    def test_scalar_to_json_produces_legacy_keys(self):
        envelope = json.loads(df_dumps(ScalarPerson("andy")))
        expected = {
            "__class__": "ScalarPerson",
            "__module__": __name__,
            "__data__": "andy",
        }
        assert envelope == expected

    def test_nested_object_produces_plain_json_data(self):
        """SmartOrder.to_json flattens its Hat itself, so __data__ holds
        plain JSON with no nested legacy envelope."""
        order = SmartOrder("gadget", Hat("blue"))
        envelope = json.loads(df_dumps(order))
        assert envelope["__class__"] == "SmartOrder"
        assert envelope["__data__"] == {
            "item": "gadget",
            "hat": {"color": "blue"},
        }


# ===================================================================
# 12. _get_serialize_default
# ===================================================================

class TestGetSerializeDefault:
    """Behaviour of the ``default=`` callback selector."""

    def test_returns_callable(self):
        callback = _get_serialize_default()
        assert callable(callback)

    def test_produces_legacy_dict(self):
        callback = _get_serialize_default()
        expected = {
            "__class__": "PlainPerson",
            "__module__": __name__,
            "__data__": {"name": "a", "age": 1},
        }
        assert callback(PlainPerson("a", 1)) == expected

    def test_strict_returns_none(self, strict):
        callback = _get_serialize_default()
        assert callback is None


# ===================================================================
# 13.
# ENCODE ERRORS
# ===================================================================

class TestEncodeErrors:
    """Values df_dumps cannot encode must raise TypeError."""

    def test_class_without_to_json(self):
        class NoProtocol:
            pass
        with pytest.raises(TypeError):
            df_dumps(NoProtocol())

    def test_set(self):
        with pytest.raises(TypeError):
            df_dumps({1, 2, 3})

    def test_bytes(self):
        with pytest.raises(TypeError):
            df_dumps(b"hello")


# ===================================================================
# 13b. STRICT-MODE ENCODE
# ===================================================================

class TestStrictEncode:
    """In strict mode, df_dumps rejects non-serializable nested values."""

    def test_primitive(self, strict):
        assert df_dumps(42) == "42"

    def test_string(self, strict):
        assert df_dumps("hello") == '"hello"'

    def test_plain_dict(self, strict):
        assert json.loads(df_dumps({"a": 1})) == {"a": 1}

    def test_custom_object_top_level_ok(self, strict):
        """Top-level custom object is wrapped in envelope."""
        raw = json.loads(df_dumps(PlainPerson("andy", 99)))
        assert raw["__class__"] == "PlainPerson"
        assert raw["__data__"] == {"name": "andy", "age": 99}

    def test_strict_smart_order_data_is_plain_json(self, strict):
        """SmartOrder.to_json returns plain JSON, so encoding succeeds
        and __data__ contains no nested envelopes."""
        raw = json.loads(df_dumps(SmartOrder("gadget", Hat("blue"))))
        assert raw["__class__"] == "SmartOrder"
        assert raw["__data__"] == {
            "item": "gadget", "hat": {"color": "blue"}}

    def test_strict_naive_order_fails(self, strict):
        """NaiveOrder.to_json returns a Hat instance -- not serializable."""
        with pytest.raises(TypeError):
            df_dumps(NaiveOrder("widget", Hat("red")))

    def test_strict_dict_with_custom_value_fails(self, strict):
        """Plain dict containing a custom object -- not serializable."""
        with pytest.raises(TypeError):
            df_dumps({"person": PlainPerson("a", 1)})

    def test_strict_list_with_custom_value_fails(self, strict):
        """List containing custom objects -- not serializable."""
        with pytest.raises(TypeError):
            df_dumps([PlainPerson("a", 1)])

    def test_loose_dict_with_custom_value_ok(self, loose):
        """In loose mode, nested custom objects are still auto-wrapped."""
        raw = json.loads(df_dumps({"person": PlainPerson("a", 1)}))
        assert raw["person"]["__class__"] == "PlainPerson"


# ===================================================================
# 14. EDGE CASES
# ===================================================================

class TestEdgeCases:
    """JSON coercions and boundary values that must survive a round trip."""

    def test_bool_does_not_become_int(self):
        """bool is a subclass of int -- verify it stays bool."""
        out = df_loads(df_dumps(True))
        assert out is True
        assert isinstance(out, bool)

    def test_none_with_expected_type_nonetype(self, loose):
        assert df_loads(df_dumps(None), expected_type=type(None)) is None

    def test_none_with_expected_type_nonetype_strict(self, strict):
        assert df_loads(df_dumps(None), expected_type=type(None)) is None

    def test_empty_dict_expected_dict(self, loose):
        assert df_loads(df_dumps({}), expected_type=dict) == {}

    def test_empty_list_expected_list(self, strict):
        assert df_loads(df_dumps([]), expected_type=list) == []

    def test_tuple_becomes_list(self):
        """Tuples serialize as JSON arrays -- come back as lists."""
        assert df_loads(df_dumps((1, 2, 3))) == [1, 2, 3]

    def test_tuple_becomes_list_strict(self, strict):
        """Same coercion in strict mode (decoded value is a list)."""
        assert df_loads(df_dumps((1, 2, 3)),
                        expected_type=list) == [1, 2, 3]

    def test_int_dict_keys_become_strings(self):
        decoded = df_loads(df_dumps({1: "one", 2: "two"}))
        assert decoded == {"1": "one", "2": "two"}

    def test_int_dict_keys_become_strings_strict(self, strict):
        """JSON has no int-keyed objects -- coercion happens in strict
        too."""
        decoded = df_loads(df_dumps({1: "one", 2: "two"}),
                           expected_type=dict)
        assert decoded == {"1": "one", "2": "two"}

    def test_no_expected_type_no_per_call_warning(self, caplog):
        """When expected_type is None, the per-call mismatch / declares
        warnings must not fire (the one-shot advisory is separate and
        emitted at INFO level, not WARNING)."""
        # Capture on the codec's real logger. The module's __name__ is a
        # different logger, which would leave these negative assertions
        # dependent on the root logger's level.
        with caplog.at_level(logging.WARNING,
                             logger=df_serialization.logger.name):
            df_loads(df_dumps(PlainPerson("a", 1)))
        assert not any("not compatible" in r.message
                       for r in caplog.records)
        assert not any("payload declares" in r.message
                       for r in caplog.records)


# ---------------------------------------------------------------------------
# Patch boundary: the text below belongs to a second file in the same diff.
# Original diff header, preserved as comments:
#   diff --git a/tests/test_durable_functions_serialization.py
#              b/tests/test_durable_functions_serialization.py
#   new file mode 100644
#   index 0000000..62f8f18
#   --- /dev/null
#   +++ b/tests/test_durable_functions_serialization.py
#   @@ -0,0 +1,461 @@
# ---------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import json
import os
import typing
import unittest
import warnings
from unittest import mock

from azure.functions import _durable_functions
from azure.functions._durable_functions import (
    df_dumps,
    df_loads,
    _deserialize_custom_object,
    _serialize_custom_object,
    _get_serialize_default,
)
from azure.functions.durable_functions import ActivityTriggerConverter
from azure.functions.meta import Datum

# Environment variable that switches the codec into strict mode.
_STRICT_VAR = "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING"
# Name of the logger the codec module writes to. This is NOT the module's
# __name__, so log assertions must use this name to observe anything.
_DF_LOGGER = "azure.functions.DurableFunctions"


def _reset_notice_flags():
    """Re-arm both one-shot advisories on the codec module."""
    _durable_functions._loose_codec_notice_emitted = False
    _durable_functions._no_expected_type_notice_emitted = False


class _NoticeIsolatedTestCase(unittest.TestCase):
    """Base class giving each test a clean, loose-mode starting state.

    Before every test the one-shot notice flags are reset and the strict
    typing variable is removed from the environment, so a value inherited
    from the invoking shell cannot flip loose-mode tests into strict mode.
    Tests that need strict mode opt in with ``_strict_env``.
    """

    def setUp(self):
        _reset_notice_flags()
        self.addCleanup(_reset_notice_flags)
        env_patch = _no_strict_env()
        env_patch.start()
        self.addCleanup(env_patch.stop)


# ---------------------------------------------------------------------------
# Test fixtures: simple custom classes with to_json / from_json
# ---------------------------------------------------------------------------


class Hat:
    """Leaf custom object encoded as ``{"color": ...}``."""

    def __init__(self, color):
        self.color = color

    def __eq__(self, other):
        return isinstance(other, Hat) and self.color == other.color

    @staticmethod
    def to_json(obj):
        return {"color": obj.color}

    @staticmethod
    def from_json(data):
        return Hat(color=data["color"])


class Order:
    """Custom object that nests a Hat and serializes it explicitly."""

    def __init__(self, item, hat):
        self.item = item
        self.hat = hat

    def __eq__(self, other):
        return (isinstance(other, Order)
                and self.item == other.item
                and self.hat == other.hat)

    @staticmethod
    def to_json(obj):
        # Strict-mode contract: explicitly serialize nested custom objects.
        return {"item": obj.item, "hat": Hat.to_json(obj.hat)}

    @staticmethod
    def from_json(data):
        hat_data = data["hat"]
        # Accept either an already-built Hat or the raw dict form.
        if isinstance(hat_data, Hat):
            hat = hat_data
        else:
            hat = Hat.from_json(hat_data)
        return Order(item=data["item"], hat=hat)


class NoFromJson:
    """Class that can be encoded but deliberately lacks ``from_json``."""

    @staticmethod
    def to_json(obj):
        return {}


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _strict_env(value):
    """Return a patcher that sets the strict-typing variable to *value*."""
    return mock.patch.dict("os.environ", {_STRICT_VAR: value})


def _no_strict_env():
    """Return a patcher under which the strict-typing variable is unset.

    ``mock.patch.dict`` can only add or overwrite keys, so removing one is
    done by replacing the whole environment (``clear=True``) with a copy
    that omits the variable. The original environment is restored when
    the patcher exits or is stopped.
    """
    scrubbed = {key: val for key, val in os.environ.items()
                if key != _STRICT_VAR}
    return mock.patch.dict("os.environ", scrubbed, clear=True)


# ---------------------------------------------------------------------------
# df_dumps
# ---------------------------------------------------------------------------


class TestDfDumps(_NoticeIsolatedTestCase):
    """Encoding behaviour of df_dumps in loose and strict mode."""

    def test_loose_primitive_roundtrip(self):
        for value in [None, True, 1, 1.5, "x", [1, 2], {"a": 1}]:
            self.assertEqual(json.loads(df_dumps(value)), value)

    def test_loose_custom_object(self):
        s = df_dumps(Hat("red"))
        raw = json.loads(s)
        self.assertEqual(raw["__class__"], "Hat")
        self.assertEqual(raw["__module__"], Hat.__module__)
        self.assertEqual(raw["__data__"], {"color": "red"})

    def test_loose_nested_custom_via_default(self):
        # Loose mode: nested custom objects are auto-wrapped via default=.
        nested = {"hats": [Hat("red"), Hat("blue")]}
        s = df_dumps(nested)
        raw = json.loads(s)
        self.assertEqual(raw["hats"][0]["__class__"], "Hat")
        self.assertEqual(raw["hats"][1]["__data__"], {"color": "blue"})

    def test_strict_top_level_custom_object(self):
        with _strict_env("1"):
            s = df_dumps(Order(item="x", hat=Hat("red")))
        raw = json.loads(s)
        self.assertEqual(raw["__class__"], "Order")
        # __data__ must be plain JSON (no nested envelope) in strict mode.
        self.assertEqual(raw["__data__"],
                         {"item": "x", "hat": {"color": "red"}})

    def test_strict_primitive(self):
        with _strict_env("yes"):
            self.assertEqual(df_dumps([1, 2, 3]), "[1, 2, 3]")

    def test_strict_rejects_unencodable_nested(self):
        # Strict mode does not pass default=, so a stray custom object
        # inside a plain container raises TypeError immediately.
        with _strict_env("true"):
            with self.assertRaises(TypeError):
                df_dumps({"hat": Hat("red")})


# ---------------------------------------------------------------------------
# df_loads (no expected_type)
# ---------------------------------------------------------------------------


class TestDfLoadsNoType(_NoticeIsolatedTestCase):
    """df_loads called without expected_type."""

    def test_loose_primitive(self):
        s = json.dumps({"a": 1, "b": [2, 3]})
        self.assertEqual(df_loads(s), {"a": 1, "b": [2, 3]})

    def test_loose_custom_object_module_loaded(self):
        # Hat's module is the test module itself, which is loaded.
        s = df_dumps(Hat("red"))
        result = df_loads(s)
        self.assertEqual(result, Hat("red"))

    def test_loose_does_not_call_import_module(self):
        s = df_dumps(Hat("red"))
        with mock.patch("importlib.import_module") as imp:
            df_loads(s)
        imp.assert_not_called()

    def test_loose_unloaded_module_raises_value_error(self):
        payload = json.dumps({
            "__class__": "Whatever",
            "__module__": "definitely.not.loaded.module.xyz",
            "__data__": {},
        })
        with self.assertRaises(ValueError):
            df_loads(payload)

    def test_loose_unknown_class_in_loaded_module_raises_attribute_error(
            self):
        payload = json.dumps({
            "__class__": "ThisClassDoesNotExist",
            "__module__": __name__,
            "__data__": {},
        })
        with self.assertRaises(AttributeError):
            df_loads(payload)

    def test_loose_class_without_from_json_raises_type_error(self):
        payload = json.dumps({
            "__class__": "NoFromJson",
            "__module__": __name__,
            "__data__": {},
        })
        with self.assertRaises(TypeError):
            df_loads(payload)

    def test_strict_primitive_no_type(self):
        with _strict_env("1"):
            self.assertEqual(df_loads('{"a": 1}'), {"a": 1})

    def test_strict_custom_payload_no_type_raises(self):
        s = df_dumps(Hat("red"))
        with _strict_env("1"):
            with self.assertRaises(TypeError):
                df_loads(s)


# ---------------------------------------------------------------------------
# df_loads (with expected_type)
# ---------------------------------------------------------------------------


class TestDfLoadsWithType(_NoticeIsolatedTestCase):
    """df_loads called with an explicit expected_type."""

    def test_loose_match_uses_object_hook(self):
        # Loose mode preserves the legacy object_hook path so nested custom
        # objects inside __data__ are also reconstructed. importlib is
        # still never called because _deserialize_custom_object now uses
        # sys.modules.
        s = df_dumps(Hat("red"))
        with mock.patch("importlib.import_module") as imp:
            result = df_loads(s, expected_type=Hat)
        imp.assert_not_called()
        self.assertEqual(result, Hat("red"))

    def test_loose_mismatch_warns_and_falls_through(self):
        # Encode a Hat but ask for an Order -- mismatch in loose mode logs
        # a warning, then falls through to object_hook, which reconstructs
        # Hat (its module is loaded).
        s = df_dumps(Hat("red"))
        with self.assertLogs(_DF_LOGGER, level="WARNING") as cm:
            result = df_loads(s, expected_type=Order)
        self.assertTrue(any("payload declares" in m for m in cm.output))
        self.assertEqual(result, Hat("red"))

    def test_strict_match_uses_from_json_directly(self):
        with _strict_env("1"):
            s = df_dumps(Order(item="x", hat=Hat("red")))
            with mock.patch("importlib.import_module") as imp:
                result = df_loads(s, expected_type=Order)
            imp.assert_not_called()
        self.assertEqual(result, Order(item="x", hat=Hat("red")))

    def test_strict_mismatch_raises(self):
        s = df_dumps(Hat("red"))
        with _strict_env("1"):
            with self.assertRaises(TypeError):
                df_loads(s, expected_type=Order)

    def test_strict_type_without_from_json_raises(self):
        # Build an envelope that names NoFromJson; with the matching
        # expected_type strict mode should reject it.
        payload = json.dumps({
            "__class__": "NoFromJson",
            "__module__": __name__,
            "__data__": {},
        })
        with _strict_env("1"):
            with self.assertRaises(TypeError):
                df_loads(payload, expected_type=NoFromJson)

    def test_primitive_type_validation_loose_mismatch_warns(self):
        s = json.dumps("hello")
        with self.assertLogs(_DF_LOGGER, level="WARNING") as cm:
            result = df_loads(s, expected_type=int)
        self.assertTrue(any("not compatible" in m for m in cm.output))
        self.assertEqual(result, "hello")

    def test_primitive_type_validation_strict_mismatch_raises(self):
        s = json.dumps("hello")
        with _strict_env("1"):
            with self.assertRaises(TypeError):
                df_loads(s, expected_type=int)

    def test_typing_generics_do_not_crash(self):
        # isinstance(value, List[int]) raises TypeError; df_loads
        # tolerates it.
        s = json.dumps([1, 2, 3])
        result = df_loads(s, expected_type=typing.List[int])
        self.assertEqual(result, [1, 2, 3])


# ---------------------------------------------------------------------------
# _get_serialize_default
# ---------------------------------------------------------------------------


class TestGetSerializeDefault(_NoticeIsolatedTestCase):
    """Which ``default=`` callback each mode selects."""

    def test_loose_returns_serializer(self):
        self.assertIs(_get_serialize_default(), _serialize_custom_object)

    def test_strict_returns_none(self):
        with _strict_env("1"):
            self.assertIsNone(_get_serialize_default())


# ---------------------------------------------------------------------------
# _deserialize_custom_object direct
# ---------------------------------------------------------------------------


class TestDeserializeCustomObjectDirect(_NoticeIsolatedTestCase):
    """Direct calls to the object_hook function."""

    def test_module_loaded_reconstructs(self):
        result = _deserialize_custom_object({
            "__class__": "Hat",
            "__module__": __name__,
            "__data__": {"color": "red"},
        })
        self.assertEqual(result, Hat("red"))

    def test_module_not_loaded_raises_value_error(self):
        with self.assertRaises(ValueError):
            _deserialize_custom_object({
                "__class__": "Whatever",
                "__module__": "definitely.not.loaded.xyz",
                "__data__": {},
            })

    def test_does_not_import_module(self):
        # Ensure the symbol isn't even referenced.
        with mock.patch("importlib.import_module") as imp:
            _deserialize_custom_object({
                "__class__": "Hat",
                "__module__": __name__,
                "__data__": {"color": "blue"},
            })
        imp.assert_not_called()

    def test_non_envelope_passthrough(self):
        d = {"a": 1}
        self.assertEqual(_deserialize_custom_object(d), {"a": 1})


# ---------------------------------------------------------------------------
# ActivityTriggerConverter integration
# ---------------------------------------------------------------------------


class TestActivityTriggerConverterIntegration(_NoticeIsolatedTestCase):
    """The activity-trigger converter must route through the codec."""

    def test_decode_uses_df_loads(self):
        datum = Datum(type="json", value=json.dumps({"x": 1}))
        with mock.patch.object(_durable_functions, "df_loads",
                               wraps=_durable_functions.df_loads) as spy:
            ActivityTriggerConverter.decode(datum, trigger_metadata=None)
        spy.assert_called_once_with(datum.value)

    def test_encode_uses_df_dumps(self):
        with mock.patch.object(_durable_functions, "df_dumps",
                               wraps=_durable_functions.df_dumps) as spy:
            ActivityTriggerConverter.encode({"x": 1}, expected_type=None)
        spy.assert_called_once_with({"x": 1})

    def test_decode_custom_object_loaded_module(self):
        s = df_dumps(Hat("green"))
        datum = Datum(type="json", value=s)
        result = ActivityTriggerConverter.decode(datum,
                                                 trigger_metadata=None)
        self.assertEqual(result, Hat("green"))

    def test_decode_unloaded_module_raises_value_error(self):
        payload = json.dumps({
            "__class__": "Whatever",
            "__module__": "definitely.not.loaded.xyz",
            "__data__": {},
        })
        datum = Datum(type="json", value=payload)
        with self.assertRaises(ValueError):
            ActivityTriggerConverter.decode(datum, trigger_metadata=None)

    def test_encode_unserializable_raises_value_error(self):
        class NotSerializable:
            pass
        with self.assertRaises(ValueError):
            ActivityTriggerConverter.encode(NotSerializable(),
                                            expected_type=None)


# ---------------------------------------------------------------------------
# One-shot loose-mode notices
# ---------------------------------------------------------------------------


class TestLooseModeNotices(_NoticeIsolatedTestCase):
    """The two one-shot advisories fire once, and only when relevant."""

    def test_loose_codec_notice_fires_once(self):
        s = df_dumps(Hat("red"))
        with self.assertLogs(_DF_LOGGER, level="INFO") as cm, \
                warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            df_loads(s)
            df_loads(s)  # second call must not emit again
            df_loads(s)
        self.assertEqual(
            sum("loose-mode object_hook" in m for m in cm.output), 1)
        self.assertEqual(
            sum(issubclass(w.category, DeprecationWarning)
                and "loose-mode object_hook" in str(w.message)
                for w in caught),
            1,
        )

    def test_loose_codec_notice_not_emitted_for_primitive(self):
        # No custom-object reconstruction -> no loose-codec notice.
        # Watch the logger the codec writes to; watching the module's
        # __name__ would make this assertion pass unconditionally.
        with self.assertNoLogs(_DF_LOGGER, level="INFO"):
            df_loads(json.dumps({"a": 1}), expected_type=dict)
        self.assertFalse(
            _durable_functions._loose_codec_notice_emitted)

    def test_loose_codec_notice_suppressed_in_strict_mode(self):
        s = df_dumps(Hat("red"))
        with _strict_env("1"):
            # In strict mode df_loads with expected_type uses from_json
            # directly -- the object_hook path doesn't fire. Even if it
            # did, the notice helper short-circuits in strict mode.
            df_loads(s, expected_type=Hat)
        self.assertFalse(
            _durable_functions._loose_codec_notice_emitted)

    def test_no_expected_type_notice_fires_once(self):
        s = json.dumps({"a": 1})
        with self.assertLogs(_DF_LOGGER, level="INFO") as cm, \
                warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            df_loads(s)
            df_loads(s)
            df_loads(s)
        self.assertEqual(
            sum("without expected_type" in m for m in cm.output), 1)
        self.assertEqual(
            sum(issubclass(w.category, DeprecationWarning)
                and "without expected_type" in str(w.message)
                for w in caught),
            1,
        )

    def test_no_expected_type_notice_not_emitted_when_type_provided(self):
        s = json.dumps({"a": 1})
        # Same logger-name concern as the loose-codec variant above.
        with self.assertNoLogs(_DF_LOGGER, level="INFO"):
            df_loads(s, expected_type=dict)
        self.assertFalse(
            _durable_functions._no_expected_type_notice_emitted)

    def test_no_expected_type_notice_suppressed_in_strict_mode(self):
        s = json.dumps({"a": 1})
        with _strict_env("1"):
            df_loads(s)
        self.assertFalse(
            _durable_functions._no_expected_type_notice_emitted)


if __name__ == "__main__":
    unittest.main()