diff --git a/changes/2996.bugfix.rst b/changes/2996.bugfix.rst new file mode 100644 index 0000000000..977dc79d0b --- /dev/null +++ b/changes/2996.bugfix.rst @@ -0,0 +1,4 @@ +Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be +consistent with the behavior of `ArrayMetadata`. + + diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index da2aa5f754..925252ccf0 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -49,7 +49,7 @@ ) from zarr.core.config import config from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata -from zarr.core.metadata.v3 import V3JsonEncoder +from zarr.core.metadata.v3 import V3JsonEncoder, _replace_special_floats from zarr.core.sync import SyncMixin, sync from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError from zarr.storage import StoreLike, StorePath @@ -334,7 +334,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: if self.zarr_format == 3: return { ZARR_JSON: prototype.buffer.from_bytes( - json.dumps(self.to_dict(), cls=V3JsonEncoder).encode() + json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode() ) } else: @@ -355,10 +355,10 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: assert isinstance(consolidated_metadata, dict) for k, v in consolidated_metadata.items(): attrs = v.pop("attributes", None) - d[f"{k}/{ZATTRS_JSON}"] = attrs + d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs) if "shape" in v: # it's an array - d[f"{k}/{ZARRAY_JSON}"] = v + d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v) else: d[f"{k}/{ZGROUP_JSON}"] = { "zarr_format": self.zarr_format, diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c1ff2e130a..a179982e94 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -573,3 +573,37 @@ async def test_use_consolidated_false( assert len([x 
async for x in good.members()]) == 2 assert good.metadata.consolidated_metadata assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"]
+
+
+@pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf])
+async def test_consolidated_metadata_encodes_special_chars(
+    memory_store: Store, zarr_format: ZarrFormat, fill_value: float
+):
+    """
+    Check that non-finite floats appear as strings in serialized consolidated metadata.
+
+    A child group holding ``fill_value`` as an attribute and an array using it as
+    its fill value are created and consolidated. The root metadata is then
+    serialized with ``to_buffer_dict`` and the decoded JSON must contain
+    ``"NaN"``, ``"Infinity"`` or ``"-Infinity"`` in both places.
+    """
+    # Populate the store, then consolidate its metadata.
+    parent = await group(store=memory_store, zarr_format=zarr_format)
+    await parent.create_group("child", attributes={"test": fill_value})
+    await parent.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value)
+    await zarr.api.asynchronous.consolidate_metadata(memory_store)
+
+    # Fetch the root again through ``group()`` and serialize its metadata.
+    reopened = await group(store=memory_store, zarr_format=zarr_format)
+    buffers = reopened.metadata.to_buffer_dict(default_buffer_prototype())
+
+    # Format 2 is read from ".zmetadata"; format 3 from "zarr.json" under
+    # the "consolidated_metadata" key.
+    if zarr_format == 2:
+        document = json.loads(buffers[".zmetadata"].to_bytes().decode("utf-8"))
+        consolidated = document["metadata"]
+    elif zarr_format == 3:
+        document = json.loads(buffers["zarr.json"].to_bytes().decode("utf-8"))
+        consolidated = document["consolidated_metadata"]["metadata"]
+
+    # Pick the expected string spelling; NaN cannot be matched with ``==``,
+    # so each candidate is tested with its numpy predicate, in order.
+    spellings = (
+        (np.isnan, "NaN"),
+        (np.isneginf, "-Infinity"),
+        (np.isinf, "Infinity"),
+    )
+    for matches, spelling in spellings:
+        if matches(fill_value):
+            expected = spelling
+            break
+
+    # Both the group attribute and the array fill value must use that spelling.
+    if zarr_format == 2:
+        child_attrs = consolidated["child/.zattrs"]
+        assert child_attrs["test"] == expected
+        array_meta = consolidated["time/.zarray"]
+        assert array_meta["fill_value"] == expected
+    elif zarr_format == 3:
+        child_meta = consolidated["child"]
+        assert child_meta["attributes"]["test"] == expected
+        array_meta = consolidated["time"]
+        assert array_meta["fill_value"] == expected