From 2d1246eb8da467cf8d0e3c3040bd53c24b0e6d98 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 17 Apr 2025 14:46:19 -0400 Subject: [PATCH 1/6] Fix nan encoding in consolidated metadata --- src/zarr/core/group.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index da2aa5f754..e799856560 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -49,7 +49,7 @@ ) from zarr.core.config import config from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata -from zarr.core.metadata.v3 import V3JsonEncoder +from zarr.core.metadata.v3 import V3JsonEncoder, _replace_special_floats from zarr.core.sync import SyncMixin, sync from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError from zarr.storage import StoreLike, StorePath @@ -355,10 +355,10 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: assert isinstance(consolidated_metadata, dict) for k, v in consolidated_metadata.items(): attrs = v.pop("attributes", None) - d[f"{k}/{ZATTRS_JSON}"] = attrs + d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs) if "shape" in v: # it's an array - d[f"{k}/{ZARRAY_JSON}"] = v + d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v) else: d[f"{k}/{ZGROUP_JSON}"] = { "zarr_format": self.zarr_format, From 4551d082d05c930408aec5d2dd11141427c8244a Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 17 Apr 2025 15:15:05 -0400 Subject: [PATCH 2/6] Fix for zarr_format=2, 3 still fails --- tests/test_metadata/test_consolidated.py | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c1ff2e130a..d7779aa8f6 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -573,3 +573,28 @@ async def test_use_consolidated_false( assert len([x async for x in good.members()]) == 2 assert good.metadata.consolidated_metadata assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"] + + +@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf]) +async def test_consolidated_metadata_encodes_special_chars( + memory_store: Store, zarr_format: ZarrFormat, fill_value: float +): + root = await group(store=memory_store, zarr_format=zarr_format) + _time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value) + await zarr.api.asynchronous.consolidate_metadata(memory_store) + + root = await group(store=memory_store, zarr_format=zarr_format) + root_buffer = root.metadata.to_buffer_dict(default_buffer_prototype()) + + if zarr_format == 2: + root_metadata = root_buffer[".zmetadata"].to_bytes().decode("utf-8") + elif zarr_format == 3: + root_metadata = root_buffer["zarr.json"].to_bytes().decode("utf-8") + + if np.isnan(fill_value): + assert '"NaN"' in root_metadata + elif np.isneginf(fill_value): + assert '"-Infinity"' in root_metadata + elif np.isinf(fill_value): + assert '"Infinity"' in root_metadata From f37ee04209830204e2fc2396bbcdb7a4efdbbc8e Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 17 Apr 2025 15:16:57 -0400 Subject: [PATCH 3/6] Passing for zarr 3 --- src/zarr/core/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index e799856560..925252ccf0 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -334,7 +334,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: if self.zarr_format == 3: return { ZARR_JSON: prototype.buffer.from_bytes( - json.dumps(self.to_dict(), cls=V3JsonEncoder).encode() + json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode() ) } else: From 37e5d7055dd60365e699b6244b9fdc3f3f191df7 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 17 Apr 2025 15:50:24 -0400 Subject: [PATCH 4/6] Try to improve tests coverage report --- tests/test_metadata/test_consolidated.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index d7779aa8f6..d1d3ec8bb0 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -581,6 +581,7 @@ async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float ): root = await group(store=memory_store, zarr_format=zarr_format) + _child = await root.create_group("child", attributes={"test": fill_value}) _time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value) await zarr.api.asynchronous.consolidate_metadata(memory_store) @@ -588,13 +589,22 @@ async def test_consolidated_metadata_encodes_special_chars( root_buffer = root.metadata.to_buffer_dict(default_buffer_prototype()) if zarr_format == 2: - root_metadata = root_buffer[".zmetadata"].to_bytes().decode("utf-8") + root_metadata = json.loads(root_buffer[".zmetadata"].to_bytes().decode("utf-8"))["metadata"] elif zarr_format == 3: - root_metadata = root_buffer["zarr.json"].to_bytes().decode("utf-8") + root_metadata = json.loads(root_buffer["zarr.json"].to_bytes().decode("utf-8"))[ + "consolidated_metadata" + ]["metadata"] if np.isnan(fill_value): - assert '"NaN"' in root_metadata + expected_fill_value = "NaN" elif np.isneginf(fill_value): - assert '"-Infinity"' in root_metadata + expected_fill_value = "-Infinity" elif np.isinf(fill_value): - assert '"Infinity"' in root_metadata + expected_fill_value = "Infinity" + + if zarr_format == 2: + assert root_metadata["child/.zattrs"]["test"] == expected_fill_value + assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value + elif zarr_format == 3: + assert root_metadata["child"]["attributes"]["test"] == expected_fill_value + assert root_metadata["time"]["fill_value"] == expected_fill_value From 91415ac24c7a52b6f4e41a91291547cce5a4656f Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 17 Apr 2025 19:46:44 -0400 Subject: [PATCH 5/6] Update test_consolidated.py Co-authored-by: Davis Bennett --- tests/test_metadata/test_consolidated.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index d1d3ec8bb0..a179982e94 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -575,7 +575,6 @@ async def test_use_consolidated_false( assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"] -@pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf]) async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float From 681313498d1f3ca726c5febd4ebe11635670c290 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Fri, 18 Apr 2025 09:18:38 -0400 Subject: [PATCH 6/6] Update with changelog --- changes/2996.bugfix.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/2996.bugfix.rst diff --git a/changes/2996.bugfix.rst b/changes/2996.bugfix.rst new file mode 100644 index 0000000000..977dc79d0b --- /dev/null +++ b/changes/2996.bugfix.rst @@ -0,0 +1,4 @@ +Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be +consistent with the behavior of `ArrayMetadata`. + +