Skip to content

Fix nan encoding in consolidated metadata #2996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changes/2996.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be
consistent with the behavior of `ArrayMetadata`.


8 changes: 4 additions & 4 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
)
from zarr.core.config import config
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
from zarr.core.metadata.v3 import V3JsonEncoder
from zarr.core.metadata.v3 import V3JsonEncoder, _replace_special_floats
from zarr.core.sync import SyncMixin, sync
from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError
from zarr.storage import StoreLike, StorePath
Expand Down Expand Up @@ -334,7 +334,7 @@
if self.zarr_format == 3:
return {
ZARR_JSON: prototype.buffer.from_bytes(
json.dumps(self.to_dict(), cls=V3JsonEncoder).encode()
json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode()
)
}
else:
Expand All @@ -355,10 +355,10 @@
assert isinstance(consolidated_metadata, dict)
for k, v in consolidated_metadata.items():
attrs = v.pop("attributes", None)
d[f"{k}/{ZATTRS_JSON}"] = attrs
d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)

Check warning on line 358 in src/zarr/core/group.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/group.py#L358

Added line #L358 was not covered by tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mpiannucci was this particular change important? Given that the "special floats" logic is very narrowly scoped to array metadata, I don't think we want to apply it to the attributes.

I'm just seeing this now as I adapt #2874 to the latest changes.

if "shape" in v:
# it's an array
d[f"{k}/{ZARRAY_JSON}"] = v
d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)

Check warning on line 361 in src/zarr/core/group.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/group.py#L361

Added line #L361 was not covered by tests
else:
d[f"{k}/{ZGROUP_JSON}"] = {
"zarr_format": self.zarr_format,
Expand Down
34 changes: 34 additions & 0 deletions tests/test_metadata/test_consolidated.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,3 +573,37 @@ async def test_use_consolidated_false(
assert len([x async for x in good.members()]) == 2
assert good.metadata.consolidated_metadata
assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"]


@pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf])
async def test_consolidated_metadata_encodes_special_chars(
memory_store: Store, zarr_format: ZarrFormat, fill_value: float
):
root = await group(store=memory_store, zarr_format=zarr_format)
_child = await root.create_group("child", attributes={"test": fill_value})
_time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value)
await zarr.api.asynchronous.consolidate_metadata(memory_store)

root = await group(store=memory_store, zarr_format=zarr_format)
root_buffer = root.metadata.to_buffer_dict(default_buffer_prototype())

if zarr_format == 2:
root_metadata = json.loads(root_buffer[".zmetadata"].to_bytes().decode("utf-8"))["metadata"]
elif zarr_format == 3:
root_metadata = json.loads(root_buffer["zarr.json"].to_bytes().decode("utf-8"))[
"consolidated_metadata"
]["metadata"]

if np.isnan(fill_value):
expected_fill_value = "NaN"
elif np.isneginf(fill_value):
expected_fill_value = "-Infinity"
elif np.isinf(fill_value):
expected_fill_value = "Infinity"

if zarr_format == 2:
assert root_metadata["child/.zattrs"]["test"] == expected_fill_value
assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value
elif zarr_format == 3:
assert root_metadata["child"]["attributes"]["test"] == expected_fill_value
assert root_metadata["time"]["fill_value"] == expected_fill_value