Skip to content

Commit c42377c

Browse files
authored
Add optional orjson support (#85)
1 parent a0d8c63 commit c42377c

File tree

6 files changed

+92
-49
lines changed

6 files changed

+92
-49
lines changed

docs/guide/configuration.asciidoc

+18
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,24 @@ es = Elasticsearch(
264264
)
265265
------------------------------------
266266

267+
If the `orjson` package is installed, you can use the faster `OrjsonSerializer` for the default mimetype (`application/json`):
268+
269+
[source,python]
270+
------------------------------------
271+
from elasticsearch import Elasticsearch, OrjsonSerializer
272+
273+
es = Elasticsearch(
274+
...,
275+
serializer=OrjsonSerializer()
276+
)
277+
------------------------------------
278+
279+
orjson is particularly fast when serializing vectors because it has native NumPy support. This serializer will be the default in a future release. Note that you can install orjson with the `orjson` extra:
280+
281+
[source,sh]
282+
--------------------------------------------
283+
$ python -m pip install "elasticsearch[orjson]"
284+
--------------------------------------------
267285

268286
[discrete]
269287
[[nodes]]

elasticsearch_serverless/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
)
6464
from .serializer import JSONSerializer, JsonSerializer
6565

66+
try:
67+
from .serializer import OrjsonSerializer
68+
except ImportError:
69+
OrjsonSerializer = None # type: ignore[assignment,misc]
70+
6671
# Only raise one warning per deprecation message so as not
6772
# to spam up the user if the same action is done multiple times.
6873
warnings.simplefilter("default", category=ElasticsearchWarning, append=True)
@@ -86,6 +91,8 @@
8691
"UnsupportedProductError",
8792
"ElasticsearchWarning",
8893
]
94+
if OrjsonSerializer is not None:
95+
__all__.append("OrjsonSerializer")
8996

9097
fixup_module_metadata(__name__, globals())
9198
del fixup_module_metadata

elasticsearch_serverless/serializer.py

+14
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@
4141
"MapboxVectorTileSerializer",
4242
]
4343

44+
try:
45+
from elastic_transport import OrjsonSerializer as _OrjsonSerializer
46+
47+
__all__.append("OrjsonSerializer")
48+
except ImportError:
49+
_OrjsonSerializer = None # type: ignore[assignment,misc]
50+
4451

4552
class JsonSerializer(_JsonSerializer):
4653
mimetype: ClassVar[str] = "application/json"
@@ -73,6 +80,13 @@ def default(self, data: Any) -> Any:
7380
raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")
7481

7582

83+
if _OrjsonSerializer is not None:
84+
85+
class OrjsonSerializer(JsonSerializer, _OrjsonSerializer):
86+
def default(self, data: Any) -> Any:
87+
return JsonSerializer.default(self, data)
88+
89+
7690
class NdjsonSerializer(JsonSerializer, _NdjsonSerializer):
7791
mimetype: ClassVar[str] = "application/x-ndjson"
7892

noxfile.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def lint(session):
8686
session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
8787

8888
# Workaround to make '-r' still work despite uninstalling aiohttp below.
89-
session.install(".[async,requests]", env=INSTALL_ENV)
89+
session.install(".[async,requests,orjson]", env=INSTALL_ENV)
9090

9191
# Run mypy on the package and then the type examples separately for
9292
# the two different mypy use-cases, ourselves and our users.
@@ -118,5 +118,5 @@ def lint(session):
118118

119119
@nox.session()
120120
def docs(session):
121-
session.install(".[docs]")
121+
session.install(".[docs,orjson]")
122122
session.run("sphinx-build", "docs/sphinx/", "docs/sphinx/_build", "-b", "html")

pyproject.toml

+4-6
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,9 @@ dependencies = [
4444
]
4545

4646
[project.optional-dependencies]
47-
async = [
48-
"aiohttp>=3,<4",
49-
]
50-
requests = [
51-
"requests>=2.4.0, <3.0.0",
52-
]
47+
async = ["aiohttp>=3,<4"]
48+
requests = ["requests>=2.4.0, <3.0.0" ]
49+
orjson = ["orjson>=3"]
5350
dev = [
5451
"requests>=2, <3",
5552
"aiohttp",
@@ -66,6 +63,7 @@ dev = [
6663
"twine",
6764
"build",
6865
"nox",
66+
"orjson",
6967
"numpy",
7068
"pandas",
7169
"mapbox-vector-tile",

test_elasticsearch_serverless/test_serializer.py

+47-41
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
import sys
2019
import uuid
2120
from datetime import datetime
2221
from decimal import Decimal
@@ -33,131 +32,138 @@
3332

3433
from elasticsearch_serverless import Elasticsearch
3534
from elasticsearch_serverless.exceptions import SerializationError
36-
from elasticsearch_serverless.serializer import JSONSerializer, TextSerializer
35+
from elasticsearch_serverless.serializer import (
36+
JSONSerializer,
37+
OrjsonSerializer,
38+
TextSerializer,
39+
)
3740

3841
requires_numpy_and_pandas = pytest.mark.skipif(
39-
np is None or pd is None, reason="Test requires numpy or pandas to be available"
42+
np is None or pd is None, reason="Test requires numpy and pandas to be available"
4043
)
4144

4245

43-
def test_datetime_serialization():
44-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
46+
@pytest.fixture(params=[JSONSerializer, OrjsonSerializer])
47+
def json_serializer(request: pytest.FixtureRequest):
48+
yield request.param()
49+
50+
51+
def test_datetime_serialization(json_serializer):
52+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
4553
{"d": datetime(2010, 10, 1, 2, 30)}
4654
)
4755

4856

49-
def test_decimal_serialization():
50-
requires_numpy_and_pandas()
57+
def test_decimal_serialization(json_serializer):
58+
assert b'{"d":3.8}' == json_serializer.dumps({"d": Decimal("3.8")})
5159

52-
if sys.version_info[:2] == (2, 6):
53-
pytest.skip("Float rounding is broken in 2.6.")
54-
assert b'{"d":3.8}' == JSONSerializer().dumps({"d": Decimal("3.8")})
5560

56-
57-
def test_uuid_serialization():
58-
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == JSONSerializer().dumps(
61+
def test_uuid_serialization(json_serializer):
62+
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == json_serializer.dumps(
5963
{"d": uuid.UUID("00000000-0000-0000-0000-000000000003")}
6064
)
6165

6266

6367
@requires_numpy_and_pandas
64-
def test_serializes_numpy_bool():
65-
assert b'{"d":true}' == JSONSerializer().dumps({"d": np.bool_(True)})
68+
def test_serializes_numpy_bool(json_serializer):
69+
assert b'{"d":true}' == json_serializer.dumps({"d": np.bool_(True)})
6670

6771

6872
@requires_numpy_and_pandas
69-
def test_serializes_numpy_integers():
70-
ser = JSONSerializer()
73+
def test_serializes_numpy_integers(json_serializer):
7174
for np_type in (
7275
np.int_,
7376
np.int8,
7477
np.int16,
7578
np.int32,
7679
np.int64,
7780
):
78-
assert ser.dumps({"d": np_type(-1)}) == b'{"d":-1}'
81+
assert json_serializer.dumps({"d": np_type(-1)}) == b'{"d":-1}'
7982

8083
for np_type in (
8184
np.uint8,
8285
np.uint16,
8386
np.uint32,
8487
np.uint64,
8588
):
86-
assert ser.dumps({"d": np_type(1)}) == b'{"d":1}'
89+
assert json_serializer.dumps({"d": np_type(1)}) == b'{"d":1}'
8790

8891

8992
@requires_numpy_and_pandas
90-
def test_serializes_numpy_floats():
91-
ser = JSONSerializer()
93+
def test_serializes_numpy_floats(json_serializer):
9294
for np_type in (
9395
np.float32,
9496
np.float64,
9597
):
96-
assert re.search(rb'^{"d":1\.2[\d]*}$', ser.dumps({"d": np_type(1.2)}))
98+
assert re.search(
99+
rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)})
100+
)
97101

98102

99103
@requires_numpy_and_pandas
100-
def test_serializes_numpy_datetime():
101-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
104+
def test_serializes_numpy_datetime(json_serializer):
105+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
102106
{"d": np.datetime64("2010-10-01T02:30:00")}
103107
)
104108

105109

106110
@requires_numpy_and_pandas
107-
def test_serializes_numpy_ndarray():
108-
assert b'{"d":[0,0,0,0,0]}' == JSONSerializer().dumps(
111+
def test_serializes_numpy_ndarray(json_serializer):
112+
assert b'{"d":[0,0,0,0,0]}' == json_serializer.dumps(
109113
{"d": np.zeros((5,), dtype=np.uint8)}
110114
)
111115
# This isn't useful for Elasticsearch, just want to make sure it works.
112-
assert b'{"d":[[0,0],[0,0]]}' == JSONSerializer().dumps(
116+
assert b'{"d":[[0,0],[0,0]]}' == json_serializer.dumps(
113117
{"d": np.zeros((2, 2), dtype=np.uint8)}
114118
)
115119

116120

117121
@requires_numpy_and_pandas
118122
def test_serializes_numpy_nan_to_nan():
119-
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": np.nan})
123+
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": float("NaN")})
124+
# NaN is invalid JSON, and orjson silently converts it to null
125+
assert b'{"d":null}' == OrjsonSerializer().dumps({"d": float("NaN")})
120126

121127

122128
@requires_numpy_and_pandas
123-
def test_serializes_pandas_timestamp():
124-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
129+
def test_serializes_pandas_timestamp(json_serializer):
130+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
125131
{"d": pd.Timestamp("2010-10-01T02:30:00")}
126132
)
127133

128134

129135
@requires_numpy_and_pandas
130-
def test_serializes_pandas_series():
131-
assert b'{"d":["a","b","c","d"]}' == JSONSerializer().dumps(
136+
def test_serializes_pandas_series(json_serializer):
137+
assert b'{"d":["a","b","c","d"]}' == json_serializer.dumps(
132138
{"d": pd.Series(["a", "b", "c", "d"])}
133139
)
134140

135141

136142
@requires_numpy_and_pandas
137143
@pytest.mark.skipif(not hasattr(pd, "NA"), reason="pandas.NA is required")
138-
def test_serializes_pandas_na():
139-
assert b'{"d":null}' == JSONSerializer().dumps({"d": pd.NA})
144+
def test_serializes_pandas_na(json_serializer):
145+
assert b'{"d":null}' == json_serializer.dumps({"d": pd.NA})
140146

141147

142148
@requires_numpy_and_pandas
143149
@pytest.mark.skipif(not hasattr(pd, "NaT"), reason="pandas.NaT required")
144-
def test_raises_serialization_error_pandas_nat():
150+
def test_raises_serialization_error_pandas_nat(json_serializer):
145151
with pytest.raises(SerializationError):
146-
JSONSerializer().dumps({"d": pd.NaT})
152+
json_serializer.dumps({"d": pd.NaT})
147153

148154

149155
@requires_numpy_and_pandas
150-
def test_serializes_pandas_category():
156+
def test_serializes_pandas_category(json_serializer):
151157
cat = pd.Categorical(["a", "c", "b", "a"], categories=["a", "b", "c"])
152-
assert b'{"d":["a","c","b","a"]}' == JSONSerializer().dumps({"d": cat})
158+
assert b'{"d":["a","c","b","a"]}' == json_serializer.dumps({"d": cat})
153159

154160
cat = pd.Categorical([1, 2, 3], categories=[1, 2, 3])
155-
assert b'{"d":[1,2,3]}' == JSONSerializer().dumps({"d": cat})
161+
assert b'{"d":[1,2,3]}' == json_serializer.dumps({"d": cat})
156162

157163

158-
def test_json_raises_serialization_error_on_dump_error():
164+
def test_json_raises_serialization_error_on_dump_error(json_serializer):
159165
with pytest.raises(SerializationError):
160-
JSONSerializer().dumps(object())
166+
json_serializer.dumps(object())
161167

162168

163169
def test_raises_serialization_error_on_load_error():

0 commit comments

Comments
 (0)