Skip to content

Commit 94cd288

Browse files
authored
feat: improve throughput of http based storage#reader between 100 MiB/s and 200 MiB/s (#1799)
### Work

Implement new BlobReadChannelV2 which replaces BlobReadChannel and improves on its resource usage to reduce the minimum number of RPCs to 1 from (objSize / chunkSize + 1), while still maintaining the ability to restart a stream that may have been interrupted.

### Results

Throughput in MiB/s has increased across the board:

```
         ClassName          mean   25%    50%    75%    90%    95%    99%    max
READ[0]  BlobReadChannel    32.2   25.3   29.0   32.6   42.1   56.1   111.9  214.1
READ[1]  BlobReadChannel    32.1   25.4   28.7   32.6   41.7   55.4   106.1  224.4
READ[2]  BlobReadChannel    31.9   25.2   28.6   32.8   41.6   55.2   105.4  227.2
READ[0]  BlobReadChannelV2  214.1  196.4  219.8  239.3  254.3  261.2  278.0  315.2
READ[1]  BlobReadChannelV2  215.9  198.8  221.0  240.0  254.4  261.8  281.8  315.6
READ[2]  BlobReadChannelV2  216.4  199.5  221.2  239.4  253.9  261.6  281.6  308.6
```

Data collected using all default settings, against a regional bucket, across a range of object sizes [256KiB, 2GiB]. Each object is read in full three times to account for any GCS caching variability.

### Internal implementation notes

Add ByteRangeSpec to encapsulate relative vs explicit(open) vs explicit(closed) vs null vs open-ended ranges and their associated logical subtleties.

The new StorageReadChannel interface is a possible candidate for a new storage-specific interface we can expose to folks for improvements independent of core and BigQuery.

### Future Breaking Change

In order to facilitate migrating any `RestorableState<ReadChannel>` customers might have, we have left the existing class hierarchy in place and updated `BlobReadChannel.StateImpl#restore()` to produce a new `BlobReadChannelV2` instance when called. In the next major version this compatibility path will be removed.
1 parent f8cad99 commit 94cd288

40 files changed

+3252
-764
lines changed

google-cloud-storage/clirr-ignored-differences.xml

+12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<!-- see https://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html -->
33
<differences>
4+
<!-- Not breaking, internal only interface and the new methods have default implementations -->
5+
<difference>
6+
<differenceType>7012</differenceType>
7+
<className>com/google/cloud/storage/UnbufferedReadableByteChannelSession$UnbufferedReadableByteChannel</className>
8+
<method>* read(*)</method>
9+
</difference>
10+
<!-- Allow accessing the underlying Apiary instance -->
11+
<difference>
12+
<differenceType>7012</differenceType>
13+
<className>com/google/cloud/storage/spi/v1/StorageRpc</className>
14+
<method>* getStorage()</method>
15+
</difference>
416

517
<difference>
618
<differenceType>8001</differenceType>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
/*
2+
* Copyright 2022 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.storage;
18+
19+
import static com.google.cloud.storage.Utils.ifNonNull;
20+
21+
import com.google.api.client.http.HttpHeaders;
22+
import com.google.api.client.http.HttpResponse;
23+
import com.google.api.client.http.HttpResponseException;
24+
import com.google.api.core.SettableApiFuture;
25+
import com.google.api.gax.retrying.ResultRetryAlgorithm;
26+
import com.google.api.services.storage.Storage;
27+
import com.google.api.services.storage.Storage.Objects;
28+
import com.google.api.services.storage.Storage.Objects.Get;
29+
import com.google.api.services.storage.model.StorageObject;
30+
import com.google.cloud.storage.UnbufferedReadableByteChannelSession.UnbufferedReadableByteChannel;
31+
import com.google.cloud.storage.spi.v1.StorageRpc;
32+
import com.google.common.annotations.VisibleForTesting;
33+
import com.google.common.base.MoreObjects;
34+
import com.google.common.hash.HashFunction;
35+
import com.google.common.hash.Hashing;
36+
import com.google.common.io.BaseEncoding;
37+
import com.google.gson.Gson;
38+
import com.google.gson.stream.JsonReader;
39+
import java.io.IOException;
40+
import java.io.InputStream;
41+
import java.io.ObjectInputStream;
42+
import java.io.ObjectOutputStream;
43+
import java.io.Serializable;
44+
import java.io.StringReader;
45+
import java.nio.ByteBuffer;
46+
import java.nio.channels.Channels;
47+
import java.nio.channels.ReadableByteChannel;
48+
import java.nio.channels.ScatteringByteChannel;
49+
import java.util.List;
50+
import java.util.Map;
51+
import java.util.function.Consumer;
52+
import java.util.function.Function;
53+
import javax.annotation.concurrent.Immutable;
54+
55+
class ApiaryUnbufferedReadableByteChannel implements UnbufferedReadableByteChannel {
56+
57+
private final ApiaryReadRequest apiaryReadRequest;
58+
private final Storage storage;
59+
private final SettableApiFuture<StorageObject> result;
60+
private final HttpStorageOptions options;
61+
private final ResultRetryAlgorithm<?> resultRetryAlgorithm;
62+
private final Consumer<StorageObject> resolvedObjectCallback;
63+
64+
private long position;
65+
private ScatteringByteChannel sbc;
66+
private boolean open;
67+
68+
// returned X-Goog-Generation header value
69+
private Long xGoogGeneration;
70+
71+
ApiaryUnbufferedReadableByteChannel(
72+
ApiaryReadRequest apiaryReadRequest,
73+
Storage storage,
74+
SettableApiFuture<StorageObject> result,
75+
HttpStorageOptions options,
76+
ResultRetryAlgorithm<?> resultRetryAlgorithm,
77+
Consumer<StorageObject> resolvedObjectCallback) {
78+
this.apiaryReadRequest = apiaryReadRequest;
79+
this.storage = storage;
80+
this.result = result;
81+
this.options = options;
82+
this.resultRetryAlgorithm = resultRetryAlgorithm;
83+
this.resolvedObjectCallback = resolvedObjectCallback;
84+
this.open = true;
85+
this.position =
86+
apiaryReadRequest.getByteRangeSpec() != null
87+
? apiaryReadRequest.getByteRangeSpec().beginOffset()
88+
: 0;
89+
}
90+
91+
@SuppressWarnings("UnnecessaryContinue")
92+
@Override
93+
public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
94+
do {
95+
if (sbc == null) {
96+
sbc = Retrying.run(options, resultRetryAlgorithm, this::open, Function.identity());
97+
}
98+
99+
try {
100+
// According to the contract of Retrying#run it's possible for sbc to be null even after
101+
// invocation. However, the function we provide is guaranteed to return non-null or throw
102+
// an exception. So we suppress the warning from intellij here.
103+
//noinspection ConstantConditions
104+
long read = sbc.read(dsts, offset, length);
105+
if (read == -1) {
106+
open = false;
107+
} else {
108+
position += read;
109+
}
110+
return read;
111+
} catch (Exception t) {
112+
if (resultRetryAlgorithm.shouldRetry(t, null)) {
113+
// if our retry algorithm COULD allow a retry, continue the loop and allow trying to
114+
// open the stream again.
115+
sbc = null;
116+
continue;
117+
} else if (t instanceof IOException) {
118+
IOException ioE = (IOException) t;
119+
if (resultRetryAlgorithm.shouldRetry(StorageException.translate(ioE), null)) {
120+
sbc = null;
121+
continue;
122+
} else {
123+
throw ioE;
124+
}
125+
} else {
126+
throw new IOException(StorageException.coalesce(t));
127+
}
128+
}
129+
} while (true);
130+
}
131+
132+
@Override
133+
public boolean isOpen() {
134+
return open;
135+
}
136+
137+
@Override
138+
public void close() throws IOException {
139+
open = false;
140+
if (sbc != null) {
141+
sbc.close();
142+
}
143+
}
144+
145+
private ScatteringByteChannel open() {
146+
try {
147+
Boolean b =
148+
(Boolean) apiaryReadRequest.options.get(StorageRpc.Option.RETURN_RAW_INPUT_STREAM);
149+
boolean returnRawInputStream = b != null ? b : true;
150+
ApiaryReadRequest request = apiaryReadRequest.withNewBeginOffset(position);
151+
Get get = createGetRequest(request, storage.objects(), xGoogGeneration, returnRawInputStream);
152+
153+
HttpResponse media = get.executeMedia();
154+
InputStream content = media.getContent();
155+
if (xGoogGeneration == null) {
156+
HttpHeaders responseHeaders = media.getHeaders();
157+
//noinspection unchecked
158+
List<String> xGoogGenHeader = (List<String>) responseHeaders.get("x-goog-generation");
159+
// TODO: wire in result metadata population
160+
if (xGoogGenHeader != null && !xGoogGenHeader.isEmpty()) {
161+
String s = xGoogGenHeader.get(0);
162+
Long generation = Long.valueOf(s);
163+
this.xGoogGeneration = generation;
164+
resolvedObjectCallback.accept(
165+
apiaryReadRequest.getObject().clone().setGeneration(generation));
166+
}
167+
}
168+
169+
ReadableByteChannel rbc = Channels.newChannel(content);
170+
return StorageByteChannels.readable().asScatteringByteChannel(rbc);
171+
} catch (HttpResponseException e) {
172+
if (xGoogGeneration != null) {
173+
int statusCode = e.getStatusCode();
174+
if (statusCode == 404) {
175+
throw new StorageException(404, "Failure while trying to resume download", e);
176+
}
177+
}
178+
throw StorageException.translate(e);
179+
} catch (IOException e) {
180+
throw StorageException.translate(e);
181+
} catch (Throwable t) {
182+
throw StorageException.coalesce(t);
183+
}
184+
}
185+
186+
@VisibleForTesting
187+
static Get createGetRequest(
188+
ApiaryReadRequest apiaryReadRequest,
189+
Objects objects,
190+
Long xGoogGeneration,
191+
boolean returnRawInputStream)
192+
throws IOException {
193+
StorageObject from = apiaryReadRequest.getObject();
194+
Map<StorageRpc.Option, ?> options = apiaryReadRequest.getOptions();
195+
Get get = objects.get(from.getBucket(), from.getName());
196+
if (from.getGeneration() != null) {
197+
get.setGeneration(from.getGeneration());
198+
} else if (xGoogGeneration != null) {
199+
get.setGeneration(xGoogGeneration);
200+
}
201+
ifNonNull(
202+
options.get(StorageRpc.Option.IF_GENERATION_MATCH),
203+
ApiaryUnbufferedReadableByteChannel::cast,
204+
get::setIfGenerationMatch);
205+
ifNonNull(
206+
options.get(StorageRpc.Option.IF_GENERATION_NOT_MATCH),
207+
ApiaryUnbufferedReadableByteChannel::cast,
208+
get::setIfGenerationNotMatch);
209+
ifNonNull(
210+
options.get(StorageRpc.Option.IF_METAGENERATION_MATCH),
211+
ApiaryUnbufferedReadableByteChannel::cast,
212+
get::setIfMetagenerationMatch);
213+
ifNonNull(
214+
options.get(StorageRpc.Option.IF_METAGENERATION_NOT_MATCH),
215+
ApiaryUnbufferedReadableByteChannel::cast,
216+
get::setIfMetagenerationNotMatch);
217+
ifNonNull(
218+
options.get(StorageRpc.Option.USER_PROJECT),
219+
ApiaryUnbufferedReadableByteChannel::cast,
220+
get::setUserProject);
221+
HttpHeaders headers = get.getRequestHeaders();
222+
ifNonNull(
223+
options.get(StorageRpc.Option.CUSTOMER_SUPPLIED_KEY),
224+
ApiaryUnbufferedReadableByteChannel::cast,
225+
(String key) -> {
226+
BaseEncoding base64 = BaseEncoding.base64();
227+
HashFunction hashFunction = Hashing.sha256();
228+
headers.set("x-goog-encryption-algorithm", "AES256");
229+
headers.set("x-goog-encryption-key", key);
230+
headers.set(
231+
"x-goog-encryption-key-sha256",
232+
base64.encode(hashFunction.hashBytes(base64.decode(key)).asBytes()));
233+
});
234+
235+
get.setReturnRawInputStream(returnRawInputStream);
236+
String range = apiaryReadRequest.getByteRangeSpec().getHttpRangeHeader();
237+
if (range != null) {
238+
get.getRequestHeaders().setRange(range);
239+
}
240+
get.getMediaHttpDownloader().setDirectDownloadEnabled(true);
241+
242+
return get;
243+
}
244+
245+
@SuppressWarnings("unchecked")
246+
private static <T> T cast(Object o) {
247+
return (T) o;
248+
}
249+
250+
@Immutable
251+
static final class ApiaryReadRequest implements Serializable {
252+
private static final long serialVersionUID = -4059435314115374448L;
253+
private static final Gson gson = new Gson();
254+
private transient StorageObject object;
255+
private final Map<StorageRpc.Option, ?> options;
256+
private final ByteRangeSpec byteRangeSpec;
257+
258+
private volatile String objectJson;
259+
260+
ApiaryReadRequest(
261+
StorageObject object, Map<StorageRpc.Option, ?> options, ByteRangeSpec byteRangeSpec) {
262+
this.object = object;
263+
this.options = options;
264+
this.byteRangeSpec = byteRangeSpec;
265+
}
266+
267+
StorageObject getObject() {
268+
return object;
269+
}
270+
271+
Map<StorageRpc.Option, ?> getOptions() {
272+
return options;
273+
}
274+
275+
ByteRangeSpec getByteRangeSpec() {
276+
return byteRangeSpec;
277+
}
278+
279+
ApiaryReadRequest withNewBeginOffset(long beginOffset) {
280+
if (beginOffset > 0 && beginOffset != byteRangeSpec.beginOffset()) {
281+
return new ApiaryReadRequest(
282+
object, options, byteRangeSpec.withNewBeginOffset(beginOffset));
283+
} else {
284+
return this;
285+
}
286+
}
287+
288+
@Override
289+
public boolean equals(Object o) {
290+
if (this == o) {
291+
return true;
292+
}
293+
if (!(o instanceof ApiaryReadRequest)) {
294+
return false;
295+
}
296+
ApiaryReadRequest that = (ApiaryReadRequest) o;
297+
return java.util.Objects.equals(object, that.object)
298+
&& java.util.Objects.equals(options, that.options)
299+
&& java.util.Objects.equals(byteRangeSpec, that.byteRangeSpec);
300+
}
301+
302+
@Override
303+
public int hashCode() {
304+
return java.util.Objects.hash(object, options, byteRangeSpec);
305+
}
306+
307+
@Override
308+
public String toString() {
309+
return MoreObjects.toStringHelper(this)
310+
.add("byteRangeSpec", byteRangeSpec)
311+
.add("options", options)
312+
.add("object", getObjectJson())
313+
.toString();
314+
}
315+
316+
private String getObjectJson() {
317+
if (objectJson == null) {
318+
synchronized (this) {
319+
if (objectJson == null) {
320+
objectJson = gson.toJson(object);
321+
}
322+
}
323+
}
324+
return objectJson;
325+
}
326+
327+
private void writeObject(ObjectOutputStream out) throws IOException {
328+
String ignore = getObjectJson();
329+
out.defaultWriteObject();
330+
}
331+
332+
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
333+
in.defaultReadObject();
334+
JsonReader jsonReader = gson.newJsonReader(new StringReader(this.objectJson));
335+
this.object = gson.fromJson(jsonReader, StorageObject.class);
336+
}
337+
}
338+
}

0 commit comments

Comments
 (0)