Skip to content

Commit 4d9a320

Browse files
authored
RUST-1631 Always use polling monitoring when running in a FaaS environment (#1030)
1 parent 8d09980 commit 4d9a320

File tree

16 files changed

+834
-24
lines changed

16 files changed

+834
-24
lines changed

.evergreen/aws-lambda-test/mongodb/src/main.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,12 @@ impl Stats {
3434

3535
fn handle_sdam(&mut self, event: &SdamEvent) {
3636
match event {
37-
SdamEvent::ServerHeartbeatStarted(_) => {
37+
SdamEvent::ServerHeartbeatStarted(ev) => {
38+
assert!(!ev.awaited);
3839
self.heartbeats_started += 1;
3940
}
4041
SdamEvent::ServerHeartbeatFailed(ev) => {
42+
assert!(!ev.awaited);
4143
self.failed_heartbeat_durations_millis.push(ev.duration.as_millis());
4244
}
4345
_ => (),

src/client/options.rs

+46
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ const URI_OPTIONS: &[&str] = &[
6868
"replicaset",
6969
"retrywrites",
7070
"retryreads",
71+
"servermonitoringmode",
7172
"serverselectiontimeoutms",
7273
"sockettimeoutms",
7374
"tls",
@@ -512,6 +513,12 @@ pub struct ClientOptions {
512513
#[builder(default)]
513514
pub retry_writes: Option<bool>,
514515

516+
/// Configures which server monitoring protocol to use.
517+
///
518+
/// The default is [`Auto`](ServerMonitoringMode::Auto).
519+
#[builder(default)]
520+
pub server_monitoring_mode: Option<ServerMonitoringMode>,
521+
515522
/// The handler that should process all Server Discovery and Monitoring events.
516523
#[derivative(Debug = "ignore", PartialEq = "ignore")]
517524
#[builder(default, setter(strip_option))]
@@ -683,6 +690,8 @@ impl Serialize for ClientOptions {
683690

684691
retrywrites: &'a Option<bool>,
685692

693+
servermonitoringmode: Option<String>,
694+
686695
#[serde(
687696
flatten,
688697
serialize_with = "SelectionCriteria::serialize_for_client_options"
@@ -723,6 +732,10 @@ impl Serialize for ClientOptions {
723732
replicaset: &self.repl_set_name,
724733
retryreads: &self.retry_reads,
725734
retrywrites: &self.retry_writes,
735+
servermonitoringmode: self
736+
.server_monitoring_mode
737+
.as_ref()
738+
.map(|m| format!("{:?}", m).to_lowercase()),
726739
selectioncriteria: &self.selection_criteria,
727740
serverselectiontimeoutms: &self.server_selection_timeout,
728741
sockettimeoutms: &self.socket_timeout,
@@ -844,6 +857,11 @@ pub struct ConnectionString {
844857
/// The default value is true.
845858
pub retry_writes: Option<bool>,
846859

860+
/// Configures which server monitoring protocol to use.
861+
///
862+
/// The default is [`Auto`](ServerMonitoringMode::Auto).
863+
pub server_monitoring_mode: Option<ServerMonitoringMode>,
864+
847865
/// Specifies whether the Client should directly connect to a single host rather than
848866
/// autodiscover all servers in the cluster.
849867
///
@@ -1340,6 +1358,7 @@ impl ClientOptions {
13401358
connect_timeout: conn_str.connect_timeout,
13411359
retry_reads: conn_str.retry_reads,
13421360
retry_writes: conn_str.retry_writes,
1361+
server_monitoring_mode: conn_str.server_monitoring_mode,
13431362
socket_timeout: conn_str.socket_timeout,
13441363
direct_connection: conn_str.direct_connection,
13451364
default_database: conn_str.default_database,
@@ -2182,6 +2201,19 @@ impl ConnectionString {
21822201
k @ "retryreads" => {
21832202
self.retry_reads = Some(get_bool!(value, k));
21842203
}
2204+
"servermonitoringmode" => {
2205+
self.server_monitoring_mode = Some(match value.to_lowercase().as_str() {
2206+
"stream" => ServerMonitoringMode::Stream,
2207+
"poll" => ServerMonitoringMode::Poll,
2208+
"auto" => ServerMonitoringMode::Auto,
2209+
other => {
2210+
return Err(Error::invalid_argument(format!(
2211+
"{:?} is not a valid server monitoring mode",
2212+
other
2213+
)));
2214+
}
2215+
});
2216+
}
21852217
k @ "serverselectiontimeoutms" => {
21862218
self.server_selection_timeout = Some(Duration::from_millis(get_duration!(value, k)))
21872219
}
@@ -2875,3 +2907,17 @@ pub struct TransactionOptions {
28752907
)]
28762908
pub max_commit_time: Option<Duration>,
28772909
}
2910+
2911+
/// Which server monitoring protocol to use.
2912+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
2913+
#[non_exhaustive]
2914+
pub enum ServerMonitoringMode {
2915+
/// The client will use the streaming protocol when the server supports it and fall back to the
2916+
/// polling protocol otherwise.
2917+
Stream,
2918+
/// The client will use the polling protocol.
2919+
Poll,
2920+
/// The client will use the polling protocol when running on a FaaS platform and behave the
2921+
/// same as `Stream` otherwise.
2922+
Auto,
2923+
}

src/cmap.rs

+4
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,7 @@ impl ConnectionPool {
184184
self.manager.broadcast(msg)
185185
}
186186
}
187+
188+
pub(crate) fn is_faas() -> bool {
189+
establish::handshake::FaasEnvironmentName::new().is_some()
190+
}

src/cmap/establish/handshake.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ struct RuntimeEnvironment {
6767
}
6868

6969
#[derive(Copy, Clone, Debug, PartialEq)]
70-
enum FaasEnvironmentName {
70+
pub(crate) enum FaasEnvironmentName {
7171
AwsLambda,
7272
AzureFunc,
7373
GcpFunc,
@@ -221,7 +221,7 @@ fn var_set(name: &str) -> bool {
221221
}
222222

223223
impl FaasEnvironmentName {
224-
fn new() -> Option<Self> {
224+
pub(crate) fn new() -> Option<Self> {
225225
use FaasEnvironmentName::*;
226226
let mut found: Option<Self> = None;
227227
let lambda_env = env::var_os("AWS_EXECUTION_ENV")

src/sdam/monitor.rs

+40-11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use super::{
1616
TopologyWatcher,
1717
};
1818
use crate::{
19+
client::options::ServerMonitoringMode,
1920
cmap::{establish::ConnectionEstablisher, Connection},
2021
error::{Error, Result},
2122
event::sdam::{
@@ -48,11 +49,15 @@ pub(crate) struct Monitor {
4849
sdam_event_emitter: Option<SdamEventEmitter>,
4950
client_options: ClientOptions,
5051

52+
/// Whether this monitor is allowed to use the streaming protocol.
53+
allow_streaming: bool,
54+
5155
/// The most recent topology version returned by the server in a hello response.
52-
/// If some, indicates that this monitor should use the streaming protocol. If none, it should
53-
/// use the polling protocol.
5456
topology_version: Option<TopologyVersion>,
5557

58+
/// The RTT monitor that has not been started yet; this becomes `None` once it is started.
59+
pending_rtt_monitor: Option<RttMonitor>,
60+
5661
/// Handle to the RTT monitor, used to get the latest known round trip time for a given server
5762
/// and to reset the RTT when the monitor disconnects from the server.
5863
rtt_monitor_handle: RttMonitorHandle,
@@ -79,21 +84,31 @@ impl Monitor {
7984
connection_establisher.clone(),
8085
client_options.clone(),
8186
);
87+
let allow_streaming = match client_options
88+
.server_monitoring_mode
89+
.clone()
90+
.unwrap_or(ServerMonitoringMode::Auto)
91+
{
92+
ServerMonitoringMode::Stream => true,
93+
ServerMonitoringMode::Poll => false,
94+
ServerMonitoringMode::Auto => !crate::cmap::is_faas(),
95+
};
8296
let monitor = Self {
8397
address,
8498
client_options,
8599
connection_establisher,
86100
topology_updater,
87101
topology_watcher,
88102
sdam_event_emitter,
103+
pending_rtt_monitor: Some(rtt_monitor),
89104
rtt_monitor_handle,
90105
request_receiver: manager_receiver,
91106
connection: None,
107+
allow_streaming,
92108
topology_version: None,
93109
};
94110

95111
runtime::execute(monitor.execute());
96-
runtime::execute(rtt_monitor.execute());
97112
}
98113

99114
async fn execute(mut self) {
@@ -102,13 +117,19 @@ impl Monitor {
102117
while self.is_alive() {
103118
let check_succeeded = self.check_server().await;
104119

120+
if self.topology_version.is_some() && self.allow_streaming {
121+
if let Some(rtt_monitor) = self.pending_rtt_monitor.take() {
122+
runtime::execute(rtt_monitor.execute());
123+
}
124+
}
125+
105126
// In the streaming protocol, we read from the socket continuously
106127
// rather than polling at specific intervals, unless the most recent check
107128
// failed.
108129
//
109130
// We only go to sleep when using the polling protocol (i.e. server never returned a
110131
// topologyVersion) or when the most recent check failed.
111-
if self.topology_version.is_none() || !check_succeeded {
132+
if self.topology_version.is_none() || !check_succeeded || !self.allow_streaming {
112133
self.request_receiver
113134
.wait_for_check_request(
114135
self.client_options.min_heartbeat_frequency(),
@@ -180,7 +201,7 @@ impl Monitor {
180201
self.emit_event(|| {
181202
SdamEvent::ServerHeartbeatStarted(ServerHeartbeatStartedEvent {
182203
server_address: self.address.clone(),
183-
awaited: self.topology_version.is_some(),
204+
awaited: self.topology_version.is_some() && self.allow_streaming,
184205
driver_connection_id,
185206
server_connection_id: self.connection.as_ref().and_then(|c| c.server_id),
186207
})
@@ -213,10 +234,14 @@ impl Monitor {
213234
} else {
214235
// If the initial handshake returned a topology version, send it back to the
215236
// server to begin streaming responses.
216-
let opts = self.topology_version.map(|tv| AwaitableHelloOptions {
217-
topology_version: tv,
218-
max_await_time: heartbeat_frequency,
219-
});
237+
let opts = if self.allow_streaming {
238+
self.topology_version.map(|tv| AwaitableHelloOptions {
239+
topology_version: tv,
240+
max_await_time: heartbeat_frequency,
241+
})
242+
} else {
243+
None
244+
};
220245

221246
let command = hello_command(
222247
self.client_options.server_api.as_ref(),
@@ -266,8 +291,12 @@ impl Monitor {
266291
};
267292
let duration = start.elapsed();
268293

294+
let awaited = self.topology_version.is_some() && self.allow_streaming;
269295
match result {
270296
HelloResult::Ok(ref r) => {
297+
if !awaited {
298+
self.rtt_monitor_handle.add_sample(duration);
299+
}
271300
self.emit_event(|| {
272301
let mut reply = r
273302
.raw_command_response
@@ -280,7 +309,7 @@ impl Monitor {
280309
duration,
281310
reply,
282311
server_address: self.address.clone(),
283-
awaited: self.topology_version.is_some(),
312+
awaited,
284313
driver_connection_id,
285314
server_connection_id: self.connection.as_ref().and_then(|c| c.server_id),
286315
})
@@ -296,7 +325,7 @@ impl Monitor {
296325
duration,
297326
failure: e.clone(),
298327
server_address: self.address.clone(),
299-
awaited: self.topology_version.is_some(),
328+
awaited,
300329
driver_connection_id,
301330
server_connection_id: self.connection.as_ref().and_then(|c| c.server_id),
302331
})

src/test/spec/json/server-discovery-and-monitoring/README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ Integration Tests
192192

193193
Integration tests are provided in the "unified" directory and are
194194
written in the `Unified Test Format
195-
<../unified-test-format/unified-test-format.rst>`_.
195+
<../../unified-test-format/unified-test-format.md>`_.
196196

197197
Prose Tests
198198
-----------
@@ -264,4 +264,4 @@ Run the following test(s) on MongoDB 4.4+.
264264

265265
.. Section for links.
266266
267-
.. _Server Description Equality: /source/server-discovery-and-monitoring/server-discovery-and-monitoring.rst#server-description-equality
267+
.. _Server Description Equality: /source/server-discovery-and-monitoring/server-discovery-and-monitoring.rst#server-description-equality

src/test/spec/json/server-discovery-and-monitoring/unified/pool-cleared-error.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ tests:
200200
event:
201201
poolClearedEvent: {}
202202
count: 1
203-
# Perform an operation to ensure the node still useable.
203+
# Perform an operation to ensure the node still usable.
204204
- name: insertOne
205205
object: *collection
206206
arguments:

0 commit comments

Comments
 (0)