|
2 | 2 | # Use of this source code is governed by a BSD-style
|
3 | 3 | # license that can be found in the LICENSE file.
|
4 | 4 |
|
| 5 | +import copy |
| 6 | +import concurrent.futures |
5 | 7 | from datetime import datetime
|
6 | 8 | import logging
|
7 | 9 | import re
|
@@ -378,6 +380,9 @@ def process_http_error(ex):
|
378 | 380 | # See `BigQuery Troubleshooting Errors
|
379 | 381 | # <https://cloud.google.com/bigquery/troubleshooting-errors>`__
|
380 | 382 |
|
| 383 | + if "cancelled" in ex.message: |
| 384 | + raise QueryTimeout("Reason: {0}".format(ex)) |
| 385 | + |
381 | 386 | raise GenericGBQException("Reason: {0}".format(ex))
|
382 | 387 |
|
383 | 388 | def download_table(
|
@@ -406,8 +411,41 @@ def download_table(
|
406 | 411 | user_dtypes=dtypes,
|
407 | 412 | )
|
408 | 413 |
|
| 414 | + def _wait_for_query_job(self, query_reply, timeout_ms): |
| 415 | + """Wait for query to complete, pausing occasionally to update progress. |
| 416 | +
|
| 417 | + Args: |
| 418 | + query_reply (QueryJob): |
| 419 | + A query job which has started. |
| 420 | +
|
| 421 | + timeout_ms (Optional[int]): |
| 422 | + How long to wait before cancelling the query. |
| 423 | + """ |
| 424 | + # Wait at most 10 seconds so we can show progress. |
| 425 | + # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/327): |
| 426 | + # Include a tqdm progress bar here instead of a stream of log messages. |
| 427 | + timeout_sec = 10.0 |
| 428 | + if timeout_ms: |
| 429 | + timeout_sec = min(timeout_sec, timeout_ms / 1000.0) |
| 430 | + |
| 431 | + while query_reply.state != "DONE": |
| 432 | + self.log_elapsed_seconds(" Elapsed", "s. Waiting...") |
| 433 | + |
| 434 | + if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: |
| 435 | + self.client.cancel_job( |
| 436 | + query_reply.job_id, location=query_reply.location |
| 437 | + ) |
| 438 | + raise QueryTimeout("Query timeout: {} ms".format(timeout_ms)) |
| 439 | + |
| 440 | + try: |
| 441 | + query_reply.result(timeout=timeout_sec) |
| 442 | + except concurrent.futures.TimeoutError: |
| 443 | + # Use our own timeout logic |
| 444 | + pass |
| 445 | + except self.http_error as ex: |
| 446 | + self.process_http_error(ex) |
| 447 | + |
409 | 448 | def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
|
410 |
| - from concurrent.futures import TimeoutError |
411 | 449 | from google.auth.exceptions import RefreshError
|
412 | 450 | from google.cloud import bigquery
|
413 | 451 | import pandas
|
@@ -449,28 +487,11 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
|
449 | 487 | job_id = query_reply.job_id
|
450 | 488 | logger.debug("Job ID: %s" % job_id)
|
451 | 489 |
|
452 |
| - while query_reply.state != "DONE": |
453 |
| - self.log_elapsed_seconds(" Elapsed", "s. Waiting...") |
454 |
| - |
455 |
| - timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get( |
456 |
| - "timeoutMs" |
457 |
| - ) |
458 |
| - timeout_ms = int(timeout_ms) if timeout_ms else None |
459 |
| - if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: |
460 |
| - raise QueryTimeout("Query timeout: {} ms".format(timeout_ms)) |
461 |
| - |
462 |
| - timeout_sec = 1.0 |
463 |
| - if timeout_ms: |
464 |
| - # Wait at most 1 second so we can show progress bar |
465 |
| - timeout_sec = min(1.0, timeout_ms / 1000.0) |
466 |
| - |
467 |
| - try: |
468 |
| - query_reply.result(timeout=timeout_sec) |
469 |
| - except TimeoutError: |
470 |
| - # Use our own timeout logic |
471 |
| - pass |
472 |
| - except self.http_error as ex: |
473 |
| - self.process_http_error(ex) |
| 490 | + timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get( |
| 491 | + "timeoutMs" |
| 492 | + ) |
| 493 | + timeout_ms = int(timeout_ms) if timeout_ms else None |
| 494 | + self._wait_for_query_job(query_reply, timeout_ms) |
474 | 495 |
|
475 | 496 | if query_reply.cache_hit:
|
476 | 497 | logger.debug("Query done.\nCache hit.\n")
|
@@ -673,6 +694,28 @@ def _finalize_dtypes(
|
673 | 694 | return df
|
674 | 695 |
|
675 | 696 |
|
| 697 | +def _transform_read_gbq_configuration(configuration): |
| 698 | + """ |
| 699 | + For backwards-compatibility, convert any previously client-side only |
| 700 | + parameters such as timeoutMs to the property name expected by the REST API. |
| 701 | +
|
| 702 | + Makes a copy of configuration if changes are needed. |
| 703 | + """ |
| 704 | + |
| 705 | + if configuration is None: |
| 706 | + return None |
| 707 | + |
| 708 | + timeout_ms = configuration.get("query", {}).get("timeoutMs") |
| 709 | + if timeout_ms is not None: |
| 710 | + # Transform timeoutMs to an actual server-side configuration. |
| 711 | + # https://github.com/googleapis/python-bigquery-pandas/issues/479 |
| 712 | + configuration = copy.deepcopy(configuration) |
| 713 | + del configuration["query"]["timeoutMs"] |
| 714 | + configuration["jobTimeoutMs"] = timeout_ms |
| 715 | + |
| 716 | + return configuration |
| 717 | + |
| 718 | + |
676 | 719 | def read_gbq(
|
677 | 720 | query_or_table,
|
678 | 721 | project_id=None,
|
@@ -847,6 +890,8 @@ def read_gbq(
|
847 | 890 | if dialect not in ("legacy", "standard"):
|
848 | 891 | raise ValueError("'{0}' is not valid for dialect".format(dialect))
|
849 | 892 |
|
| 893 | + configuration = _transform_read_gbq_configuration(configuration) |
| 894 | + |
850 | 895 | if configuration and "query" in configuration and "query" in configuration["query"]:
|
851 | 896 | if query_or_table is not None:
|
852 | 897 | raise ValueError(
|
|
0 commit comments