3
3
# license that can be found in the LICENSE file.
4
4
5
5
import logging
6
+ import re
6
7
import time
7
8
import warnings
8
9
from datetime import datetime
10
+ import typing
11
+ from typing import Any , Dict , Optional , Union
9
12
10
13
import numpy as np
11
14
15
+ # Only import at module-level at type checking time to avoid circular
16
+ # dependencies in the pandas package, which has an optional dependency on
17
+ # pandas-gbq.
18
+ if typing .TYPE_CHECKING : # pragma: NO COVER
19
+ import pandas
20
+
12
21
# Required dependencies, but treat as optional so that _test_google_api_imports
13
22
# can provide a better error message.
14
23
try :
@@ -64,6 +73,10 @@ def _test_google_api_imports():
64
73
raise ImportError ("pandas-gbq requires google-cloud-bigquery" ) from ex
65
74
66
75
76
+ def _is_query (query_or_table : str ) -> bool :
77
+ return re .search (r"\s" , query_or_table .strip (), re .MULTILINE ) is not None
78
+
79
+
67
80
class DatasetCreationError (ValueError ):
68
81
"""
69
82
Raised when the create dataset method fails
@@ -374,6 +387,30 @@ def process_http_error(ex):
374
387
375
388
raise GenericGBQException ("Reason: {0}" .format (ex ))
376
389
390
def download_table(
    self,
    table_id: str,
    max_results: Optional[int] = None,
    progress_bar_type: Optional[str] = None,
    dtypes: Optional[Dict[str, Union[str, Any]]] = None,
) -> "pandas.DataFrame":
    """Download the rows of a table without running a query.

    Parameters
    ----------
    table_id : str
        Table identifier, parsed with
        ``bigquery.TableReference.from_string``. ``self.project_id`` is
        supplied as the default project.
    max_results : int, optional
        Forwarded to ``client.list_rows`` and to ``_download_results``.
    progress_bar_type : str, optional
        Forwarded to ``_download_results``.
    dtypes : dict, optional
        Forwarded to ``_download_results`` as ``user_dtypes``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``_download_results`` returns for the listed rows.
    """
    self._start_timer()

    try:
        reference = bigquery.TableReference.from_string(
            table_id, default_project=self.project_id
        )
        row_iterator = self.client.list_rows(reference, max_results=max_results)
    except self.http_error as error:
        # NOTE(review): process_http_error is expected to always raise, so
        # ``row_iterator`` is bound whenever execution continues past here.
        self.process_http_error(error)

    download_options = {
        "max_results": max_results,
        "progress_bar_type": progress_bar_type,
        "user_dtypes": dtypes,
    }
    return self._download_results(row_iterator, **download_options)
413
+
377
414
def run_query (self , query , max_results = None , progress_bar_type = None , ** kwargs ):
378
415
from concurrent .futures import TimeoutError
379
416
from google .auth .exceptions import RefreshError
@@ -390,15 +427,6 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
390
427
if config is not None :
391
428
job_config .update (config )
392
429
393
- if "query" in config and "query" in config ["query" ]:
394
- if query is not None :
395
- raise ValueError (
396
- "Query statement can't be specified "
397
- "inside config while it is specified "
398
- "as parameter"
399
- )
400
- query = config ["query" ].pop ("query" )
401
-
402
430
self ._start_timer ()
403
431
404
432
try :
@@ -464,15 +492,25 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
464
492
)
465
493
466
494
dtypes = kwargs .get ("dtypes" )
495
+
496
+ # Ensure destination is populated.
497
+ try :
498
+ query_reply .result ()
499
+ except self .http_error as ex :
500
+ self .process_http_error (ex )
501
+
502
+ rows_iter = self .client .list_rows (
503
+ query_reply .destination , max_results = max_results
504
+ )
467
505
return self ._download_results (
468
- query_reply ,
506
+ rows_iter ,
469
507
max_results = max_results ,
470
508
progress_bar_type = progress_bar_type ,
471
509
user_dtypes = dtypes ,
472
510
)
473
511
474
512
def _download_results (
475
- self , query_job , max_results = None , progress_bar_type = None , user_dtypes = None ,
513
+ self , rows_iter , max_results = None , progress_bar_type = None , user_dtypes = None ,
476
514
):
477
515
# No results are desired, so don't bother downloading anything.
478
516
if max_results == 0 :
@@ -504,11 +542,6 @@ def _download_results(
504
542
to_dataframe_kwargs ["create_bqstorage_client" ] = create_bqstorage_client
505
543
506
544
try :
507
- query_job .result ()
508
- # Get the table schema, so that we can list rows.
509
- destination = self .client .get_table (query_job .destination )
510
- rows_iter = self .client .list_rows (destination , max_results = max_results )
511
-
512
545
schema_fields = [field .to_api_repr () for field in rows_iter .schema ]
513
546
conversion_dtypes = _bqschema_to_nullsafe_dtypes (schema_fields )
514
547
conversion_dtypes .update (user_dtypes )
@@ -644,7 +677,7 @@ def _cast_empty_df_dtypes(schema_fields, df):
644
677
645
678
646
679
def read_gbq (
647
- query ,
680
+ query_or_table ,
648
681
project_id = None ,
649
682
index_col = None ,
650
683
col_order = None ,
@@ -668,17 +701,18 @@ def read_gbq(
668
701
669
702
This method uses the Google Cloud client library to make requests to
670
703
Google BigQuery, documented `here
671
- <https://google-cloud-python.readthedocs.io/en/latest/ bigquery/usage .html>`__.
704
+ <https://googleapis.dev/python/ bigquery/latest/index .html>`__.
672
705
673
706
See the :ref:`How to authenticate with Google BigQuery <authentication>`
674
707
guide for authentication instructions.
675
708
676
709
Parameters
677
710
----------
678
- query : str
679
- SQL-Like Query to return data values.
711
+ query_or_table : str
712
+ SQL query to return data values. If the string is a table ID, fetch the
713
+ rows directly from the table without running a query.
680
714
project_id : str, optional
681
- Google BigQuery Account project ID. Optional when available from
715
+ Google Cloud Platform project ID. Optional when available from
682
716
the environment.
683
717
index_col : str, optional
684
718
Name of result column to use for index in results DataFrame.
@@ -688,14 +722,14 @@ def read_gbq(
688
722
reauth : boolean, default False
689
723
Force Google BigQuery to re-authenticate the user. This is useful
690
724
if multiple accounts are used.
691
- auth_local_webserver : boolean , default False
692
- Use the `local webserver flow`_ instead of the `console flow`_
693
- when getting user credentials.
694
-
695
- .. _local webserver flow:
696
- http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
697
- .. _console flow:
698
- http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
725
+ auth_local_webserver : bool , default False
726
+ Use the `local webserver flow
727
+ <https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server>`_
728
+ instead of the `console flow
729
+ <https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib. flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console>`_
730
+ when getting user credentials. Your code must run on the same machine
731
+ as your web browser and your web browser can access your application
732
+ via ``localhost:808X``.
699
733
700
734
.. versionadded:: 0.2.0
701
735
dialect : str, default 'standard'
@@ -745,13 +779,6 @@ def read_gbq(
745
779
<https://cloud.google.com/bigquery/docs/access-control#roles>`__
746
780
permission on the project you are billing queries to.
747
781
748
- **Note:** Due to a `known issue in the ``google-cloud-bigquery``
749
- package
750
- <https://github.com/googleapis/google-cloud-python/pull/7633>`__
751
- (fixed in version 1.11.0), you must write your query results to a
752
- destination table. To do this with ``read_gbq``, supply a
753
- ``configuration`` dictionary.
754
-
755
782
This feature requires the ``google-cloud-bigquery-storage`` and
756
783
``pyarrow`` packages.
757
784
@@ -823,6 +850,15 @@ def read_gbq(
823
850
if dialect not in ("legacy" , "standard" ):
824
851
raise ValueError ("'{0}' is not valid for dialect" .format (dialect ))
825
852
853
+ if configuration and "query" in configuration and "query" in configuration ["query" ]:
854
+ if query_or_table is not None :
855
+ raise ValueError (
856
+ "Query statement can't be specified "
857
+ "inside config while it is specified "
858
+ "as parameter"
859
+ )
860
+ query_or_table = configuration ["query" ].pop ("query" )
861
+
826
862
connector = GbqConnector (
827
863
project_id ,
828
864
reauth = reauth ,
@@ -834,13 +870,21 @@ def read_gbq(
834
870
use_bqstorage_api = use_bqstorage_api ,
835
871
)
836
872
837
- final_df = connector .run_query (
838
- query ,
839
- configuration = configuration ,
840
- max_results = max_results ,
841
- progress_bar_type = progress_bar_type ,
842
- dtypes = dtypes ,
843
- )
873
+ if _is_query (query_or_table ):
874
+ final_df = connector .run_query (
875
+ query_or_table ,
876
+ configuration = configuration ,
877
+ max_results = max_results ,
878
+ progress_bar_type = progress_bar_type ,
879
+ dtypes = dtypes ,
880
+ )
881
+ else :
882
+ final_df = connector .download_table (
883
+ query_or_table ,
884
+ max_results = max_results ,
885
+ progress_bar_type = progress_bar_type ,
886
+ dtypes = dtypes ,
887
+ )
844
888
845
889
# Reindex the DataFrame on the provided column
846
890
if index_col is not None :
@@ -889,7 +933,7 @@ def to_gbq(
889
933
890
934
This method uses the Google Cloud client library to make requests to
891
935
Google BigQuery, documented `here
892
- <https://google-cloud-python.readthedocs.io/en/latest/ bigquery/usage .html>`__.
936
+ <https://googleapis.dev/python/ bigquery/latest/index .html>`__.
893
937
894
938
See the :ref:`How to authenticate with Google BigQuery <authentication>`
895
939
guide for authentication instructions.
@@ -902,7 +946,7 @@ def to_gbq(
902
946
Name of table to be written, in the form ``dataset.tablename`` or
903
947
``project.dataset.tablename``.
904
948
project_id : str, optional
905
- Google BigQuery Account project ID. Optional when available from
949
+ Google Cloud Platform project ID. Optional when available from
906
950
the environment.
907
951
chunksize : int, optional
908
952
Number of rows to be inserted in each chunk from the dataframe.
@@ -920,13 +964,13 @@ def to_gbq(
920
964
``'append'``
921
965
If table exists, insert data. Create if does not exist.
922
966
auth_local_webserver : bool, default False
923
- Use the `local webserver flow`_ instead of the `console flow`_
924
- when getting user credentials.
925
-
926
- .. _local webserver flow:
927
- http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
928
- .. _console flow:
929
- http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
967
+ Use the `local webserver flow
968
+ <https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server>`_
969
+ instead of the `console flow
970
+ <https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib. flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console>`_
971
+ when getting user credentials. Your code must run on the same machine
972
+ as your web browser and your web browser can access your application
973
+ via ``localhost:808X``.
930
974
931
975
.. versionadded:: 0.2.0
932
976
table_schema : list of dicts, optional
0 commit comments